vgteam · adamnovak · May 21, 2026 · May 12, 2026 · May 12, 2026 · May 13, 2026
diff --git a/deps/gbwtgraph b/deps/gbwtgraph
diff --git a/src/algorithms/chain_items.cpp b/src/algorithms/chain_items.cpp
@@ -640,13 +640,9 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
                            const HandleGraph& graph,
                            int gap_open,
                            int gap_extension,
+                           const ChainScoringScheme& scheme,
                            const transition_iterator& for_each_transition,
-                           int item_bonus,
-                           double item_scale,
-                           double gap_scale,
-                           double points_per_possible_match,
                            size_t max_indel_bases,
-                           int recomb_penalty,
                            bool show_work) {
 
     DiagramExplainer diagram(show_work);
@@ -659,7 +655,9 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
         cerr << "Chaining group of " << to_chain.size() << " items" << endl;
     }
 
-    crash_unless(recomb_penalty >= 0);
+    crash_unless(scheme.recombination_penalty >= 0);
+    crash_unless(scheme.consistency_bonus >= 0);
+
 
     // Compute a base seed average length.
     // TODO: Weight anchors differently?
@@ -683,11 +681,11 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
     // We store the bonus used to select the current winning predecessor for
     // each seed in this vector, which runs alongside the DP table.
     //
-    // Starting from nowhere means full path conservation, so bonus = recomb_penalty.
-    std::vector<int> eval_bonuses(to_chain.size(), recomb_penalty);
+    // Starting from nowhere means full path conservation, so bonus = scheme.consistency_bonus.
+    std::vector<int> eval_bonuses(to_chain.size(), scheme.consistency_bonus);
     for (size_t i = 0; i < to_chain.size(); i++) {
         // Set up DP table so we can start anywhere with that item's score, scaled and with bonus applied.
-        chain_scores[i] = {(int)(to_chain[i].score() * item_scale + item_bonus), TracedScore::nowhere(), to_chain[i].anchor_end_paths()};
+        chain_scores[i] = {(int)(to_chain[i].score() * scheme.item_scale + scheme.item_bonus), TracedScore::nowhere(), to_chain[i].anchor_end_paths()};
     }
 
     // We will run this over every transition in a good DP order.
@@ -706,18 +704,18 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
         auto& here = to_chain[transition.to_anchor];
 
         // How many points is it worth to collect?
-        auto item_points = here.score() * item_scale + item_bonus;
+        auto item_points = here.score() * scheme.item_scale + scheme.item_bonus;
 
         std::string here_gvnode;
         if (diagram) {
             here_gvnode = "i" + std::to_string(transition.to_anchor);
         }
 
         // If we come from nowhere, we get those points.
-        // This also has full path conservation (bonus = recomb_penalty).
+        // This also has full path conservation (bonus = scheme.consistency_bonus).
         {
             TracedScore from_nowhere = {(int)item_points, TracedScore::nowhere(), here.anchor_end_paths()};
-            int nowhere_bonus = recomb_penalty;
+            int nowhere_bonus = scheme.consistency_bonus;
             int eval_nowhere = from_nowhere.score + nowhere_bonus;
             int eval_current = chain_scores[transition.to_anchor].score + eval_bonuses[transition.to_anchor];
             if (eval_nowhere > eval_current) {
@@ -787,13 +785,13 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
             //
             // But we account for anchor length in the item points, so don't use it
             // here.
-            jump_points = -score_chain_gap(indel_length, base_seed_length) * gap_scale;
+            jump_points = -score_chain_gap(indel_length, base_seed_length) * scheme.gap_scale;
 
             // add recombination penalty if necessary
-            jump_points -= check_recombination(chain_scores[transition.from_anchor], here) * recomb_penalty;
+            jump_points -= check_recombination(chain_scores[transition.from_anchor], here) * scheme.recombination_penalty;
 
             // We can also account for the non-indel material, which we assume will have some identity in it.
-            jump_points += possible_match_length * points_per_possible_match;
+            jump_points += possible_match_length * scheme.points_per_possible_match;
         }
 
         if (jump_points != numeric_limits<int>::min()) {
@@ -805,15 +803,15 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
                                                         .set_shared_paths(here.anchor_paths());
 
             // Evaluate heuristic to preserve path flexibility without inflating actual scoring DP.
-            // Bonus = fraction of conserved paths * recomb_penalty.
+            // Bonus = fraction of conserved paths * scheme.consistency_bonus.
             // Bonus is 0 when recombination occurs (no shared paths).
             int eval_bonus_from = 0;
-            if (recomb_penalty > 0) {
+            if (scheme.consistency_bonus > 0) {
                 int pre_count = __builtin_popcountll(source_score.paths);
                 if (pre_count > 0 && (source_score.paths & here.anchor_start_paths()) != 0) {
                     // No recombination: bonus = fraction of paths conserved * penalty
                     int post_count = __builtin_popcountll(from_source_score.paths);
-                    eval_bonus_from = (recomb_penalty * post_count) / pre_count;
+                    eval_bonus_from = (scheme.consistency_bonus * post_count) / pre_count;
                 }
                 // Recombination case (no shared paths): bonus stays 0
             }
@@ -895,7 +893,7 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
 
         if (diagram) {
             // Draw the item in the diagram
-            auto item_points = here.score() * item_scale + item_bonus;
+            auto item_points = here.score() * scheme.item_scale + scheme.item_bonus;
             std::string here_gvnode = "i" + std::to_string(to_anchor);
             std::stringstream label_stream;
             label_stream << "#" << to_anchor << " " << here << " = " << item_points
@@ -941,8 +939,7 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
 vector<pair<vector<size_t>, int>> chain_items_traceback(const vector<TracedScore>& chain_scores,
                                                         const VectorView<Anchor>& to_chain,
                                                         const TracedScore& best_past_ending_score_ever,
-                                                        int item_bonus,
-                                                        double item_scale,
+                                                        const ChainScoringScheme& scheme,
                                                         size_t max_tracebacks) {
 
     // We will fill this in with all the tracebacks, and then sort and truncate.
@@ -990,7 +987,7 @@ vector<pair<vector<size_t>, int>> chain_items_traceback(const vector<TracedScore
                     // Take away all the points we got for coming from there and being ourselves.
                     penalty += chain_scores[here].score;
                     // But then re-add our score for just us
-                    penalty -= (to_chain[here].score() * item_scale + item_bonus);
+                    penalty -= (to_chain[here].score() * scheme.item_scale + scheme.item_bonus);
                     // TODO: Score this more simply.
                     // TODO: find the edge to nowhere???
                     break;
@@ -1029,13 +1026,9 @@ ChainsResult find_best_chains(const VectorView<Anchor>& to_chain,
                                                    const HandleGraph& graph,
                                                    int gap_open,
                                                    int gap_extension,
-                                                   int recomb_penalty,
+                                                   const ChainScoringScheme& scheme,
                                                    size_t max_chains,
                                                    const transition_iterator& for_each_transition,
-                                                   int item_bonus,
-                                                   double item_scale,
-                                                   double gap_scale,
-                                                   double points_per_possible_match,
                                                    size_t max_indel_bases,
                                                    bool show_work) {
 
@@ -1057,20 +1050,16 @@ ChainsResult find_best_chains(const VectorView<Anchor>& to_chain,
                                                              graph,
                                                              gap_open,
                                                              gap_extension,
+                                                             scheme,
                                                              for_each_transition,
-                                                             item_bonus,
-                                                             item_scale,
-                                                             gap_scale,
-                                                             points_per_possible_match,
                                                              max_indel_bases,
-                                                             recomb_penalty,
-                                                             show_work);                                                             
+                                                             show_work);
 #ifdef debug_chaining
     std::cerr << "[REC INFO] Recombination number for chain: " << best_past_ending_score_ever.rec_num << "\tscore: " << best_past_ending_score_ever.score << "\tpaths: " << best_past_ending_score_ever.paths << std::endl;
 #endif
     // Then do the tracebacks
     vector<pair<vector<size_t>, int>> tracebacks = chain_items_traceback(
-        chain_scores, to_chain, best_past_ending_score_ever, item_bonus, item_scale, max_chains);
+        chain_scores, to_chain, best_past_ending_score_ever, scheme, max_chains);
 
     if (tracebacks.empty()) {
         // Somehow we got nothing
@@ -1181,12 +1170,8 @@ pair<int, vector<size_t>> find_best_chain(const VectorView<Anchor>& to_chain,
                                           const HandleGraph& graph,
                                           int gap_open,
                                           int gap_extension,
-                                          int recomb_penalty,
+                                          const ChainScoringScheme& scheme,
                                           const transition_iterator& for_each_transition,
-                                          int item_bonus,
-                                          double item_scale,
-                                          double gap_scale,
-                                          double points_per_possible_match,
                                           size_t max_indel_bases) {
 
     ChainsResult cr = find_best_chains(
@@ -1195,13 +1180,9 @@ pair<int, vector<size_t>> find_best_chain(const VectorView<Anchor>& to_chain,
         graph,
         gap_open,
         gap_extension,
-        recomb_penalty,
+        scheme,
         1,
         for_each_transition,
-        item_bonus,
-        item_scale,
-        gap_scale,
-        points_per_possible_match,
         max_indel_bases
     );
     return cr.chains.front().scored_chain;

diff --git a/src/algorithms/chain_items.hpp b/src/algorithms/chain_items.hpp
@@ -382,6 +382,7 @@ ostream& operator<<(ostream& out, const TracedScore& value);
  */
 void sort_anchor_indexes(const std::vector<Anchor>& items, std::vector<size_t>& indexes);
 
+/// Represents a possible transition between anchors
 struct transition_info {
     // Index of the source anchor within the list of anchors
     size_t from_anchor;
@@ -392,14 +393,37 @@ struct transition_info {
     // Distance between anchors in the read
     size_t read_distance;
 
-    // Constructor; read_distance defaults to max if not given
+    /// Build transition info from loose values
     inline transition_info(size_t from, size_t to, size_t graph_dist, size_t read_dist = std::numeric_limits<size_t>::max())
         : from_anchor(from), to_anchor(to), graph_distance(graph_dist), read_distance(read_dist) {}
 };
 
+
+/// Represents the scoring scheme for chains, to determine which are best.
+/// Doesn't cover the parameters that really belong to an alignment scoring
+/// scheme (like gap open and extend).
+struct ChainScoringScheme {
+    /// Score bonus for each item collected
+    int item_bonus = 0;
+    /// Scale to apply to each item's own score
+    double item_scale = 1.0;
+    /// Scale to apply to the scores of gaps
+    double gap_scale = 1.0;
+    /// Add this many points per potential match between two seeds
+    double points_per_possible_match = 0;
+    /// Penalize this many points per recombination
+    int recombination_penalty = 0;
+    /// Apply a bonus during alternative selection (but not to actual DP
+    /// scores) of this many points when matching haplotype paths are
+    /// preserved, scaled by fraction of haplotypes preserved.
+    int consistency_bonus = 0;
+};
+
 /// A single chain result: scored chain plus the recombination count observed
 /// on its endpoint.
+/// TODO: Is there a better name for the abstraction this is getting at?
 struct ChainWithRec {
+    // TODO: Shouldn't we split this into 2 fields?
     std::pair<int, std::vector<size_t>> scored_chain;
     // Positions (anchor indices) in the chain that introduce a recombination
     // event between anchors. These correspond to anchors where we had to
@@ -418,6 +442,8 @@ struct ChainWithRec {
 
 /// Result of finding best chains: a list of chains each paired with the
 /// recombination count observed at that chain's endpoint.
+/// TODO: Can we get rid of this once we're sure it won't need more fields?
+/// TODO: Is there a better name for this?
 struct ChainsResult {
     std::vector<ChainWithRec> chains;
 };
@@ -525,8 +551,7 @@ std::vector<transition_info> calculate_transition_read_distances(
  *
  * Input items must be sorted by start position in the read.
  *
- * Takes the given per-item bonus for each item collected, and scales item
- * scores by the given scale.
+ * Uses the given scoring scheme to score chains.
  *
  * Uses a transition iterator to enumerate where we can come from to reach an
  * item. 
@@ -538,15 +563,12 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
                            const VectorView<Anchor>& to_chain,
                            const SnarlDistanceIndex& distance_index,
                            const HandleGraph& graph,
+                           // TODO: We should maybe just take an EditAlignmentScorer here.
                            int gap_open,
                            int gap_extension,
+                           const ChainScoringScheme& scheme = ChainScoringScheme(),
                            const transition_iterator& for_each_transition = lookback_transition_iterator(150, 0, 100),
-                           int item_bonus = 0,
-                           double item_scale = 1.0,
-                           double gap_scale = 1.0,
-                           double points_per_possible_match = 0,
                            size_t max_indel_bases = 100,
-                           int recomb_penalty = 0,
                            bool show_work = false
                         );
 
@@ -567,8 +589,7 @@ TracedScore chain_items_dp(vector<TracedScore>& chain_scores,
 vector<pair<vector<size_t>, int>> chain_items_traceback(const vector<TracedScore>& chain_scores,
                                                         const VectorView<Anchor>& to_chain,
                                                         const TracedScore& best_past_ending_score_ever,
-                                                        int item_bonus = 0,
-                                                        double item_scale = 1.0,
+                                                        const ChainScoringScheme& scheme = ChainScoringScheme(),
                                                         size_t max_tracebacks = 1);
 
 
@@ -586,13 +607,9 @@ ChainsResult find_best_chains(const VectorView<Anchor>& to_chain,
                                                    const HandleGraph& graph,
                                                    int gap_open,
                                                    int gap_extension,
-                                                   int recomb_penalty = 0,
+                                                   const ChainScoringScheme& scheme = ChainScoringScheme(),
                                                    size_t max_chains = 1,
                                                    const transition_iterator& for_each_transition = lookback_transition_iterator(150, 0, 100), 
-                                                   int item_bonus = 0,
-                                                   double item_scale = 1.0,
-                                                   double gap_scale = 1.0,
-                                                   double points_per_possible_match = 0,
                                                    size_t max_indel_bases = 100,
                                                    bool show_work = false);
 
@@ -610,12 +627,8 @@ pair<int, vector<size_t>> find_best_chain(const VectorView<Anchor>& to_chain,
                                           const HandleGraph& graph,
                                           int gap_open,
                                           int gap_extension,
-                                          int recomb_penalty = 0,
+                                          const ChainScoringScheme& scheme = ChainScoringScheme(),
                                           const transition_iterator& for_each_transition = lookback_transition_iterator(150, 0, 100),
-                                          int item_bonus = 0,
-                                          double item_scale = 1.0,
-                                          double gap_scale = 1.0,
-                                          double points_per_possible_match = 0,
                                           size_t max_indel_bases = 100);
 
 /**
+8 −4		include/gbwtgraph/index.h
+6 −0		include/gbwtgraph/internal.h
+3 −0		include/gbwtgraph/utils.h
+3 −0		src/index.cpp
+1 −1		src/path_cover.cpp
+2 −0		src/utils.cpp
+2 −0		tests/test_index.cpp