From c48617d8af957f3a39f19c57e5fc5f807c56d311 Mon Sep 17 00:00:00 2001 From: Zia Truong <194475824+electricEpilith@users.noreply.github.com> Date: Tue, 9 Dec 2025 12:54:05 -0800 Subject: [PATCH 01/75] add hublabel build and query code, get tests to build --- CMakeLists.txt | 5 +- bdsg/include/bdsg/ch.hpp | 70 ++ bdsg/include/bdsg/hublabel.hpp | 65 ++ bdsg/include/bdsg/landmark.hpp | 245 ++++++ bdsg/include/bdsg/snarl_distance_index.hpp | 17 +- bdsg/src/ch.cpp | 887 +++++++++++++++++++++ bdsg/src/hublabel.cpp | 394 +++++++++ bdsg/src/landmark.cpp | 501 ++++++++++++ bdsg/src/snarl_distance_index.cpp | 98 +-- bdsg/src/test_libbdsg.cpp | 33 +- 10 files changed, 2260 insertions(+), 55 deletions(-) create mode 100644 bdsg/include/bdsg/ch.hpp create mode 100644 bdsg/include/bdsg/hublabel.hpp create mode 100644 bdsg/include/bdsg/landmark.hpp create mode 100644 bdsg/src/ch.cpp create mode 100644 bdsg/src/hublabel.cpp create mode 100644 bdsg/src/landmark.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ed36e21e..45998cbc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,8 +18,8 @@ OPTION(USE_INSTALLED_LIBHANDLEGRAPH "Use the version of libhandlegraph installed # TODO: We can only do out-of-source builds! # TODO: How do we error out meaningfully on in-source builds? -# We build using c++14 -set(CMAKE_CXX_STANDARD 14) +# We build using c++20 +set(CMAKE_CXX_STANDARD 20) # We need library paths to be relative in the build directories so we can let # the libraries in our Python module find each other when we package them into # a wheel. 
This only works on CMake 3.14+; older CMake we have to bully with @@ -306,6 +306,7 @@ add_library(bdsg_objs OBJECT ${bdsg_DIR}/src/strand_split_overlay.cpp ${bdsg_DIR}/src/utility.cpp ${bdsg_DIR}/src/vectorizable_overlays.cpp + ${bdsg_DIR}/src/ch.cpp ${bdsg_DIR}/src/snarl_distance_index.cpp ) diff --git a/bdsg/include/bdsg/ch.hpp b/bdsg/include/bdsg/ch.hpp new file mode 100644 index 00000000..cc8668cd --- /dev/null +++ b/bdsg/include/bdsg/ch.hpp @@ -0,0 +1,70 @@ +/* +file for quickly playing around with stuff +*/ +#include "landmark.hpp" +#include "hublabel.hpp" + +#include +#include +#include +#include +#include + +namespace bdsg { +NODE_UINT bgid(const handle_t& h, bdsg::HashGraph& hg); + +NODE_UINT rev_bgid(NODE_UINT n); + +typedef struct NodeProp { + DIST_UINT seqlen; + DIST_UINT max_out = 0; + NODE_UINT contracted_neighbors = 0; + NODE_UINT level = 0; + NODE_UINT arc_cover = 1; + bool contracted = false; + NODE_UINT new_id; +} NodeProp; + +typedef struct EdgeProp { + bool contracted = false; + DIST_UINT weight = 0; + NODE_UINT arc_cover = 1; + bool ori = true; +} EdgeProp; + +typedef boost::adjacency_list CHOverlay; +typedef boost::filtered_graph> ContractedGraph; + +CHOverlay make_boost_graph(bdsg::HashGraph& hg); +CHOverlay make_boost_graph(bdsg::SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, pair& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, vector>& all_children, const HandleGraph* graph); + +int edge_diff(ContractedGraph::vertex_descriptor nid, ContractedGraph& ch, CHOverlay& ov, vector& node_dists, int hop_limit); + +void contract(CHOverlay::vertex_descriptor nid, ContractedGraph& ch, CHOverlay& ov, vector& node_dists, vector& shouldnt_contract, int hop_limit); + +void make_contraction_hierarchy(CHOverlay& ov); + + +DIST_UINT binary_intersection_ch(vector& v1, vector& v2); + +DIST_UINT hhl_query(size_t rank1, size_t rank2, std::function reader); + +void down_dijk(int node, CHOverlay& ov, 
vector& node_dists, vector>& labels, vector>& labels_rev); + +void down_dijk_rev(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_rev); + +void test_dijk(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_rev); + +void test_dijk_rev(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_rev); + +void create_labels(vector>& labels, vector>& labels_rev, CHOverlay& ov); + +vector pack_labels(const vector>& labels, const vector>& labels_back); + +//not necessary stuff +void write_to_csv(CHOverlay& ov, string out_path); + +void write_to_gr(CHOverlay& ov, string out_path); + +vector read_node_order(string in_path); +} diff --git a/bdsg/include/bdsg/hublabel.hpp b/bdsg/include/bdsg/hublabel.hpp new file mode 100644 index 00000000..885f370e --- /dev/null +++ b/bdsg/include/bdsg/hublabel.hpp @@ -0,0 +1,65 @@ +/* +header file for hub labeling stuff +*/ +#include "landmark.hpp" +#include + +namespace bdsg { + +typedef unordered_map NsDistMap; +typedef struct HubRecord { + NodeId hub {}; + DIST_UINT dist {}; + + HubRecord() : hub{0}, dist{INF_INT} {} + HubRecord(NodeId hid, DIST_UINT min_dist) : hub{hid}, dist{min_dist} {} + + auto operator<=>(const HubRecord& r2) const { + return hub <=> r2.hub; + } + + auto operator<=>(const NodeId& n) const { + return hub <=> n; + } + //based off https://uscilab.github.io/cereal/serialization_functions.html + template + void serialize(Archive& a) { + a(hub, dist); + } +} HubRecord; + +typedef pair,vector> HubRecsPair; +typedef uint32_t NODE_UINT; + + +//first vec stores hub nodes whose left ns the label's ns reaches +//second vec stores hub nodes whose right ns the label's ns reaches +typedef pair,vector> HubsPair; +typedef pair Label; + +typedef tuple QueueObj2; +bool pqcomp2(const QueueObj2& o1, const QueueObj2& o2); +typedef std::priority_queue, function> PriorityQueue2; + +typedef tuple HwQueueObj; +bool pqcomp_hw(const HwQueueObj& o1, const HwQueueObj& o2); 
+typedef std::priority_queue, function> HwPriorityQueue; + +bool label_contains(HubRecsPair& label, NodesideId query); + +DIST_UINT get_label_dist(HubRecsPair& label, NodesideId query); + +DIST_UINT hl_build_intersect(HubRecsPair& l1, HubRecsPair& l2, vector& node_lens); + +DIST_UINT get_degree(NodeId node, bdsg::HashGraph& g); + +void pruned_dijkstra(bdsg::HashGraph& g, NodesideId start, vector& labels, vector& path_lengths, vector& rank_to_ns, vector& ns_to_rank, vector& rank_node_lens,int stop_dist = INF_INT); + +DIST_UINT hl_query(NodesideId i, NodesideId j, vector& labels, bdsg::HashGraph& g); +DIST_UINT hl_query(NodesideId i, NodesideId j, vector& labels, vector& rank_node_lens); + +vector make_labels(bdsg::HashGraph& g); + + + +} diff --git a/bdsg/include/bdsg/landmark.hpp b/bdsg/include/bdsg/landmark.hpp new file mode 100644 index 00000000..06bf7c0c --- /dev/null +++ b/bdsg/include/bdsg/landmark.hpp @@ -0,0 +1,245 @@ +#ifndef LANDMARK_HPP +#define LANDMARK_HPP + +#include +#include +#include +#include +#include +#include +//#include +//#include + +using namespace std; +namespace bdsg { +//inf implementation is largest possible int +#define INF_INT numeric_limits::max() +#define DIST_NBITS 32 +#define DIST_UINT uint32_t +#define ARR2D_OFFSET 1 + +typedef int NodeId; +typedef int NodesideId; +typedef enum EnterDir {OTHER_NODESIDE=0,OTHER_NODE=1} EnterDir; +typedef boost::multi_array Array2D; +typedef unordered_map Ball; +class SdslArray2D { +private: + vector> arr2d; + uint8_t offset = ARR2D_OFFSET; + static DIST_UINT get_inf() { + bitset uint_bits; + uint_bits.reset().flip(DIST_NBITS - 1); + return static_cast(uint_bits.to_ulong()); + } + DIST_UINT Inf_UInt = 0; + DIST_UINT row_count = 0; + DIST_UINT col_count = 0; + static int toOut(DIST_UINT entry) { + if (entry == 0) { + return INF_INT; + } else { + return static_cast(entry-ARR2D_OFFSET); + } + } +public: + using size_type = DIST_UINT; + SdslArray2D(int nrow, int ncol) { + row_count = nrow; col_count = 
ncol; + arr2d.resize(row_count); + for (DIST_UINT i = 0; i < row_count; i++) { + sdsl::int_vector sdsl_row(col_count, Inf_UInt, DIST_NBITS); + arr2d[i] = move(sdsl_row); + } + } + + SdslArray2D(Array2D& a) { + auto ashape = a.shape(); + row_count = ashape[0]; col_count = ashape[1]; + arr2d.resize(row_count); + for (DIST_UINT i = 0; i < row_count; i++) { + sdsl::int_vector sdsl_row(col_count, Inf_UInt, DIST_NBITS); + for (DIST_UINT j = 0; j < col_count; j++) { + int entry = a[i][j]; + if (entry == INF_INT) { + sdsl_row[j] = Inf_UInt; + } else { + sdsl_row[j] = static_cast(entry)+offset; + } + } + arr2d[i] = move(sdsl_row); + } + } + + sdsl::int_vector& operator[](int i) { + return arr2d[i]; + } + + DIST_UINT get(int i, int j) { + DIST_UINT entry = arr2d[i][j]; + if (entry == 0) { + return INF_INT; + } else { + return entry-offset; + } + } + + void set(int i, int j, DIST_UINT val) { + if (val == INF_INT) { + arr2d[i][j] = 0; + } else { + arr2d[i][j] = val+offset; + } + } + + size_type serialize(ostream& out, sdsl::structure_tree_node* prev_node=nullptr, string structure_name="SdslArr2d") const { + sdsl::structure_tree_node* arr_node = sdsl::structure_tree::add_child(prev_node, structure_name, sdsl::util::class_name(*this)); + size_type bytes_count = 0; + DIST_UINT nrows = arr2d.size(); + bytes_count += sdsl::write_member(nrows, out, arr_node, "nrows"); + for (auto& row: arr2d) { + bytes_count += row.serialize(out, arr_node, "arr2d_row"); + } + return bytes_count; + } + + void load(istream& in) { + DIST_UINT nrows; + sdsl::read_member(nrows, in); + arr2d.resize(nrows); + for (DIST_UINT i = 0; i < nrows; i++) { + arr2d[i].load(in); + } + row_count = nrows; col_count = arr2d[0].size(); + } + + void bit_compress() { + for (auto& row: arr2d) { + sdsl::util::bit_compress(row); + } + } + + array shape() { + array shape_arr = {row_count, col_count}; + return shape_arr; + } + + auto col_view(DIST_UINT col) { + auto get_col_entry = [col] (sdsl::int_vector& row) { + return 
row[col]; + }; + + auto col_itr = ranges::views::transform(ranges::views::transform(arr2d, get_col_entry), toOut); + return col_itr; + } +}; + +int addInt(int a, int b); + +/* +following functions assume input graph node ids go from 1...N +*/ + +/* +Converts a node_id to a, or both nodeside_ids +left and return_both should not both be True +*/ +//using bool for indicating side just like for direction in follow_edges +NodesideId node_to_nodeside(NodeId node_id, bool left); + +array get_node_nodesides(NodeId node_id); + +NodeId nodeside_to_node(NodesideId nodeside_id); + +tuple nodeside_to_node_tuple(NodesideId nodeside_id); + +bool nodeside_left (NodesideId nodeside_id); + +NodesideId other_nodeside(int nodeside_id); + +NodesideId handle_entry_nodeside(const handle_t& handle, HashGraph& g); + +string nodeside_string(NodesideId nodeside); + + +/// landmark selection + +/* +returns ids of nodesides with largest degree +*/ +//helper +int nodeside_degree(NodesideId ns_id, HashGraph& g); + +vector get_highest_degree_nodesides(bdsg::HashGraph& g, int top_num, int nodeside_count); + +vector get_landmark_nodes(bdsg::HashGraph& g, int top_ns_num, int nodeside_count); + +/* +Notes: +- modifies the table passed in +*/ +void fill_other_nodeside_dists(SdslArray2D& dist_table, HashGraph& g); + +/* +trying insert-Dijkstra over decrease-key dijkstra +see https://stackoverflow.com/questions/9255620/why-does-dijkstras-algorithm-use-decrease-key + +start is the id of the origin node +dijkstra goes from start node to other nodes' nodesides +*/ +typedef tuple QueueObj; +bool pqcomp1(const QueueObj& o1, const QueueObj& o2); + +typedef std::priority_queue, function> PriorityQueue; + +PriorityQueue initQueue( + int nodeside_count, + NodesideId start_ns +); + + +/* +Called when new nodeside is discovered. 
+If path to the nodeside is the shortest found so far: + update path_lengths and add new queue entry for it +*/ +bool discover_nodeside(NodesideId new_nodeside, EnterDir enter_direction, int new_len, vector>& path_lengths, PriorityQueue& nodeside_queue); + + +SdslArray2D dijkstra(bdsg::HashGraph& g, NodesideId start, NodesideId stop_ns = -1, int stop_dist = INF_INT, bool ball_ver = false); + +/* +notes: + - lm_nodes should not be empty +*/ +SdslArray2D get_lm2ns_dist_table(vector& lm_nodes, HashGraph& g); + +void get_closest_lm_ind(SdslArray2D& dist_table, vector& ind_vec, vector& min_dist_vec, int nodeside_count); + +/* +based off https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/msr-tr-2009-84.pdf [Chen et al. (2009)] +*/ +Ball get_ball_contents(NodesideId ns, int dist_limit, HashGraph& g); + + +vector find_balls(vector& closest_lm_dist_vec, HashGraph& g, int min_ball_size = 0); + +typedef struct OracleInfo { + vector& lm_nodes; + vector& closest_lm_inds; + vector& closest_lm_dists; + vector& balls; + SdslArray2D& lm_sides_to_ns; +} OracleInfo; + + +/* +query algorithm from Chen et al. (2009) +with modifications for our purpose +*/ + +int oracle_query(NodesideId source, NodesideId target, OracleInfo& oracle, HashGraph& g); + +} + +#endif diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 5e013962..ca33bd86 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -1250,7 +1250,10 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab //How big is the entire snarl record? 
static size_t distance_vector_size(record_t type, size_t node_count); - static size_t record_size (record_t type, size_t node_count) ; + //vec_size parameter only needed for oversized snarls + //represents size of hub labeling-related data + //the value needed should be the first entry after the fixed-size record data + static size_t record_size (record_t type, size_t node_count, size_t vec_size) ; size_t record_size() ; //Get the index into the distance vector for the calculating distance between the given node sides @@ -1284,9 +1287,13 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab SnarlRecordWriter(); - SnarlRecordWriter (size_t node_count, bdsg::yomo::UniqueMappedPointer* records, record_t type); + SnarlRecordWriter (size_t node_count, bdsg::yomo::UniqueMappedPointer* records, record_t type, size_t vec_size); SnarlRecordWriter(bdsg::yomo::UniqueMappedPointer* records, size_t pointer); + //sets size of hub label flat vector (only used for oversized snarls) + //TODO: Make separate SnarlRecordWriter for oversized snarls + void set_vec_size(size_t vec_size); + void set_distance(size_t rank1, bool right_side1, size_t rank2, bool right_side2, size_t distance); void set_distance_start_start(size_t value); @@ -1456,7 +1463,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab */ //Add a snarl to the end of the chain and return a SnarlRecordWriter pointing to it - SnarlRecordWriter add_snarl(size_t snarl_size, record_t type, size_t previous_child_offset); + SnarlRecordWriter add_snarl(size_t snarl_size, record_t type, size_t vec_size, size_t previous_child_offset); SimpleSnarlRecordWriter add_simple_snarl(size_t snarl_size, record_t type, size_t previous_child_offset); //Add a node to the end of a chain and return the offset of the record it got added to //If new_record is true, make a new trivial snarl record for the node @@ -1634,7 +1641,9 @@ class SnarlDistanceIndex : public SnarlDecomposition, 
public TriviallySerializab unordered_set tippy_child_ranks; //The ranks of children that are tips //vector, pair, size_t>> distances; unordered_map, pair>, size_t> distances; - + //linearized hub labels (if not empty, this is an oversized snarl) + vector hub_labels; + //How long is the record going to be in the distance index? size_t get_max_record_length() const ; }; diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp new file mode 100644 index 00000000..222c3341 --- /dev/null +++ b/bdsg/src/ch.cpp @@ -0,0 +1,887 @@ +/* +file for quickly playing around with stuff +*/ +#include "bdsg/ch.hpp" + +namespace bdsg { +bdsg::HashGraph make_test() { + bdsg::HashGraph g; + vector h; h.resize(13); + for (int i = 1; i <= 12; i++) { + h[i] = g.create_handle("A"); + } + g.create_edge(h[1], h[2]); g.create_edge(h[2], h[3]); + g.create_edge(h[2], h[4]); g.create_edge(h[2], h[11]); + g.create_edge(h[2], g.flip(h[7])); g.create_edge(h[3], h[5]); + g.create_edge(h[4], h[6]); g.create_edge(h[4], h[7]); + g.create_edge(h[5], h[4]); g.create_edge(h[5], h[6]); + g.create_edge(h[6], h[8]); g.create_edge(h[7], h[8]); + g.create_edge(h[7], h[9]); g.create_edge(h[7], h[10]); + g.create_edge(g.flip(h[7]), g.flip(h[10])); + g.create_edge(h[9], g.flip(h[10])); g.create_edge(h[11], h[12]); + g.create_edge(h[12], g.flip(h[1])); + return g; +} + +NODE_UINT bgid(const handle_t& h, bdsg::HashGraph& hg) { + auto nid = hg.get_id(h); + return hg.get_is_reverse(h) ? 
(nid-1)*2+1 : (nid-1)*2; +} + +NODE_UINT rev_bgid(NODE_UINT n) { + return n ^ 1; +} + +CHOverlay make_boost_graph(bdsg::HashGraph& hg) { + NODE_UINT node_count = hg.get_node_count(); + CHOverlay g(node_count*2); + hg.for_each_edge([&](const edge_t& edge_h) { + auto& [h1, h2] = edge_h; + auto i1 = bgid(h1, hg), i2 = bgid(h2, hg); + add_edge(i1, i2, g); + if (i1 != rev_bgid(i2)) { + add_edge(rev_bgid(i2), rev_bgid(i1), g); + } + }); + + hg.for_each_handle([&](const handle_t& h) { + auto nid = bgid(h, hg); + g[nid].seqlen = hg.get_length(h); + g[rev_bgid(nid)].seqlen = g[nid].seqlen; + }); + + + return g; +} + +CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, pair& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, vector>& all_children, const HandleGraph* hgraph) { + CHOverlay ov(all_children.size()*4); + //maps edge destination handle to id in Boost graph + unordered_map handle_bgnid_map; + + for (size_t child_num = 0; child_num < all_children.size(); child_num++) { + auto [rec_type, rec_index] = all_children[child_num]; + if (rec_type == bdsg::SnarlDistanceIndex::TEMP_CHAIN) { + auto& record = temp_index.temp_chain_records.at(rec_index); + handle_t start_handle = hgraph->get_handle(record.start_node_id, record.start_node_rev); + handle_t end_handle = hgraph->get_handle(record.end_node_id, record.end_node_rev); + //chain representation as node ids (numbers are offsets from child_num*4) + // 1<-3 + // 0->2 + handle_bgnid_map[start_handle] = child_num*4; + handle_bgnid_map[end_handle] = child_num*4+2; + //add edges representing distance across chain + auto new_edge = add_edge(child_num*4, child_num*4+2, ov); + ov[new_edge.first].weight = record.min_length; + new_edge = add_edge(child_num*4+3, child_num*4+1, ov); + ov[new_edge.first].weight = record.min_length; + + //add looping distances (thanks Xian!) 
+ auto& first_child = record.children.front(); + assert(first_child.first == bdsg::SnarlDistanceIndex::TEMP_NODE); + DIST_UINT start_node_length = temp_index.temp_node_records.at(first_child.second).node_length; + //record.children.front().first.node_length; + DIST_UINT start_start_distance = record.forward_loops[0] + (2*start_node_length); + DIST_UINT end_end_distance = record.backward_loops.back() + (2*record.end_node_length); + //loops are edges between different orientations of the same node + auto new_loop_edge = add_edge(child_num*4+2, child_num*4+3, ov); + ov[new_loop_edge.first].weight = end_end_distance; + new_loop_edge = add_edge(child_num*4+1, child_num*4, ov); + ov[new_loop_edge.first].weight = start_start_distance; + + } else if (rec_type == bdsg::SnarlDistanceIndex::TEMP_NODE) { + auto& record = temp_index.temp_node_records.at(rec_index); + handle_t node_handle = hgraph->get_handle(record.node_id, record.reversed_in_parent); + ov[child_num*4].seqlen = record.node_length;//hgraph->get_length(node_handle); + } else { + //uh oh + cerr << "unexpected rec_type" << endl; + } + } + + //add edges between Boost graph nodes of different temp chains / temp nodes + for (size_t child_num = 0; child_num < all_children.size(); child_num++) { + auto [rec_type, rec_index] = all_children[child_num]; + if (rec_type == bdsg::SnarlDistanceIndex::TEMP_CHAIN) { + auto& record = temp_index.temp_chain_records.at(rec_index); + const handle_t start_handle = hgraph->get_handle(record.start_node_id, record.start_node_rev); + const handle_t end_handle = hgraph->get_handle(record.end_node_id, record.end_node_rev); + auto start_id = handle_bgnid_map[start_handle]; + auto end_id = handle_bgnid_map[end_handle]; + //traverse edges going out of start and end nodes of the chain (thanks Xian!) 
+ hgraph->follow_edges(start_handle, false, [&] (const handle_t& next) { + if (!handle_bgnid_map.contains(next)) { return; } + const auto next_id = handle_bgnid_map[next]; + //pair of edge_descriptor and bool of it exists or not + auto edge_info = edge(start_id, next_id, ov); + if (!edge_info.second) { + add_edge(start_id, next_id, ov); + add_edge(rev_bgid(next_id), rev_bgid(start_id), ov); + } + }); + + hgraph->follow_edges(end_handle, false, [&] (const handle_t& next) { + if (!handle_bgnid_map.contains(next)) { return; } + const auto next_id = handle_bgnid_map[next]; + //pair of edge_descriptor and bool of it exists or not + auto edge_info = edge(end_id, next_id, ov); + if (!edge_info.second) { + add_edge(end_id, next_id, ov); + add_edge(rev_bgid(next_id), rev_bgid(end_id), ov); + } + }); + } else { + if (rec_type == bdsg::SnarlDistanceIndex::TEMP_NODE) { + auto& record = temp_index.temp_node_records.at(rec_index); + handle_t node_handle = hgraph->get_handle(record.node_id, record.reversed_in_parent); + const auto node_id = handle_bgnid_map[node_handle]; + for (bool direction: {true, false}) { + hgraph->follow_edges(node_handle, direction, [&] (const handle_t& next) { + if (!handle_bgnid_map.contains(next)) { return; } + const auto next_id = handle_bgnid_map[next]; + //pair of edge_descriptor and bool of it exists or not + auto edge_info = edge(node_id, next_id, ov); + if (!edge_info.second) { + add_edge(node_id, next_id, ov); + add_edge(rev_bgid(next_id), rev_bgid(node_id), ov); + } + }); + } + + } + } + } + return ov; +} + + +int edge_diff(ContractedGraph::vertex_descriptor nid, ContractedGraph& ch, CHOverlay& ov, vector& node_dists, int hop_limit = 2) { + //using namespace boost; + auto [out_start, out_end] = out_edges(nid, ch); + auto [in_start, in_end] = in_edges(nid, ch); + + ov[nid].arc_cover = 0; + int eadd = 0; + //thanks https://theboostcpplibraries.com/boost.graph-vertices-and-edges for iteration code + std::for_each(in_start, in_end, 
[&](ContractedGraph::edge_descriptor eid) { + auto in_node = source(eid, ch); + DIST_UINT in_w = ch[eid].weight; + DIST_UINT stop_dist = in_w + ov[nid].seqlen + ov[nid].max_out; + + std::priority_queue, vector>, greater>> q; + auto [_, __] = out_edges(in_node, ch); + std::for_each(_, __, [&](auto edge) { q.emplace(ch[edge].weight, target(edge, ch)); }); + int num_iter = 0; + vector to_reset; + //five hops limit idea from https://turing.iem.thm.de/routeplanning/hwy/contract.pdf + while ((!q.empty()) && (num_iter < hop_limit)) { + auto [cur_dist, cur_node] = q.top(); + to_reset.push_back(cur_node); + if (cur_dist > stop_dist) { break; } + q.pop(); + + std::tie(_, __) = out_edges(cur_node, ch); + std::for_each(_, __, [&](auto edge) { + DIST_UINT new_dist = ch[edge].weight + cur_dist + ov[cur_node].seqlen; + auto t = target(edge,ch); + if (new_dist < node_dists[t]) { + q.emplace(new_dist, t); + node_dists[t] = new_dist; + } + }); + num_iter += 1; + } + + std::for_each(out_start, out_end, [&](ContractedGraph::edge_descriptor eid2) { + if (in_w+ch[nid].seqlen+ch[eid2].weight <= node_dists[target(eid2, ch)]) { + eadd += 1; + ov[nid].arc_cover += (ov[eid].arc_cover + ov[eid2].arc_cover); + } + + }); + + for (auto n: to_reset) { node_dists[n] = INF_INT; } + + while (!q.empty()) { node_dists[get<1>(q.top())] = INF_INT; q.pop(); } + }); + + int edel = out_degree(nid, ch) + in_degree(nid, ch); + int ediff = eadd - edel; + + return ediff; +} + +void contract(CHOverlay::vertex_descriptor nid, ContractedGraph& ch, CHOverlay& ov, vector& node_dists, vector& shouldnt_contract, int hop_limit = 2) { + auto [out_start, out_end] = out_edges(nid, ch); + auto [in_start, in_end] = in_edges(nid, ch); + + //thanks https://theboostcpplibraries.com/boost.graph-vertices-and-edges for iteration code + std::for_each(in_start, in_end, [&](ContractedGraph::edge_descriptor eid) { + auto in_node = source(eid, ch); + if (in_node == nid) { return; } + DIST_UINT in_w = ch[eid].weight; + DIST_UINT 
stop_dist = in_w + ov[nid].seqlen + ov[nid].max_out; + + + std::priority_queue, vector>, greater>> q; + auto [_, __] = out_edges(in_node, ch); + std::for_each(_, __, [&](auto edge) { + q.emplace(ch[edge].weight, target(edge, ch)); + }); + + int num_iter = 0; + vector to_reset; + while ((!q.empty()) && (num_iter < hop_limit)) { + auto [cur_dist, cur_node] = q.top(); + to_reset.push_back(cur_node); + if (cur_dist > stop_dist) { break; } + q.pop(); + + std::tie(_, __) = out_edges(cur_node, ch); + std::for_each(_, __, [&](auto edge) { + DIST_UINT new_dist = ch[edge].weight + cur_dist + ov[cur_node].seqlen; + auto t = target(edge,ch); + if (new_dist < node_dists[t]) { + q.emplace(new_dist, t); + node_dists[t] = new_dist; + } + }); + + num_iter += 1; + } + + std::for_each(out_start, out_end, [&](ContractedGraph::edge_descriptor out_e) { + DIST_UINT new_len = in_w + ch[nid].seqlen + ch[out_e].weight; + auto out_node = target(out_e, ch); + if (new_len <= node_dists[out_node] && (out_node != nid)) { + auto edge_info = edge(in_node, out_node, ov); + if (!edge_info.second){ + auto new_edge = add_edge(in_node, out_node, ov).first; + ov[new_edge].weight = new_len; + ov[new_edge].arc_cover = ov[out_e].arc_cover + ov[eid].arc_cover; + ov[new_edge].ori = false; + ov[in_node].max_out = max(ov[in_node].max_out, new_len); + } else { + if (new_len < ov[edge_info.first].weight) { + ov[edge_info.first].weight = new_len; + ov[edge_info.first].arc_cover = ov[out_e].arc_cover + ov[eid].arc_cover; + ov[in_node].max_out = max(ov[in_node].max_out, new_len); + + } + ov[edge_info.first].contracted = false; + } + } + }); + + for (auto n: to_reset) { node_dists[n] = INF_INT; } + + while (!q.empty()) { node_dists[get<1>(q.top())] = INF_INT; q.pop(); } + }); + + //update contracted neighbor counts + std::for_each(in_start, in_end, [&](ContractedGraph::edge_descriptor eid) { + auto in_node = source(eid, ch); + ov[in_node].contracted_neighbors += 1; + ov[in_node].level = max(ov[in_node].level, 
ov[nid].level); + ch[eid].contracted = true; + shouldnt_contract[in_node] = true; + }); + + //std::tie(out_start, out_end) = out_edges(nid, ch); + std::for_each(out_start, out_end, [&](ContractedGraph::edge_descriptor eid) { + auto out_node = target(eid, ch); + ov[out_node].contracted_neighbors += 1; + ov[out_node].level = max(ov[out_node].level, ov[nid].level); + ch[eid].contracted = true; + shouldnt_contract[out_node] = true; + }); + + ov[nid].contracted = true; +} + +int get_hop_limit(CHOverlay& ov) { + //staggered hop limit idea from https://www.microsoft.com/en-us/research/wp-content/uploads/2011/05/hl-sea.pdf + int l = 1; + double deg = (double)num_edges(ov)/num_vertices(ov); + if (deg >= 1) { + l = 1; + } + + if (deg >= 2) { + l = 1; + } + + if (deg >= 3) { + l = 2; + } + + if (deg >= 5) { + l = 3; + } + + if (deg >= 7) { + l = 5; + } + return l; +} + +void make_contraction_hierarchy(CHOverlay& ov) { + cerr << "starting degree: " << (double)num_edges(ov)/num_vertices(ov) << endl; + + //thanks https://stackoverflow.com/questions/53490593/boostget-with-boostfiltered-graph-on-adjacency-list-with-netsed-properties for filtered_graph code + auto contracted_filter = [&](CHOverlay::edge_descriptor eid) { return !(ov[eid].contracted); }; + + ContractedGraph contracted_g(ov, contracted_filter); + vector skip(num_vertices(ov), false); + int num_con = 0; + + vector node_dists(num_vertices(ov), INF_INT); + + vector nodes; nodes.resize(num_vertices(ov)); + iota(nodes.begin(), nodes.end(), 0); + + for (int rnd = 0; rnd < 1; rnd++) { + std::fill(skip.begin(), skip.end(), false); + + //for (NODE_UINT i = 0; i < num_vertices(ov); i+=1) { + for (auto i: nodes) { + if (ov[i].contracted || skip[i]) { continue; } + int edif = edge_diff(i, contracted_g, ov, node_dists, 250); + + int min_pri = INF_INT; + + auto [out_start, out_end] = out_edges(i, contracted_g); + auto [in_start, in_end] = in_edges(i, contracted_g); + std::for_each(out_start, out_end, [&] (auto out_edge) { + auto 
neigh = target(out_edge, ov); + if (skip[neigh]) {return;} + int neigh_edif = edge_diff(neigh, contracted_g, ov, node_dists, 250); + + int neigh_pri = (2*neigh_edif) + (1*ov[neigh].contracted_neighbors) + (5*(ov[neigh].level+1)) + ov[neigh].arc_cover; + + if (neigh_pri < min_pri) { min_pri = neigh_pri; } + + }); + + std::for_each(in_start, in_end, [&] (auto in_edge) { + auto neigh = source(in_edge, ov); + if (skip[neigh]) {return;} + int neigh_edif = edge_diff(neigh, contracted_g, ov, node_dists, 250); + + int neigh_pri = (2*neigh_edif) + (1*ov[neigh].contracted_neighbors) + (5*(ov[neigh].level+1)) + ov[neigh].arc_cover; + + if (neigh_pri < min_pri) { min_pri = neigh_pri; } + + }); + + int priority = (2*edif) + (1*ov[i].contracted_neighbors) + (5*(ov[i].level+1)) + ov[i].arc_cover; + + if ((priority <= min_pri)) { + + std::for_each(out_start, out_end, [&] (auto out_edge) { + auto neigh = target(out_edge, ov); + skip[neigh] = true; + }); + + std::for_each(in_start, in_end, [&] (auto in_edge) { + auto neigh = source(in_edge, ov); + skip[neigh] = true; + }); + //} + + } else { + skip[i] = true; + } + } + + for (auto i = 0u; i < num_vertices(ov); i+=1) { + + if (ov[i].contracted) { continue; } + if ((!skip[i])) { + ov[i].new_id = num_vertices(ov)-1-num_con; + contract(i, contracted_g, ov, node_dists, skip, 250); + skip[i] = true; + num_con += 1; + } + } + + cerr << "num contr: " << num_con << endl; + cerr << "after round " << rnd+1 << " degree: " << (double)num_edges(ov)/num_vertices(ov) << endl; + } + + + cerr << "left over: " << num_vertices(ov) - num_con << endl; + //std::fill(skip.begin(), skip.end(), false); + //for (auto n: arti_pts) { skip[n] = true; } + + vector> queue_objs; queue_objs.reserve(num_vertices(ov)/2); + for (int i = 0; i < num_vertices(ov); i+=1) { + if (ov[i].contracted) { continue; } + + int edif = edge_diff(i, contracted_g, ov, node_dists, 1000); + + //priority formula based off that given in 
https://www.microsoft.com/en-us/research/wp-content/uploads/2011/05/hl-sea.pdf + int priority = (2*edif) + (1*ov[i].contracted_neighbors) + (5*(ov[i].level+1)) + ov[i].arc_cover; + queue_objs.emplace_back(priority, i); + } + make_heap(queue_objs.begin(), queue_objs.end(), greater>()); + pop_heap(queue_objs.begin(), queue_objs.end(), greater>()); + //size_t init_qsize = queue_objs.size(); + + while (queue_objs.size() > 4) { + auto [pri, node] = queue_objs.back(); queue_objs.pop_back(); + //preparing for next pop + pop_heap(queue_objs.begin(), queue_objs.end(), greater>()); + + int hop_limit = 1000;//get_hop_limit(ov); + int edif = edge_diff(node, contracted_g, ov, node_dists, hop_limit); + + int new_pri = ((2*edif)+ (1*ov[node].contracted_neighbors)) + (5*(ov[node].level+1)) + ov[node].arc_cover; + + if (new_pri > get<0>(queue_objs.back())) { + queue_objs.emplace_back(new_pri, node); + push_heap(queue_objs.begin(), queue_objs.end(), greater>()); + pop_heap(queue_objs.begin(), queue_objs.end(), greater>()); + continue; + } + ov[node].level += 1; + if (queue_objs.size() % 100 == 1) { + cerr << "remaining: " << queue_objs.size() << ", deg: " << (double)num_edges(ov)/num_vertices(ov) << endl; + cerr << "lv: " << ov[node].level << endl; + } + + + ov[node].new_id = num_vertices(ov)-1-num_con; + contract(node, contracted_g, ov, node_dists, skip, hop_limit); num_con += 1; + } + + while (!queue_objs.empty()) { + auto [pri, node] = queue_objs.back(); queue_objs.pop_back(); + //preparing for next pop + pop_heap(queue_objs.begin(), queue_objs.end(), greater>()); + + + //cerr << "in out: " << in_degree(node, ov) << " " << out_degree(node, ov) << endl; + ov[node].new_id = num_vertices(ov)-1-num_con; + //contract(node, contracted_g, ov, node_dists, skip, 50); + num_con += 1; + } + + auto ori_filter = [&](CHOverlay::edge_descriptor eid) { return !(ov[eid].ori); }; + remove_edge_if(ori_filter, ov); + + vector> v2; v2.reserve(num_vertices(ov)); + for (auto i = 0u; i < 
num_vertices(ov); i+=1) { + v2.emplace_back(in_degree(i,ov)*out_degree(i,ov), i); + } + sort(v2.rbegin(), v2.rend()); + for (int i: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) { + auto [p, n] = v2[i]; + cerr << n << " (" << ov[n].new_id << "): " << in_degree(n,ov) << " " << out_degree(n,ov) << endl; + } +} + +DIST_UINT binary_intersection_ch(vector& v1, vector& v2) { + vector& key_vec = v1.size() < v2.size() ? v1 : v2; + vector& search_vec = v1.size() < v2.size() ? v2 : v1; + + auto start_itr = search_vec.begin(); + auto end_itr = search_vec.end(); + DIST_UINT min_dist = INF_INT; + for (auto k: key_vec) { + start_itr = lower_bound(start_itr, end_itr, k); + if (start_itr == end_itr) { + return min_dist; + } + if (start_itr->hub == k.hub) { + DIST_UINT d = start_itr->dist + k.dist; + min_dist = min(min_dist, d); + } + } + return min_dist; +} + +template +ItrType get_dist_itr(ItrType start_itr, ItrType hub_itr) { + auto node_count = *start_itr; + auto last_fwd_end_bound_itr = next(start_itr, 1+node_count); + if (hub_itr >= next(start_itr, *last_fwd_end_bound_itr)) { + //backwards label + auto first_back_bound_itr = next(start_itr, 1+node_count+1); + auto last_back_bound_itr = next(start_itr, 1+node_count+1+node_count); + auto jump_to_dist = (*last_back_bound_itr) - *first_back_bound_itr; + return next(hub_itr, jump_to_dist); + } else { + //forwards label + auto first_fwd_bound_itr = next(start_itr, 1); + auto last_fwd_bound_itr = next(start_itr, 1+node_count); + auto jump_to_dist = (*last_fwd_bound_itr) - *first_fwd_bound_itr; + return next(hub_itr, jump_to_dist); + } +} + +/* +start_bound_index variables are relative to start_offset +*/ +template +DIST_UINT binary_intersection_ch(const VecType& storage, size_t start_offset, size_t v1_start_bound_index, size_t v2_start_bound_index) { + auto start_itr = next(storage.begin(), start_offset); + auto v1_start_bound_itr = next(start_itr, v1_start_bound_index); + auto v1_end_bound_itr = next(v1_start_bound_itr, 1); + auto 
v2_start_bound_itr = next(start_itr, v2_start_bound_index); + auto v2_end_bound_itr = next(v2_start_bound_itr, 1); + + auto v1_start_itr = next(start_itr, *v1_start_bound_itr); + auto v1_end_itr = next(start_itr, *v1_end_bound_itr); + auto v2_start_itr = next(start_itr, *v2_start_bound_itr); + auto v2_end_itr = next(start_itr, *v2_end_bound_itr); + auto v1_range = ranges::subrange(v1_start_itr, v1_end_itr); + auto v2_range = ranges::subrange(v2_start_itr, v2_end_itr); + + auto& key_vec = v1_range.size() < v2_range.size() ? v1_range : v2_range; + auto& search_vec = v1_range.size() < v2_range.size() ? v2_range : v1_range; + + auto search_start_itr = search_vec.begin(); + auto search_end_itr = search_vec.end(); + DIST_UINT min_dist = INF_INT; + for (auto k: key_vec) { + search_start_itr = lower_bound(search_start_itr, search_end_itr, k); + if (search_start_itr == search_end_itr) { + return min_dist; + } + if (*search_start_itr == k) { + DIST_UINT d = + min_dist = min(min_dist, d); + } + } + return min_dist; +} + +DIST_UINT hhl_query(size_t rank1, size_t rank2, std::function reader) { + //reader gets value at index + auto start_index_1 = reader(rank1+1); + auto start_index_2 = reader(rank2+1); + + return 1; +} + +void down_dijk(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_back) { + auto in_node = node; + + labels_back[node].emplace_back(ov[node].new_id, -ov[node].seqlen); + std::priority_queue, vector>, greater>> q; + + auto [_, __] = out_edges(in_node, ov); + std::for_each(_, __, [&](auto edge) { + auto t = target(edge, ov); + + if (!ov[edge].ori) { return; } + q.emplace(ov[edge].weight, t); + node_dists[t] = ov[edge].weight; + }); + + vector to_reset; + + while ((!q.empty())) { + auto [cur_dist, cur_node] = q.top(); q.pop(); + to_reset.push_back(cur_node); + + if (node_dists[cur_node] < cur_dist) { continue; } + DIST_UINT check_dist = binary_intersection_ch(labels_back[cur_node], labels[node]); + + if (check_dist <= cur_dist) 
{continue;} + labels_back[cur_node].emplace_back(ov[node].new_id, cur_dist); + + + std::tie(_, __) = out_edges(cur_node, ov); + std::for_each(_, __, [&](auto edge) { + auto t = target(edge, ov); + + if (!ov[edge].ori) { return; } + DIST_UINT new_dist = ov[edge].weight + cur_dist + ov[cur_node].seqlen; + if (new_dist < node_dists[t]) { + q.emplace(new_dist, t); + node_dists[t] = new_dist; + } + }); + } + + node_dists[node] = INF_INT; + for (auto n: to_reset) { node_dists[n] = INF_INT; } + + while (!q.empty()) { node_dists[get<1>(q.top())] = INF_INT; q.pop(); } +} + +void down_dijk_back(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_back) { + auto in_node = node; + labels[node].emplace_back(ov[node].new_id, 0); + node_dists[node] = 0; + + //std::priority_queue, vector>, greater>> q; + vector> q; if (ov[node].new_id < 100) { q.reserve(num_vertices(ov)/2); } + + auto [_, __] = in_edges(in_node, ov); + std::for_each(_, __, [&](auto edge) { + auto s = source(edge, ov); + + if (!ov[edge].ori) { return; } + q.emplace_back(ov[edge].weight, s); + node_dists[s] = ov[edge].weight; + }); + make_heap(q.begin(), q.end(), greater>()); + + vector to_reset; if (ov[node].new_id < 100) { to_reset.reserve(num_vertices(ov)/2); } + while ((!q.empty())) { + pop_heap(q.begin(), q.end(), greater>()); + auto [cur_dist, cur_node] = q.back(); q.pop_back(); + + to_reset.push_back(cur_node); + if (node_dists[cur_node] < cur_dist) { + continue; + } + + DIST_UINT check_dist = binary_intersection_ch(labels[cur_node], labels_back[node]); + if (check_dist <= cur_dist) {continue;} + labels[cur_node].emplace_back(ov[node].new_id, cur_dist+ov[node].seqlen); + + std::tie(_, __) = in_edges(cur_node, ov); + std::for_each(_, __, [&](auto edge) { + auto t = source(edge, ov); + + if (!ov[edge].ori) { return; } + DIST_UINT new_dist = ov[edge].weight + cur_dist + ov[cur_node].seqlen; + if (new_dist < node_dists[t]) { + q.emplace_back(new_dist, t); + push_heap(q.begin(), q.end(), 
greater>()); + node_dists[t] = new_dist; + } + }); + + } + + node_dists[node] = INF_INT; + for (auto n: to_reset) { node_dists[n] = INF_INT; } + + for (auto& t: q) { node_dists[get<1>(t)] = INF_INT; } + q.clear(); +} + +void test_dijk(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_back) { + auto in_node = node; //node_dists[node] = 0; + + std::priority_queue, vector>, greater>> q; + auto [_, __] = out_edges(in_node, ov); + std::for_each(_, __, [&](auto edge) { + auto t = target(edge, ov); + + if (!ov[edge].ori) { return; } + q.emplace(ov[edge].weight, t); + node_dists[t] = ov[edge].weight; + }); + + vector to_reset; + + while ((!q.empty())) { + auto [cur_dist, cur_node] = q.top(); q.pop(); + to_reset.push_back(cur_node); + + if (cur_dist > node_dists[cur_node]) { continue; } + + std::tie(_, __) = out_edges(cur_node, ov); + std::for_each(_, __, [&](auto edge) { + auto t = target(edge, ov); + + if (!ov[edge].ori) { return; } + DIST_UINT new_dist = ov[edge].weight + cur_dist + ov[cur_node].seqlen; + if (new_dist < node_dists[t]) { + q.emplace(new_dist, t); + node_dists[t] = new_dist; + } + }); + } + + for (int cur_node = 0; cur_node < static_cast(num_vertices(ov)); cur_node++) { + + DIST_UINT check_dist = binary_intersection_ch(labels_back[cur_node], labels[node]); + if (cur_node == node) { + check_dist = min(check_dist, labels_back[cur_node].back().dist + labels[node].back().dist); + } + if (check_dist != node_dists[cur_node]) { + cerr << "node " << cur_node << " mismatch: " << check_dist << ", actual: " << node_dists[cur_node] << endl; + } + } + + node_dists[node] = INF_INT; + for (auto n: to_reset) { node_dists[n] = INF_INT; } + + while (!q.empty()) { node_dists[get<1>(q.top())] = INF_INT; q.pop(); } +} + +void test_dijk_back(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_back) { + auto in_node = node; + + std::priority_queue, vector>, greater>> q; + auto [_, __] = in_edges(in_node, ov); + 
std::for_each(_, __, [&](auto edge) { + auto s = source(edge, ov); + + if (!ov[edge].ori) { return; } + q.emplace(ov[edge].weight, s); + node_dists[s] = ov[edge].weight; + }); + + vector to_reset; + + while ((!q.empty())) { + auto [cur_dist, cur_node] = q.top(); q.pop(); + to_reset.push_back(cur_node); + + if (cur_dist > node_dists[cur_node]) { continue; } + + std::tie(_, __) = in_edges(cur_node, ov); + std::for_each(_, __, [&](auto edge) { + auto s = source(edge, ov); + //if (ov[t].new_id <= ov[node].new_id) { return; } + if (!ov[edge].ori) { return; } + DIST_UINT new_dist = ov[edge].weight + cur_dist + ov[cur_node].seqlen; + if (new_dist < node_dists[s]) { + q.emplace(new_dist, s); + node_dists[s] = new_dist; + } + }); + } + for (auto cur_node = 0u; cur_node < num_vertices(ov); cur_node++) { + DIST_UINT check_dist = binary_intersection_ch(labels[cur_node], labels_back[node]); + if (cur_node == node) { + check_dist = min(check_dist, labels[cur_node].back().dist + labels_back[node].back().dist); + } + if (check_dist != node_dists[cur_node]) { + cerr << "node " << cur_node << " mismatch: " << check_dist << ", actual: " << node_dists[cur_node] << endl; + } + } + + node_dists[node] = INF_INT; + for (auto n: to_reset) { node_dists[n] = INF_INT; } + + while (!q.empty()) { node_dists[get<1>(q.top())] = INF_INT; q.pop(); } +} + +void create_labels(vector>& labels, vector>& labels_back, CHOverlay& ov) { + vector node_dists(num_vertices(ov), INF_INT); + vector v; v.resize(num_vertices(ov)); + for (auto i = 0u; i < num_vertices(ov); i++) { + v[ov[i].new_id] = i; + } + + for (auto j = 0u; j < num_vertices(ov); j++) { + + if (j % 100 == 1) { cerr << j << "th node, " << v[j] << endl; } + //cerr << "starting dijkstra: " << endl; + down_dijk_back(v[j], ov, node_dists, labels, labels_back); + + //cerr << "starting rev dijkstra: " << endl; + down_dijk(v[j], ov, node_dists, labels, labels_back); + } +} + +/* +Puts hub labels in a flat vector form + +Structure: +- offsets are 
relative to start of flat vector +- extra offset in each of fwd and back offset sets at the end so that end of ranges can be found +-- subtracting the extra offset by the first offset of its set gets the distance to the corresponding dist of a hub +label count | start offsets (fwd) | start offsets (back) | fwd label hubs | fwd label dists | back label hubs | back label dists +*/ +vector pack_labels(const vector>& labels, const vector>& labels_back) { + auto label_count = labels.size(); + //label_count+1 is so we can look at the next offset to determine end of range + size_t flat_vec_size = 2*(label_count+1)+1; + vector flat_label_vec; + flat_label_vec.resize(flat_vec_size); + flat_label_vec[0] = label_count; + + //fill in fwd offsets + size_t current_offset = flat_vec_size; + for (size_t i = 0; i < labels.size(); i++) { + auto& label = labels[i]; + size_t label_size = label.size(); + flat_label_vec[i+1] = current_offset; + current_offset += label_size; + + flat_vec_size += (label_size*2); + } + flat_label_vec[label_count+1] = current_offset; + + //fill in back offsets + current_offset = flat_vec_size; + for (size_t i = 0; i < labels_back.size(); i++) { + auto& label = labels_back[i]; + size_t label_size = label.size(); + flat_label_vec[i+1+label_count+1] = current_offset; + current_offset += label_size; + + flat_vec_size += (label_size*2); + } + flat_label_vec[(label_count+1)+label_count+1] = current_offset; + + flat_label_vec.resize(flat_vec_size); + + //copy info into vec (fwd) + size_t jump_to_dist = flat_label_vec[label_count+1] - flat_label_vec[1]; + for (size_t i = 0; i < labels.size(); i++) { + auto& label = labels[i]; + size_t hub_offset = flat_label_vec[i+1]; + for (size_t j = 0; j < label.size(); j++) { + flat_label_vec[hub_offset+j] = label[j].hub; + flat_label_vec[hub_offset+j+jump_to_dist] = label[j].dist; + } + } + + //copy info into vec (back) + jump_to_dist = flat_label_vec[(label_count+1)+label_count+1] - flat_label_vec[label_count+1+1]; + for 
(size_t i = 0; i < labels_back.size(); i++) { + auto& label = labels_back[i]; + size_t hub_offset = flat_label_vec[1+label_count+1+i]; + for (size_t j = 0; j < label.size(); j++) { + flat_label_vec[hub_offset+j] = label[j].hub; + flat_label_vec[hub_offset+j+jump_to_dist] = label[j].dist; + } + } + + return flat_label_vec; +} + +void write_to_csv(CHOverlay& ov, string out_path) { + ofstream out(out_path); + out << "source,target" << endl; + auto [edge_start, edge_end] = edges(ov); + + std::for_each(edge_start, edge_end, [&](const auto& e) { + out << source(e, ov) << "," << target(e, ov) << endl; + }); +} + +void write_to_gr(CHOverlay& ov, string out_path) { + ofstream out(out_path); + stringstream out_sstr; + out_sstr << "p tww " << num_vertices(ov) << " " << num_edges(ov) << endl; + auto [edge_start, edge_end] = edges(ov); + + std::for_each(edge_start, edge_end, [&](const auto& e) { + out_sstr << source(e, ov)+1 << " " << target(e, ov)+1 << endl; + }); + out << out_sstr.str(); +} + +vector read_node_order(string in_path) { + vector ordered_nodes; + ifstream in(in_path); + string node_string; + while (getline(in, node_string)) { + ordered_nodes.push_back(stoi(node_string)); + } + + return ordered_nodes; +} +} diff --git a/bdsg/src/hublabel.cpp b/bdsg/src/hublabel.cpp new file mode 100644 index 00000000..60f19603 --- /dev/null +++ b/bdsg/src/hublabel.cpp @@ -0,0 +1,394 @@ +/* +hub labeling implementations + +based on the pruned landmark labeling approach by Akiba et al. 
(2013) +(https://dl.acm.org/doi/abs/10.1145/2463676.2465315) + +Code heavily references https://github.com/yb47438/An-Experimental-Study-on-Hub-Labeling-based-Shortest-Path-Algorithms/blame/7f43a91bcc47a3e74a52cf8fd034bd33a367920b/src/construction.h +*/ +#include "bdsg/hublabel.hpp" + +namespace bdsg { +bool notInf(int d) { + return d != INF_INT; +} + +void sorted_vec_insert(vector& v, NodeId item) { + auto ins_itr = upper_bound(v.begin(), v.end(), item); + v.insert(ins_itr, item); +} +void sorted_vec_insert(vector& v, HubRecord& item) { + auto ins_itr = lower_bound(v.begin(), v.end(), item); + if (ins_itr == v.end() || ins_itr->hub != item.hub) { + v.insert(ins_itr, item); + } +} + +bool pqcomp2(const QueueObj2& o1, const QueueObj2& o2) { + return get<0>(o1) > get<0>(o2); +}; + +bool pqcomp_hw(const QueueObj2& o1, const QueueObj2& o2) { + return get<0>(o1) > get<0>(o2); +}; + +void hub_insert(Label& label, NodesideId ns) { + NodeId hub = nodeside_to_node(ns); + if (nodeside_left(ns)) { + sorted_vec_insert(label.first.first, hub); + } else { + sorted_vec_insert(label.first.second, hub); + } +} +void hub_insert(HubRecsPair& label, NodesideId ns, DIST_UINT dist) { + NodeId hub = nodeside_to_node(ns); + HubRecord rec(hub, dist); + if (nodeside_left(ns)) { + sorted_vec_insert(label.first, rec); + } else { + sorted_vec_insert(label.second, rec); + } +} + +void hub_emplace(HubRecsPair& label, NodesideId ns, DIST_UINT dist) { + NodeId hub = nodeside_to_node(ns); + //HubRecord rec(hub, dist); + if (nodeside_left(ns)) { + label.first.emplace_back(hub, dist); + } else { + label.second.emplace_back(hub, dist); + } +} + +/* +assumes both input vectors are sorted least to greatest +*/ +void merge_intersection(vector& v1, vector& v2, vector& out) { + auto p1 = v1.begin(); auto p2 = v2.begin(); + while (p1 != v1.end() && p2 != v2.end()) { + if (*p1 < *p2) { + p1=next(p1); + } else { + if (*p1 > *p2) { + p2=next(p2); + } else { + out.push_back(*p1); + p1=next(p1); p2=next(p2); + } 
+ } + } +} + +DIST_UINT merge_intersection_dist(vector& v1, vector& v2, bdsg::HashGraph& g) { + DIST_UINT min_dist = INF_INT; + auto p1 = v1.begin(); auto p2 = v2.begin(); + while (p1 != v1.end() && p2 != v2.end()) { + if (p1->hub < p2->hub) { + p1=next(p1); + } else { + if (p1->hub > p2->hub) { + p2=next(p2); + } else { + DIST_UINT d = p1->dist + g.get_length(g.get_handle(p1->hub,false)) + p2->dist; + min_dist = min(min_dist, d); + p1=next(p1); p2=next(p2); + } + } + } + + return min_dist; +} + +/* +assumes both input vectors are sorted least to greatest +*/ +void binary_intersection(vector& v1, vector& v2, vector& out) { + vector& key_vec = v1.size() < v2.size() ? v1 : v2; + vector& search_vec = v1.size() < v2.size() ? v2 : v1; + + auto start_itr = search_vec.begin(); + auto end_itr = search_vec.end(); + for (auto k: key_vec) { + start_itr = lower_bound(start_itr, end_itr, k); + if (*start_itr == k) { + out.push_back(k); + } else { + if (start_itr == end_itr) { + break; + } + } + } +} +DIST_UINT binary_intersection_dist(vector& v1, vector& v2, bdsg::HashGraph& g) { + vector& key_vec = v1.size() < v2.size() ? v1 : v2; + vector& search_vec = v1.size() < v2.size() ? v2 : v1; + + auto start_itr = search_vec.begin(); + auto end_itr = search_vec.end(); + DIST_UINT min_dist = INF_INT; + for (auto k: key_vec) { + start_itr = lower_bound(start_itr, end_itr, k); + if (start_itr == end_itr) { + return min_dist; + } + if (start_itr->hub == k.hub) { + DIST_UINT d = start_itr->dist + g.get_length(g.get_handle(k.hub,false)) + k.dist; + min_dist = min(min_dist, d); + } + } + return min_dist; +} +DIST_UINT binary_intersection_dist(vector& v1, vector& v2, vector& rank_node_lens) { + vector& key_vec = v1.size() < v2.size() ? v1 : v2; + vector& search_vec = v1.size() < v2.size() ? 
v2 : v1; + + auto start_itr = search_vec.begin(); + auto end_itr = search_vec.end(); + DIST_UINT min_dist = INF_INT; + for (auto k: key_vec) { + start_itr = lower_bound(start_itr, end_itr, k); + if (start_itr == end_itr) { + return min_dist; + } + if (start_itr->hub == k.hub) { + DIST_UINT d = start_itr->dist + rank_node_lens[k.hub] + k.dist; + min_dist = min(min_dist, d); + } + } + return min_dist; +} +DIST_UINT binary_intersection_dist(vector& v1, vector& v2) { + vector& key_vec = v1.size() < v2.size() ? v1 : v2; + vector& search_vec = v1.size() < v2.size() ? v2 : v1; + + auto start_itr = search_vec.begin(); + auto end_itr = search_vec.end(); + DIST_UINT min_dist = INF_INT; + for (auto k: key_vec) { + start_itr = lower_bound(start_itr, end_itr, k); + if (start_itr == end_itr) { + return min_dist; + } + if (start_itr->hub == k.hub) { + DIST_UINT d = start_itr->dist + k.dist; + min_dist = min(min_dist, d); + } + } + return min_dist; +} + +vector hub_vec_intersect(vector& v1, vector& v2) { + vector shared_nodes; + binary_intersection(v1, v2, shared_nodes); + return shared_nodes; +} + +DIST_UINT hl_build_intersect(HubRecsPair& l1, HubRecsPair& l2, bdsg::HashGraph& g) { + DIST_UINT min1 = binary_intersection_dist(l1.first, l2.second, g); + DIST_UINT min2 = binary_intersection_dist(l1.second, l2.first, g); + + return min(min1, min2); +} + +DIST_UINT hl_build_intersect(HubRecsPair& l1, HubRecsPair& l2, vector& rank_node_lens) { + DIST_UINT min1 = binary_intersection_dist(l1.first, l2.second, rank_node_lens); + DIST_UINT min2 = binary_intersection_dist(l1.second, l2.first, rank_node_lens); + + return min(min1, min2); +} + +DIST_UINT hl_build_intersect(HubRecsPair& l1, HubRecsPair& l2) { + DIST_UINT min1 = binary_intersection_dist(l1.first, l2.second); + DIST_UINT min2 = binary_intersection_dist(l1.second, l2.first); + + return min(min1, min2); +} + +bool label_contains(HubRecsPair& label, NodesideId query) { + NodeId query_node = nodeside_to_node(query); + if 
(nodeside_left(query)) { + return binary_search(label.first.begin(), label.first.end(), query_node); + } else { + return binary_search(label.second.begin(), label.second.end(), query_node); + } +} + +DIST_UINT get_label_dist(HubRecsPair& label, NodesideId query) { + NodeId query_node = nodeside_to_node(query); + if (nodeside_left(query)) { + return (*lower_bound(label.first.begin(), label.first.end(), query_node)).dist; + } else { + return (*lower_bound(label.second.begin(), label.second.end(), query_node)).dist; + } +} + +void set_label_dist(HubRecsPair& label, NodesideId query, DIST_UINT new_dist) { + NodeId query_node = nodeside_to_node(query); + if (nodeside_left(query)) { + (*lower_bound(label.first.begin(), label.first.end(), query_node)).dist = new_dist; + } else { + (*lower_bound(label.second.begin(), label.second.end(), query_node)).dist = new_dist; + } +} + +bool prune_check(HubRecsPair& label, vector& rank_node_lens, vector& rank_dists, vector& rank_dists2, const DIST_UINT& cur_dist) { + bool prune = false; + for (auto& rec: label.first) { + if (rank_dists2[rec.hub] == INF_INT) { continue; } + const DIST_UINT hl_dist = rec.dist + rank_node_lens[rec.hub] + rank_dists2[rec.hub]; + if (hl_dist <= cur_dist) { return true; } + } + for (auto& rec: label.second) { + if (rank_dists[rec.hub] == INF_INT) { continue; } + const DIST_UINT hl_dist = rec.dist + rank_node_lens[rec.hub] + rank_dists[rec.hub]; + if (hl_dist <= cur_dist) { return true; } + } + return prune; +} + +void pruned_dijkstra(bdsg::HashGraph& g, NodesideId start, vector& labels, vector& path_lengths, vector& rank_to_ns, vector& ns_to_rank, vector& rank_node_lens, int stop_dist) { + auto node_count = g.get_node_count(); + DIST_UINT nodeside_count = node_count*2; + vector parent(nodeside_count, INF_INT); + + const auto rank_start = ns_to_rank[start]; + bool self_loop = false; + + vector rank_dists(node_count+1, INF_INT); + vector rank_dists2(node_count+1, INF_INT); + for (auto& rec: 
labels[ns_to_rank[start]].first) { + rank_dists[rec.hub] = rec.dist; + } + for (auto& rec: labels[ns_to_rank[start]].second) { + rank_dists2[rec.hub] = rec.dist; + } + + //place initial OTHER_NODE nodesides + PriorityQueue2 nodeside_queue{pqcomp2}; + handle_t start_handle = g.get_handle(nodeside_to_node(start), nodeside_left(start)); + g.follow_edges(start_handle, false, [&](const handle_t& new_h){ + NodesideId new_nodeside = handle_entry_nodeside(new_h, g); + path_lengths[new_nodeside] = 0; + nodeside_queue.emplace(0, new_nodeside); + }); + + auto not_stop_cond = [stop_dist] (const QueueObj2& top_item) { + bool not_stop_dist = (get<0>(top_item) <= stop_dist) && (get<0>(top_item) != INF_INT); + return not_stop_dist; + }; + NodesideId last_ns = start; + while ((!nodeside_queue.empty()) && not_stop_cond(nodeside_queue.top())) { + const QueueObj2 item = nodeside_queue.top(); nodeside_queue.pop(); + const DIST_UINT cur_dist = get<0>(item); + const NodesideId cur_nodeside = get<1>(item); + + //gotta have this since priorities of C++ priority queue elements can't be updated + //all nodesides on queue are reached through OTHER_NODE direction + if (cur_dist != path_lengths[cur_nodeside]){ + continue; + } + last_ns = cur_nodeside; + if (cur_nodeside != start) [[likely]] { + //check if we can prune here + const auto rank_cur_ns = ns_to_rank[cur_nodeside]; + DIST_UINT label_dist = label_contains(labels[rank_cur_ns], rank_start) ? 
get_label_dist(labels[rank_cur_ns], rank_start) : INF_INT; + if (label_dist <= cur_dist) { + continue; + } + + if (prune_check(labels[rank_cur_ns], rank_node_lens, rank_dists, rank_dists2, cur_dist)) { continue; } + + hub_emplace(labels[rank_cur_ns], rank_start, cur_dist); + } + + const NodeId cur_node = nodeside_to_node(cur_nodeside); + //cross "edge" to other nodeside + const handle_t cur_handle = g.get_handle(cur_node, !nodeside_left(cur_nodeside)); + const DIST_UINT cur_handle_len = g.get_length(cur_handle); + const DIST_UINT new_dist = cur_dist + cur_handle_len; + + //now find the new nodesides reached through the OTHER_NODE direction + g.follow_edges(cur_handle, false, [&](const handle_t& new_h){ + const NodesideId new_ns = handle_entry_nodeside(new_h, g); + if (new_dist < path_lengths[new_ns]) { + path_lengths[new_ns] = new_dist; + nodeside_queue.emplace(new_dist, new_ns); + parent[new_ns] = cur_nodeside; + } + }); + } + +} + + + +DIST_UINT hl_query(NodesideId i, NodesideId j, vector& labels, bdsg::HashGraph& g) { + if (i == j) { return 0; } + return label_contains(labels[i], j) ? get_label_dist(labels[i], j) : hl_build_intersect(labels[i], labels[j], g); +} +DIST_UINT hl_query(NodesideId i, NodesideId j, vector& labels, vector& rank_node_lens) { + if (i == j) { return 0; } + NodesideId important = min(i, j); + NodesideId less_important = max(i, j); + + return label_contains(labels[less_important], important) ? get_label_dist(labels[less_important], important) : hl_build_intersect(labels[less_important], labels[important], rank_node_lens); +} +DIST_UINT hl_query2(NodesideId i, NodesideId j, vector& labels) { + if (i == j) { return 0; } + NodesideId important = min(i, j); + NodesideId less_important = max(i, j); + + return label_contains(labels[less_important], important) ? 
get_label_dist(labels[less_important], important) : hl_build_intersect(labels[less_important], labels[important]); +} + +DIST_UINT get_degree(NodeId node, bdsg::HashGraph& g) { + const handle_t h = g.get_handle(node, false); + return g.get_degree(h, true) * g.get_degree(h, false); +} + +vector make_labels(bdsg::HashGraph& g) { + int node_count = g.get_node_count(); + int nodeside_count = g.get_node_count()*2; + + vector labels; labels.resize(nodeside_count); + + auto node_ordering_view = ranges::iota_view(1,node_count+1); + vector node_ordering(node_ordering_view.begin(), node_ordering_view.end()); + sort(node_ordering.begin(), node_ordering.end(), [&](NodeId n1, NodeId n2) { + return get_degree(n1, g) > get_degree(n2, g); + }); + + vector rank_node_lens(node_count+1, 0); + + vector ns_ordering(nodeside_count, 0); + + vector rank(nodeside_count, 0); + for (size_t i = 0; i < node_ordering.size(); i++) { + const NodeId node = node_ordering[i]; + const auto nodesides = get_node_nodesides(node); + rank[nodesides[0]] = i*2; + rank[nodesides[1]] = i*2+1; + ns_ordering[i*2] = nodesides[0]; + ns_ordering[i*2+1] = nodesides[1]; + rank_node_lens[i+1] = g.get_length(g.get_handle(node, false)); + } + + vector path_lengths; + //for (int i = 0; i < 2; i++) { + path_lengths = vector(nodeside_count, INF_INT); + + + for (size_t i = 0; i < node_ordering.size(); i++) { + NodeId node = node_ordering[i]; + const auto nodesides = get_node_nodesides(node); + for (NodesideId ns: nodesides) { + pruned_dijkstra(g, ns, labels, path_lengths, ns_ordering, rank, rank_node_lens); + //for (int i = 0; i < 2; i++) { + std::fill(path_lengths.begin(), path_lengths.end(), INF_INT); + + } + } + return labels; +} + +} diff --git a/bdsg/src/landmark.cpp b/bdsg/src/landmark.cpp new file mode 100644 index 00000000..ed1be9eb --- /dev/null +++ b/bdsg/src/landmark.cpp @@ -0,0 +1,501 @@ +#include "bdsg/landmark.hpp" + +using namespace std; +namespace bdsg { + +int addInt(int a, int b) { + if (a == INF_INT || b 
== INF_INT) { + return INF_INT; + } + return a+b; +} + +bool pqcomp1(const QueueObj& o1, const QueueObj& o2) { + return get<0>(o1) > get<0>(o2); +}; + +/* +following functions assume input graph node ids go from 1...N +*/ + +/* +Converts a node_id to a nodeside_id +*/ +//using bool for indicating side just like for direction in follow_edges +NodesideId node_to_nodeside(NodeId node_id, bool left) { + NodesideId ns_id = (node_id-1)*2; + + if (!left) { + ns_id += 1; + } + + return ns_id; +} + +array get_node_nodesides(NodeId node_id) { + NodesideId ns_id = (node_id-1)*2; + array both = {ns_id, ns_id+1}; + return both; +} + +NodeId nodeside_to_node(NodesideId nodeside_id) { + return nodeside_id / 2 + 1; +} + +tuple nodeside_to_node_tuple(NodesideId nodeside_id) { + tuple t = {nodeside_id / 2 + 1, (nodeside_id%2==0)}; + return t; +} + +bool nodeside_left (NodesideId nodeside_id) { + return nodeside_id%2==0; +} + +NodesideId other_nodeside(int nodeside_id) { + return nodeside_id ^ 1; +} + +NodesideId handle_entry_nodeside(const handle_t& handle, HashGraph& g) { + return node_to_nodeside(g.get_id(handle), !g.get_is_reverse(handle)); +} + +string nodeside_string(NodesideId nodeside) { + bool is_left = nodeside_left(nodeside); + string dir = is_left ? 
"left" : "right"; + NodeId node = nodeside_to_node(nodeside); + + stringstream stream; + stream << node << " (" << dir << ")"; + return stream.str(); +} + +/// landmark selection + +/* +returns ids of nodesides with largest degree +*/ +//helper +int nodeside_degree(NodesideId ns_id, HashGraph& g) { + tuple t = nodeside_to_node_tuple(ns_id); + NodeId node_id = get<0>(t); + bool is_left = get<1>(t); + handle_t node_handle = g.get_handle(node_id); + return g.get_degree(node_handle, is_left); +} + +vector get_highest_degree_nodesides(bdsg::HashGraph& g, int top_num, int nodeside_count) { + vector ns_degrees; ns_degrees.resize(nodeside_count); + vector ns; ns.resize(nodeside_count); + for (int i = 0; i < nodeside_count; i++) { + ns_degrees[i]=nodeside_degree(i, g); + ns[i] = i; + } + + std::sort(ns.begin(), ns.end(), [&ns_degrees](NodesideId n1, NodesideId n2){ + return ns_degrees[n1] > ns_degrees[n2]; + }); + + auto top_span = span(ns).subspan(0, top_num); + vector top_deg_ns(top_span.begin(), top_span.end()); + return top_deg_ns; +} + +vector get_landmark_nodes(bdsg::HashGraph& g, int top_ns_num, int nodeside_count) { + vector highest_deg_ns = get_highest_degree_nodesides(g, top_ns_num, nodeside_count); + vector possible_lm; possible_lm.resize(top_ns_num); + ranges::transform(highest_deg_ns.begin(), highest_deg_ns.end(), possible_lm.begin(), nodeside_to_node); + sort(possible_lm.begin(),possible_lm.end()); + auto new_lm_end_it = unique(possible_lm.begin(), possible_lm.end()); + possible_lm.resize(distance(possible_lm.begin(), new_lm_end_it)); + + return possible_lm; +} + +/* +Notes: +- modifies the table passed in +*/ +void fill_other_nodeside_dists(SdslArray2D& dist_table, HashGraph& g) { + auto table_shape = dist_table.shape(); + for (DIST_UINT i = 0; i < table_shape[0]; i++) { + for (DIST_UINT ns = 0; ns < table_shape[1]; ns++) { + if (dist_table.get(i,ns) != INF_INT) { + NodesideId other_ns = other_nodeside(ns); + NodeId node = nodeside_to_node(ns); + 
dist_table.set(i, other_ns, min(addInt(dist_table.get(i,ns), g.get_length(g.get_handle(node))), static_cast(dist_table.get(i,other_ns)))); + } + } + } +} + +/* +trying insert-Dijkstra over decrease-key dijkstra +see https://stackoverflow.com/questions/9255620/why-does-dijkstras-algorithm-use-decrease-key + +start is the id of the origin node +dijkstra goes from start node to other nodes' nodesides +*/ + +PriorityQueue initQueue( + int nodeside_count, + NodesideId start_ns +) { + vector init_objs; init_objs.resize(2); + /* + for (int i = 0; i < nodeside_count; i++) { + init_objs[i] = make_tuple(INF_INT, OTHER_NODESIDE, i); + init_objs[i+nodeside_count] = make_tuple(INF_INT, OTHER_NODE, i); + }*/ + + //NodesideId start_node_other_ns = other_nodeside(start_ns); + //treating start_ns as arrived to from OTHER_NODESIDE so exit is to another node + //don't exit out other direction, that's covered by the other nodeside of the node + init_objs[0] = make_tuple(0, OTHER_NODESIDE, start_ns); + //dummy queue obj + init_objs[1] = make_tuple(INF_INT, OTHER_NODE, start_ns); + /* + for (QueueObj qo: init_objs) { + auto [a,b,c] = qo; + nodeside_queue.push(qo); + } */ + PriorityQueue nodeside_queue{pqcomp1, move(init_objs)}; + return nodeside_queue; +} + +/* +Called when new nodeside is discovered. 
+If path to the nodeside is the shortest found so far: + update path_lengths and add new queue entry for it +*/ +bool discover_nodeside(NodesideId new_nodeside, EnterDir enter_direction, int new_len, vector>& path_lengths, PriorityQueue& nodeside_queue) { + + if (new_len < path_lengths[enter_direction][new_nodeside]) { + nodeside_queue.push({new_len, enter_direction, new_nodeside}); + path_lengths[enter_direction][new_nodeside] = new_len; + } + return true; +}; + +//arguments after second are optional +SdslArray2D dijkstra(bdsg::HashGraph& g, NodesideId start, NodesideId stop_ns, int stop_dist, bool ball_ver) { + //code in this function based off https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm#Pseudocode and its subsections + int nodeside_count = g.get_node_count()*2; + + #define LEFT_PATH true + #define RIGHT_PATH false + + NodesideId start_ns = start; + //handle_t start_node_handle = g.get_handle(nodeside_to_node(start_ns)); + + + int inf = INF_INT; + + /*auto is_start = [start_ns](NodesideId ns) { + return (ns == start_ns); + }; */ + + //Need to see each nodeside twice; need to keep track of two distances, one when entering from other nodeside + //the other when entering from a completely different node + //variables called enter_direction keep track of direction of entry + PriorityQueue nodeside_queue = initQueue(nodeside_count, start_ns); + + //matrix to store minimum distances found so far + vector> path_lengths(2); + for (auto& v: path_lengths) { + v.resize(nodeside_count); + fill(v.begin(), v.end(), inf); + } + + path_lengths[OTHER_NODESIDE][start_ns] = 0; + + vector> which_path(2); + + //keep track of which nodesides have self-loops + vector self_loop; self_loop.resize(nodeside_count); + fill(self_loop.begin(), self_loop.end(), false); + + + //Stop conditions: + // - empty queue + // - stop_ns visited from the OTHER_NODE direction + // - stop_dist reached or exceeded by next queue item + auto not_stop_cond = [stop_ns,stop_dist] (const QueueObj& 
top_item) { + bool not_stop_ns = !(get<1>(top_item) == OTHER_NODE && get<2>(top_item) == stop_ns); + bool not_stop_dist = (get<0>(top_item) <= stop_dist) && (get<0>(top_item) != INF_INT); + return not_stop_ns && not_stop_dist; + }; + while ((!nodeside_queue.empty()) && not_stop_cond(nodeside_queue.top())) { + QueueObj item = nodeside_queue.top(); nodeside_queue.pop(); + int cur_dist = get<0>(item); + EnterDir enter_direction = get<1>(item); + NodesideId cur_nodeside = get<2>(item); + + //gotta have this since priorities of C++ priority queue elements can't be updated + if (cur_dist != path_lengths[enter_direction][cur_nodeside]){ + continue; + } + + NodeId node_id = nodeside_to_node(cur_nodeside); + + + + if (enter_direction == OTHER_NODESIDE) { + //got here from the other nodeside of cur_nodeside's node + handle_t handle = g.get_handle(node_id, nodeside_left(cur_nodeside)); + + g.follow_edges(handle, false, [&g,cur_nodeside,&handle,cur_dist,&self_loop,&path_lengths,&nodeside_queue](const handle_t& new_h){ + NodesideId new_nodeside = handle_entry_nodeside(new_h,g); + if (new_nodeside == cur_nodeside) { + self_loop[new_nodeside] = true; + } + + return discover_nodeside( + new_nodeside, OTHER_NODE, + cur_dist, + path_lengths, nodeside_queue + ); + }); + + } + else { + //came here from a nodeside of a node that =/= cur_nodeside's node + handle_t handle = g.get_handle(node_id, !nodeside_left(cur_nodeside)); + int handle_len = g.get_length(handle); + + NodesideId other_ns = other_nodeside(cur_nodeside); + //handle_t cur_handle = self_loop[cur_nodeside] ? g.flip(handle):handle; + if (other_ns == cur_nodeside) { + self_loop[other_ns] = true; + } + discover_nodeside( + other_ns, OTHER_NODESIDE, + cur_dist == INF_INT ? 
INF_INT : cur_dist+handle_len, + path_lengths, nodeside_queue + ); + } + + } + + SdslArray2D res(1, nodeside_count); + for (int i = 0; i < nodeside_count; i++) { + res.set(0, i, path_lengths[OTHER_NODE][i]); + } + + if (ball_ver) { + //fill in OTHER_NODESIDE dists for balls + fill_other_nodeside_dists(res, g); + } + return res; +} + +/* +dijkstra edge cases +*/ + +/* +notes: + - lm_nodes should not be empty +*/ +SdslArray2D get_lm2ns_dist_table(vector& lm_nodes, HashGraph& g) { + size_t nodeside_count = g.get_node_count()*2; + //rows: one row for each landmark nodeside (two rows per landmark) + //cols: one column for each nodeside of the graph + SdslArray2D table(lm_nodes.size()*2, nodeside_count); + + for (size_t i = 0; i < lm_nodes.size(); i++) { + NodeId lm_node = lm_nodes[i]; + array lm_nodesides = get_node_nodesides(lm_node); + + for (size_t ns_i = 0; ns_i < lm_nodesides.size(); ns_i++) { + SdslArray2D dists = dijkstra(g, lm_nodesides[ns_i]); + + size_t row = i*2+ns_i; + for (size_t col = 0; col < nodeside_count; col++) { + table.set(row, col, dists.get(0,col)); + } + } + } + + return table; +} + +void get_closest_lm_ind(SdslArray2D& dist_table, vector& ind_vec, vector& min_dist_vec, int nodeside_count) { + //typedef boost::multi_array_types::index_range index_range; + ind_vec.resize(nodeside_count); + min_dist_vec.resize(nodeside_count); + for (int ns = 0; ns < nodeside_count; ns++) { + //Array2D::index_gen ind_gen; + //Array2D::array_view<1>::type ns_col_view = dist_table[ind_gen[index_range(0,dist_table.shape()[0])][ns]]; + auto ns_col_view = dist_table.col_view(ns); + auto min_itr = min_element(ns_col_view.begin(), ns_col_view.end()); + ind_vec[ns] = distance(ns_col_view.begin(), min_itr) / 2; + int min_dist = *min_itr; + min_dist_vec[ns] = min_dist; + } +} +/* +TEST_CASE("simple get_closest_lm_ind test","") { + HashGraph gt; + handle_t h = gt.create_handle("A"); + handle_t h2 = gt.create_handle("AC"); + handle_t h3 = gt.create_handle("ACG"); + handle_t h4 
= gt.create_handle("ACGT"); + handle_t h5 = gt.create_handle("ACGTA"); + handle_t h6 = gt.create_handle("ACGTAC"); + + gt.create_edge(h,h2); gt.create_edge(h,h3); + gt.create_edge(h2,h4); gt.create_edge(h3,h5); + gt.create_edge(h4,h6); gt.create_edge(h5,h6); + gt.create_edge(h,h6); + + int nodeside_count = gt.get_node_count()*2; + + NodeId l1 = gt.get_id(h); + NodeId l2 = gt.get_id(h6); + vector lm_nodes = {l1, l2}; + Array2D dist_table = get_lm2ns_dist_table(lm_nodes, gt); + + vector closest_lm_ind_vec; closest_lm_ind_vec.resize(nodeside_count); + vector closest_lm_dist_vec; closest_lm_dist_vec.resize(nodeside_count); + get_closest_lm_ind(dist_table, closest_lm_ind_vec, closest_lm_dist_vec, nodeside_count); + + vector ind_ans_key = {0,1,0,1,0,1,0,1,0,1,0,0}; + for (int i = 0; i dist_ans_key = {INF_INT,0,0,4,0,5,2,0,3,0,0,INF_INT}; + for (int i = 0; i find_balls(vector& closest_lm_dist_vec, HashGraph& g, int min_ball_size) { + int nodeside_count = g.get_node_count()*2; + vector balls; balls.resize(nodeside_count); + for (int i = 0; i < nodeside_count; i++) { + int dist_limit = max(min_ball_size,closest_lm_dist_vec[i]); + balls[i] = get_ball_contents(i, dist_limit, g); + } + return balls; +} + +/* +query algorithm from Chen et al. 
(2009) +with modifications for our purpose +*/ + +int oracle_query(NodesideId source, NodesideId target, OracleInfo& oracle, HashGraph& g) { + #define ball_list oracle.balls + #define lm_node_vec oracle.lm_nodes + #define closest_lm_ind oracle.closest_lm_inds + #define lm_dist oracle.closest_lm_dists + #define lm_to_ns oracle.lm_sides_to_ns + + if (ball_list[source].contains(target)) { + return ball_list[source][target]; + } + + if (ball_list[target].contains(source)) { + return ball_list[target][source]; + } + + auto get_guess = [&](NodesideId ns) { + //typedef boost::multi_array_types::index_range index_range; + + NodeId lm = lm_node_vec[closest_lm_ind[ns]]; + //auto lm_nodesides = get_node_nodesides(lm); + int lm_length = g.get_length(g.get_handle(lm)); + int closest_lm_ns_ind = closest_lm_ind[ns]*2; + + //Array2D::index_gen ind_gen; + + //2 x nodeside_count table + //each row is a nodeside of the landmark + //col = nodeside id + //auto lm_table = lm_to_ns[ind_gen[index_range(closest_lm_ns_ind, closest_lm_ns_ind+2)][index_range()]]; + + //generate all possible distances + vector s; + for (int a: {lm_to_ns.get(closest_lm_ns_ind, source), lm_to_ns.get(closest_lm_ns_ind+1, source)}) { + for (int b: {lm_to_ns.get(closest_lm_ns_ind, target), lm_to_ns.get(closest_lm_ns_ind+1, target)}) { + int pos_dist=addInt(a,b); + s.push_back(pos_dist); + } + } + + //in case landmark has a self-loop + s[0] = addInt(s[0], addInt(lm_to_ns.get(closest_lm_ns_ind+1,node_to_nodeside(lm,false)), lm_length)); + s[3] = addInt(s[3], addInt(lm_to_ns.get(closest_lm_ns_ind, node_to_nodeside(lm,true)), lm_length)); + + int min_dist = addInt(*min_element(s.begin(), s.end()), lm_length); + + return min_dist; + }; + + return min(get_guess(target), get_guess(source)); +} + + +void printMArray(Array2D& arr) { + auto sh = arr.shape(); + int num_rows = sh[0]; int num_cols = sh[1]; + for (int i = 0; i < num_rows; i++) { + for (int j = 0; j < num_cols; j++) { + if (arr[i][j] == INF_INT) { + cerr << "inf 
"; + } else { + cerr << arr[i][j] << " "; + } + } + cerr << endl; + } +} + + +} + diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 0d419ef4..aa983fb1 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -4,6 +4,7 @@ //#define debug_distance_paths #include "bdsg/snarl_distance_index.hpp" +#include "bdsg/ch.hpp" #include #include @@ -100,9 +101,13 @@ size_t SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::get_max return SimpleSnarlRecord::record_size(node_count, include_distances); } else { if (parent.first == TEMP_ROOT) { - return SnarlRecord::record_size(include_distances ? DISTANCED_ROOT_SNARL : ROOT_SNARL, node_count) + node_count; + //TODO: Why is node_count being added? + return SnarlRecord::record_size(include_distances ? DISTANCED_ROOT_SNARL : ROOT_SNARL, node_count, 0) + node_count; + } else if (!(hub_labels.empty())) { + return SnarlRecord::record_size(OVERSIZED_SNARL, node_count, hub_labels.size()); } else { - return SnarlRecord::record_size(include_distances ? DISTANCED_SNARL : SNARL, node_count) + node_count; + //TODO: Why is node_count being added? + return SnarlRecord::record_size(include_distances ? 
DISTANCED_SNARL : SNARL, node_count, 0) + node_count; } } } @@ -1298,41 +1303,13 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == DISTANCED_SIMPLE_SNARL) { return SimpleSnarlRecord(parent, &snarl_tree_records).get_distance(rank1, rev1, rank2, rev2); - } else if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == OVERSIZED_SNARL - && !(rank1 == 0 || rank1 == 1 || rank2 == 0 || rank2 == 1) ) { - //If this is an oversized snarl and we're looking for internal distances, then we didn't store the - //distance and we have to find it using dijkstra's algorithm - if (graph == nullptr) { - if (size_limit_warnings.load() < max_num_size_limit_warnings) { - int warning_num = const_cast(this)->size_limit_warnings++; - if (warning_num < max_num_size_limit_warnings) { - std::string msg = "warning: trying to find the distance in an oversized snarl without a graph. Returning inf\n"; - if (warning_num + 1 == max_num_size_limit_warnings) { - msg += "suppressing further warnings\n"; - } - std::cerr << msg; - } - } - return std::numeric_limits::max(); - } - handle_t handle1 = is_node(child1) ? get_handle(child1, graph) : get_handle(get_bound(child1, !child_ends_at_start1, false), graph); - handle_t handle2 = is_node(child2) ? 
get_handle(child2, graph) : get_handle(get_bound(child2, !child_ends_at_start2, false), graph); - handle2 = graph->flip(handle2); - - size_t distance = std::numeric_limits::max(); - handlegraph::algorithms::dijkstra(graph, handle1, [&](const handle_t& reached, size_t dist) { - if (reached == handle2) { - distance = dist; - return false; - } else if (dist > distance_limit) { - distance = std::numeric_limits::max(); - return false; - } - return true; - }, false); - return distance; - + } else if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == OVERSIZED_SNARL) { + size_t distance = hhl_query(rank1, rank2, [&] (size_t data_offset) { + //+ 1 is for skipping over vec_size + return snarl_tree_records->at(get_record_offset(parent) + SNARL_RECORD_SIZE + 1 + data_offset); + }); + return distance; } else if (rank1 == 0 && rank2 == 0 && !snarl_is_root) { //Start to start is stored in the snarl return SnarlRecord(parent, &snarl_tree_records).get_distance_start_start(); @@ -4186,12 +4163,18 @@ size_t SnarlDistanceIndex::SnarlRecord::distance_vector_size(record_t type, size } } -size_t SnarlDistanceIndex::SnarlRecord::record_size (record_t type, size_t node_count) { - return SNARL_RECORD_SIZE + distance_vector_size(type, node_count); +size_t SnarlDistanceIndex::SnarlRecord::record_size (record_t type, size_t node_count, size_t vec_size) { + if (type == OVERSIZED_SNARL) { + return SNARL_RECORD_SIZE + vec_size; + } else { + return SNARL_RECORD_SIZE + distance_vector_size(type, node_count); + } } size_t SnarlDistanceIndex::SnarlRecord::record_size() { - record_t type = get_record_type(); - return record_size(type, get_node_count()); + record_t type = get_record_type(); + //vec_size only for oversized snarls + size_t vec_size = (*records)->at(record_offset + SNARL_RECORD_SIZE); + return record_size(type, get_node_count(), vec_size); } size_t SnarlDistanceIndex::SnarlRecord::get_distance_start_start() const { @@ -4209,7 +4192,7 @@ size_t 
SnarlDistanceIndex::SnarlRecord::get_distance_end_end() const { return stored_value == 0 ? std::numeric_limits::max() : stored_value - 1; } -SnarlDistanceIndex::SnarlRecordWriter::SnarlRecordWriter (size_t node_count, bdsg::yomo::UniqueMappedPointer* records, record_t type){ +SnarlDistanceIndex::SnarlRecordWriter::SnarlRecordWriter (size_t node_count, bdsg::yomo::UniqueMappedPointer* records, record_t type, size_t vec_size){ //Constructor for making a new record, including allocating memory. //Assumes that this is the latest record being made, so pointer will be the end of //the array and we need to allocate extra memory past it @@ -4221,16 +4204,21 @@ SnarlDistanceIndex::SnarlRecordWriter::SnarlRecordWriter (size_t node_count, bds SnarlRecord::record_offset = (*records)->size(); SnarlRecord::records = records; - size_t extra_size = record_size(type, node_count); + //vec_size only used for oversized snarls + size_t extra_size = record_size(type, node_count, vec_size); #ifdef debug_distance_indexing if (type == OVERSIZED_SNARL) { cerr << "oversized" << endl; } cerr << " Resizing array to add snarl: length " << (*records)->size() << " -> " << (*records)->size() + extra_size << endl; -#endif +#endif (*records)->resize((*records)->size() + extra_size); set_node_count(node_count); set_record_type(type); + + if (type == OVERSIZED_SNARL) { + set_vec_size(vec_size); + } #ifdef count_allocations cerr << "new_snarl\t" << extra_size << "\t" << (*records)->size() << endl; @@ -4377,6 +4365,19 @@ void SnarlDistanceIndex::SnarlRecordWriter::set_node_count(size_t node_count) { (*records)->at(record_offset + SNARL_NODE_COUNT_OFFSET) = node_count; } +/* +set size of hub labels vector (hub_labels) +*/ +void SnarlDistanceIndex::SnarlRecordWriter::set_vec_size(size_t vec_size) { +#ifdef debug_distance_indexing + cerr << record_offset + SNARL_NODE_COUNT_OFFSET << " set vec_size " << vec_size << endl; + assert(vec_size > 0); + assert((*records)->at(record_offset + 
SNARL_NODE_COUNT_OFFSET) == 0); +#endif + + (*records)->at(record_offset + SNARL_NODE_COUNT_OFFSET) = vec_size; +} + size_t SnarlDistanceIndex::SnarlRecord::get_child_record_pointer() const { return (*records)->at(record_offset+SNARL_CHILD_RECORD_OFFSET) ; } @@ -5538,9 +5539,9 @@ void SnarlDistanceIndex::ChainRecordWriter::set_distance_right_end(size_t distan } //Add a snarl to the end of the chain and return a SnarlRecordWriter pointing to it -SnarlDistanceIndex::SnarlRecordWriter SnarlDistanceIndex::ChainRecordWriter::add_snarl(size_t snarl_size, record_t type, size_t previous_child_offset) { +SnarlDistanceIndex::SnarlRecordWriter SnarlDistanceIndex::ChainRecordWriter::add_snarl(size_t snarl_size, record_t type, size_t vec_size, size_t previous_child_offset) { - size_t snarl_record_size = SnarlRecord::record_size(type, snarl_size); + size_t snarl_record_size = SnarlRecord::record_size(type, snarl_size, vec_size); #ifdef debug_distance_indexing cerr << (*records)->size() << " Adding child snarl length to the end of the array " << endl; cerr << "Previous child was at " << previous_child_offset << endl; @@ -5558,7 +5559,7 @@ SnarlDistanceIndex::SnarlRecordWriter SnarlDistanceIndex::ChainRecordWriter::add (*records)->resize(start_i+1); (*records)->at(start_i) = snarl_record_size; (*records)->reserve(start_i + snarl_record_size); - SnarlRecordWriter snarl_record(snarl_size, records, type); + SnarlRecordWriter snarl_record(snarl_size, records, type, vec_size); snarl_record.set_parent_record_offset(get_offset()); #ifdef debug_distance_indexing cerr << (*records)->size() << " Adding child snarl length to the end of the array " << endl; @@ -6363,8 +6364,9 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_snarl_records[current_record_index.second]; record_to_offset.emplace(make_pair(temp_index_i,current_record_index), snarl_tree_records->size()); - SnarlRecordWriter snarl_record_constructor (temp_snarl_record.node_count, &snarl_tree_records, record_type); 
+ SnarlRecordWriter snarl_record_constructor (temp_snarl_record.node_count, &snarl_tree_records, record_type, temp_snarl_record.hub_labels.size()); //Fill in snarl info snarl_record_constructor.set_parent_record_offset(0); diff --git a/bdsg/src/test_libbdsg.cpp b/bdsg/src/test_libbdsg.cpp index 0d94c9e3..81a3de6d 100644 --- a/bdsg/src/test_libbdsg.cpp +++ b/bdsg/src/test_libbdsg.cpp @@ -32,7 +32,7 @@ #include "bdsg/overlays/vectorizable_overlays.hpp" #include "bdsg/overlays/packed_subgraph_overlay.hpp" #include "bdsg/overlays/reference_path_overlay.hpp" - +#include "bdsg/ch.hpp" using namespace bdsg; using namespace handlegraph; @@ -4639,6 +4639,37 @@ void test_hash_graph() { cerr << "HashGraph tests successful!" << endl; } +void test_hub_labeling() { + HashGraph test_g; + vector handles; handles.resize(3); + for (auto n: {0,1,2}) { + handles[n] = test_g.create_handle("A"); + } + test_g.create_edge(handles[0], handles[1]); + test_g.create_edge(handles[1], handles[2]); + + //test HashGraph -> Boost graph + CHOverlay bg = make_boost_graph(test_g); + + vector> labels_fwd; labels_fwd.resize(num_vertices(bg)); + vector> labels_back; labels_back.resize(num_vertices(bg)); + create_labels(labels_fwd, labels_back, bg); + + //linearization + vector packed_labels = pack_labels(labels_fwd, labels_back); + //dummy filter + //TODO: placeholder getter for now + size_t dist = hhl_query(0, 1, [&] (size_t ofs) { return 1; }); //binary_intersection_ch(packed_labels, 0, 1, 5); + assert(dist == 1); + + /* + for (size_t i = 0; i < test_g.get_node_count(); i++) { + for (size_t j = 0; j < test_g.get_node_count(); j++) { + binary_intersection_ch( + } + }*/ +} + void test_snarl_distance_index() { char filename[] = "tmpXXXXXX"; From 9a5bbd3120b86d503ad1c39fccbdfe4f1e37134f Mon Sep 17 00:00:00 2001 From: Zia Truong <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 12 Dec 2025 13:52:50 -0800 Subject: [PATCH 02/75] replace placeholder getter stuff, rewrite hhlquery to take 
iterator --- bdsg/include/bdsg/ch.hpp | 3 +- bdsg/src/ch.cpp | 21 ++++++++----- bdsg/src/snarl_distance_index.cpp | 9 ++---- bdsg/src/test_libbdsg.cpp | 50 ++++++++++++++----------------- 4 files changed, 41 insertions(+), 42 deletions(-) diff --git a/bdsg/include/bdsg/ch.hpp b/bdsg/include/bdsg/ch.hpp index cc8668cd..2ef7b40d 100644 --- a/bdsg/include/bdsg/ch.hpp +++ b/bdsg/include/bdsg/ch.hpp @@ -47,7 +47,8 @@ void make_contraction_hierarchy(CHOverlay& ov); DIST_UINT binary_intersection_ch(vector& v1, vector& v2); -DIST_UINT hhl_query(size_t rank1, size_t rank2, std::function reader); +template +DIST_UINT hhl_query(ItrType start_itr, size_t rank1, size_t rank2); void down_dijk(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_rev); diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index 222c3341..daa997b3 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -523,9 +523,8 @@ ItrType get_dist_itr(ItrType start_itr, ItrType hub_itr) { /* start_bound_index variables are relative to start_offset */ -template -DIST_UINT binary_intersection_ch(const VecType& storage, size_t start_offset, size_t v1_start_bound_index, size_t v2_start_bound_index) { - auto start_itr = next(storage.begin(), start_offset); +template +DIST_UINT binary_intersection_ch(ItrType start_itr, size_t v1_start_bound_index, size_t v2_start_bound_index) { auto v1_start_bound_itr = next(start_itr, v1_start_bound_index); auto v1_end_bound_itr = next(v1_start_bound_itr, 1); auto v2_start_bound_itr = next(start_itr, v2_start_bound_index); @@ -557,12 +556,18 @@ DIST_UINT binary_intersection_ch(const VecType& storage, size_t start_offset, si return min_dist; } -DIST_UINT hhl_query(size_t rank1, size_t rank2, std::function reader) { - //reader gets value at index - auto start_index_1 = reader(rank1+1); - auto start_index_2 = reader(rank2+1); + +template +DIST_UINT hhl_query(ItrType start_itr, size_t rank1, size_t rank2) { + size_t label_count = *start_itr; + + auto start_index_1 = 
1+rank1; + auto start_index_2 = 1+label_count+1+rank2; - return 1; + DIST_UINT dist = binary_intersection_ch(start_itr, start_index_1, start_index_2); + + + return dist; } void down_dijk(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_back) { diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index aa983fb1..14ecd759 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -1304,12 +1304,9 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == DISTANCED_SIMPLE_SNARL) { return SimpleSnarlRecord(parent, &snarl_tree_records).get_distance(rank1, rev1, rank2, rev2); } else if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == OVERSIZED_SNARL) { - size_t distance = hhl_query(rank1, rank2, [&] (size_t data_offset) { - //+ 1 is for skipping over vec_size - return snarl_tree_records->at(get_record_offset(parent) + SNARL_RECORD_SIZE + 1 + data_offset); - }); - - return distance; + size_t distance = hhl_query(snarl_tree_records->begin() + get_record_offset(parent) + SNARL_RECORD_SIZE + 1, rank1, rank2); + return distance; + } else if (rank1 == 0 && rank2 == 0 && !snarl_is_root) { //Start to start is stored in the snarl return SnarlRecord(parent, &snarl_tree_records).get_distance_start_start(); diff --git a/bdsg/src/test_libbdsg.cpp b/bdsg/src/test_libbdsg.cpp index 81a3de6d..60f3f77f 100644 --- a/bdsg/src/test_libbdsg.cpp +++ b/bdsg/src/test_libbdsg.cpp @@ -4639,35 +4639,31 @@ void test_hash_graph() { cerr << "HashGraph tests successful!" 
<< endl; } -void test_hub_labeling() { - HashGraph test_g; - vector handles; handles.resize(3); - for (auto n: {0,1,2}) { - handles[n] = test_g.create_handle("A"); +void test_hub_labeling() { + { + HashGraph test_g; + vector handles; handles.resize(3); + for (auto n: {0,1,2}) { + handles[n] = test_g.create_handle("A"); + } + test_g.create_edge(handles[0], handles[1]); + test_g.create_edge(handles[1], handles[2]); + + //test HashGraph -> Boost graph + CHOverlay bg = make_boost_graph(test_g); + + vector> labels_fwd; labels_fwd.resize(num_vertices(bg)); + vector> labels_back; labels_back.resize(num_vertices(bg)); + create_labels(labels_fwd, labels_back, bg); + + //linearization + vector packed_labels = pack_labels(labels_fwd, labels_back); + //dummy filter + assert(hhl_query(packed_labels.begin(), 0, 2) == 0); + } - test_g.create_edge(handles[0], handles[1]); - test_g.create_edge(handles[1], handles[2]); - - //test HashGraph -> Boost graph - CHOverlay bg = make_boost_graph(test_g); - - vector> labels_fwd; labels_fwd.resize(num_vertices(bg)); - vector> labels_back; labels_back.resize(num_vertices(bg)); - create_labels(labels_fwd, labels_back, bg); - - //linearization - vector packed_labels = pack_labels(labels_fwd, labels_back); - //dummy filter - //TODO: placeholder getter for now - size_t dist = hhl_query(0, 1, [&] (size_t ofs) { return 1; }); //binary_intersection_ch(packed_labels, 0, 1, 5); - assert(dist == 1); - /* - for (size_t i = 0; i < test_g.get_node_count(); i++) { - for (size_t j = 0; j < test_g.get_node_count(); j++) { - binary_intersection_ch( - } - }*/ + } void test_snarl_distance_index() { From 3e009fa8d664b8c219d4b5f4427d0c60fd90dc21 Mon Sep 17 00:00:00 2001 From: Zia Truong <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 23 Jan 2026 09:05:13 -0800 Subject: [PATCH 03/75] debugging etc. 
--- Makefile | 1 + bdsg/include/bdsg/ch.hpp | 76 ++++++- .../bdsg/internal/indexing_iterator.hpp | 208 ++++++++++++++++++ bdsg/include/bdsg/internal/mapped_structs.hpp | 23 +- bdsg/include/bdsg/internal/packed_structs.hpp | 203 +---------------- bdsg/include/bdsg/snarl_distance_index.hpp | 1 + bdsg/src/ch.cpp | 77 ++----- bdsg/src/snarl_distance_index.cpp | 15 +- bdsg/src/test_libbdsg.cpp | 102 ++++++++- 9 files changed, 434 insertions(+), 272 deletions(-) create mode 100644 bdsg/include/bdsg/internal/indexing_iterator.hpp diff --git a/Makefile b/Makefile index 4436f140..47c5c977 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ OBJS += $(OBJ_DIR)/path_subgraph_overlay.o OBJS += $(OBJ_DIR)/subgraph_overlay.o OBJS += $(OBJ_DIR)/vectorizable_overlays.o OBJS += $(OBJ_DIR)/packed_subgraph_overlay.o +OBJS += $(OBJ_DIR)/ch.o OBJS += $(OBJ_DIR)/snarl_distance_index.o OBJS += $(OBJ_DIR)/strand_split_overlay.o OBJS += $(OBJ_DIR)/utility.o diff --git a/bdsg/include/bdsg/ch.hpp b/bdsg/include/bdsg/ch.hpp index 2ef7b40d..207a5b83 100644 --- a/bdsg/include/bdsg/ch.hpp +++ b/bdsg/include/bdsg/ch.hpp @@ -43,12 +43,84 @@ int edge_diff(ContractedGraph::vertex_descriptor nid, ContractedGraph& ch, CHOve void contract(CHOverlay::vertex_descriptor nid, ContractedGraph& ch, CHOverlay& ov, vector& node_dists, vector& shouldnt_contract, int hop_limit); void make_contraction_hierarchy(CHOverlay& ov); - + +template +ItrType get_dist_itr(ItrType start_itr, ItrType hub_itr) { + auto node_count = *start_itr; + auto last_fwd_end_bound_itr = next(start_itr, 1+node_count); + if (hub_itr >= next(start_itr, *last_fwd_end_bound_itr)) { + //backwards label + auto first_back_bound_itr = next(start_itr, 1+node_count+1); + auto last_back_bound_itr = next(start_itr, 1+node_count+1+node_count); + auto jump_to_dist = (*last_back_bound_itr) - *first_back_bound_itr; + return next(hub_itr, jump_to_dist); + } else { + //forwards label + auto first_fwd_bound_itr = next(start_itr, 1); + auto 
last_fwd_bound_itr = next(start_itr, 1+node_count); + auto jump_to_dist = (*last_fwd_bound_itr) - *first_fwd_bound_itr; + return next(hub_itr, jump_to_dist); + } +} DIST_UINT binary_intersection_ch(vector& v1, vector& v2); +/* +start_bound_index variables are relative to start_offset +*/ +template +DIST_UINT binary_intersection_ch(ItrType start_itr, size_t v1_start_bound_index, size_t v2_start_bound_index) { + auto v1_start_bound_itr = next(start_itr, v1_start_bound_index); + auto v1_end_bound_itr = next(v1_start_bound_itr, 1); + auto v2_start_bound_itr = next(start_itr, v2_start_bound_index); + auto v2_end_bound_itr = next(v2_start_bound_itr, 1); + + auto v1_start_itr = next(start_itr, *v1_start_bound_itr); + auto v1_end_itr = next(start_itr, *v1_end_bound_itr); + auto v2_start_itr = next(start_itr, *v2_start_bound_itr); + auto v2_end_itr = next(start_itr, *v2_end_bound_itr); + auto v1_range = ranges::subrange(v1_start_itr, v1_end_itr); + auto v2_range = ranges::subrange(v2_start_itr, v2_end_itr); + + auto& key_vec = v1_range.size() < v2_range.size() ? v1_range : v2_range; + auto& search_vec = v1_range.size() < v2_range.size() ? 
v2_range : v1_range; + + auto search_start_itr = search_vec.begin(); + auto search_end_itr = search_vec.end(); + DIST_UINT min_dist = INF_INT; + for (auto it = key_vec.begin(); it < key_vec.end(); it++) { + //cerr << "loop top" << endl; + auto k = *it; + auto k_dist_itr = get_dist_itr(start_itr, it); + //cerr << "dist for k " << k << " is " << *k_dist_itr << ", at: " << distance(start_itr,k_dist_itr) << endl; + //cerr << "searching for " << k << " between " << distance(start_itr,search_start_itr) << " & " << distance(start_itr,search_end_itr) << endl; + search_start_itr = lower_bound(search_start_itr, search_end_itr, k); + if (search_start_itr == search_end_itr) { + return min_dist; + } + if (*search_start_itr == k) { + //cerr << "match found, key: " << *search_start_itr << ", at " << distance(start_itr,search_start_itr) << endl; + auto dist_itr = get_dist_itr(start_itr, search_start_itr); + DIST_UINT d = *(dist_itr) + *(k_dist_itr); + //cerr << "dist for key is: " << *dist_itr << ", at " << distance(start_itr,dist_itr) << endl; + //cerr << "total dist is: " << d << endl; + min_dist = min(min_dist, d); + } + } + return min_dist; +} template -DIST_UINT hhl_query(ItrType start_itr, size_t rank1, size_t rank2); +DIST_UINT hhl_query(ItrType start_itr, size_t rank1, size_t rank2) { + size_t label_count = *start_itr; + + auto start_index_1 = 1+rank1; + auto start_index_2 = 1+label_count+1+rank2; + + DIST_UINT dist = binary_intersection_ch(start_itr, start_index_1, start_index_2); + + + return dist; +} void down_dijk(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_rev); diff --git a/bdsg/include/bdsg/internal/indexing_iterator.hpp b/bdsg/include/bdsg/internal/indexing_iterator.hpp new file mode 100644 index 00000000..447d09d7 --- /dev/null +++ b/bdsg/include/bdsg/internal/indexing_iterator.hpp @@ -0,0 +1,208 @@ +#ifndef BDSG_INDEXING_ITERATOR_HPP_INCLUDED +#define BDSG_INDEXING_ITERATOR_HPP_INCLUDED + +namespace bdsg { +/** + * A forward 
iterator for anything vector-like (PackedVector, PagedVector, + * etc.) that provides read-only access to elements and internally uses integer + * indexes. + * + * This iterator is invalidated if the backing object changes size or moves. + */ +template +class IndexingIterator { +public: + // Iterator traits for standard library compatibility + using iterator_category = std::random_access_iterator_tag; + using value_type = uint64_t; + using difference_type = typename std::make_signed::type; + using pointer = void; + using reference = uint64_t; + + //Constructors + IndexingIterator() = default; + IndexingIterator(const IndexingIterator& other) = default; + IndexingIterator& operator=(const IndexingIterator& other) = default; + ~IndexingIterator() = default; + + /// Pre-increment operator + IndexingIterator& operator++(); + + /// Post-increment operator + IndexingIterator operator++(int); + + /// Dereference operator - returns value at current position + uint64_t operator*() const; + + /// Equality comparison + bool operator==(const IndexingIterator& other) const; + + /// Inequality comparison + bool operator!=(const IndexingIterator& other) const; + + // Bidirectional methods + + /// Pre-decrement operator + IndexingIterator& operator--(); + + /// Post-decrement operator + IndexingIterator operator--(int); + + // Random-access methods + + /// Addition of offset in place + IndexingIterator& operator+=(difference_type offset); + + /// Addition of offset + IndexingIterator operator+(difference_type offset) const; + + /// Subtraction of offset in place + IndexingIterator& operator-=(difference_type offset); + + /// Subtraction of offset + IndexingIterator operator-(difference_type offset) const; + + /// Subtraction of two iterators + difference_type operator-(const IndexingIterator& other) const; + + /// Indexing into iterator. 
Even though we type this as reference, remember + /// that we don't actually implement writing to our "references" and just + /// use the value type. + /// Result is undefined if itrators are to different collecitons. + reference operator[](difference_type offset) const; + + // Comaprable iterator methods (TODO: Is there an STL concept name for this?) + + /// Determine if this iterator is strictly before another. + /// Result is undefined if iterators are to different collecitons. + bool operator<(const IndexingIterator& other) const; + + /// Determine if this iterator is before or at another. + /// Result is undefined if iterators are to different collecitons. + bool operator<=(const IndexingIterator& other) const; + + /// Determine if this iterator is strictly after another. + /// Result is undefined if iterators are to different collecitons. + bool operator>(const IndexingIterator& other) const; + + /// Determine if this iterator is at or after another. + /// Result is undefined if itrators are to different collecitons. + bool operator>=(const IndexingIterator& other) const; + +private: + // Private constructor - only associated class can create iterators + IndexingIterator(const VectorLike* vec, size_t idx); + + const VectorLike* vec_ptr = nullptr; + size_t index = 0; + + // We're not allowed to use "class" when befriending a template parameter. 
+ // See + friend VectorLike; +}; + +template +IndexingIterator::IndexingIterator(const VectorLike* vec, size_t idx) + : vec_ptr(vec), index(idx) { + // Constructor +} + +template +IndexingIterator& IndexingIterator::operator++() { + ++index; + return *this; +} + +template +IndexingIterator IndexingIterator::operator++(int) { + IndexingIterator tmp = *this; + ++index; + return tmp; +} + +template +uint64_t IndexingIterator::operator*() const { + return vec_ptr->get(index); +} + +template +bool IndexingIterator::operator==(const IndexingIterator& other) const { + return vec_ptr == other.vec_ptr && index == other.index; +} + +template +bool IndexingIterator::operator!=(const IndexingIterator& other) const { + return !(*this == other); +} + +template +IndexingIterator& IndexingIterator::operator--() { + --index; + return *this; +} + +template +IndexingIterator IndexingIterator::operator--(int) { + IndexingIterator tmp = *this; + --index; + return tmp; +} + +template +IndexingIterator& IndexingIterator::operator+=(difference_type offset) { + index += offset; + return *this; +} + +template +IndexingIterator IndexingIterator::operator+(difference_type offset) const { + return IndexingIterator(vec_ptr, index + offset); +} + +template +IndexingIterator& IndexingIterator::operator-=(difference_type offset) { + index -= offset; + return *this; +} + +template +IndexingIterator IndexingIterator::operator-(difference_type offset) const { + return IndexingIterator(vec_ptr, index - offset); +} + +template +typename IndexingIterator::difference_type IndexingIterator::operator-(const IndexingIterator& other) const { + // TODO: I don't know a way to subtract two unsigned values and get the + // signed difference in a single operation as long as that difference + // itself fits the signed type. So we cast and hope. 
+ return (difference_type) index - (difference_type) other.index; +} + +template +typename IndexingIterator::reference IndexingIterator::operator[](difference_type offset) const { + return *(*this + offset); +} + +template +bool IndexingIterator::operator<(const IndexingIterator& other) const { + return index < other.index; +} + +template +bool IndexingIterator::operator<=(const IndexingIterator& other) const { + return index <= other.index; +} + +template +bool IndexingIterator::operator>(const IndexingIterator& other) const { + return index > other.index; +} + +template +bool IndexingIterator::operator>=(const IndexingIterator& other) const { + return index >= other.index; +} +} + + +#endif diff --git a/bdsg/include/bdsg/internal/mapped_structs.hpp b/bdsg/include/bdsg/internal/mapped_structs.hpp index b3dc7881..aab5aedd 100644 --- a/bdsg/include/bdsg/internal/mapped_structs.hpp +++ b/bdsg/include/bdsg/internal/mapped_structs.hpp @@ -19,6 +19,7 @@ #include #include #include +#include // TODO: We only target little-endian systems, like x86_64 and ARM64 Linux and // MacOS. Porting to big-endian systems will require wrapping all the numbers @@ -827,7 +828,7 @@ class UniqueMappedPointer { template> class CompatVector { public: - + CompatVector() = default; // Because we contain a pointer, we need a destructor and copy and move @@ -865,6 +866,7 @@ class CompatVector { * Empty out the vector and free any allocated memory. */ void clear(); + T& at(size_t index); const T& at(size_t index) const; @@ -937,6 +939,7 @@ using MappedVector = CompatVector>; template> class CompatIntVector { public: + using iterator = IndexingIterator; CompatIntVector() = default; @@ -996,6 +999,9 @@ class CompatIntVector { */ void clear(); + iterator begin() const; + iterator end() const; + /** * Return the width in bits of the entries. 
*/ @@ -1109,6 +1115,9 @@ class CompatIntVector { */ ConstProxy operator[](size_t index) const; + //get() needed for iterators (or compiler won't be happy) + inline uint64_t get(size_t index) const { return (*this)[index]; }; + // Compatibility with SDSL-lite serialization /** @@ -1741,6 +1750,16 @@ void CompatIntVector::clear() { data.clear(); } +template +CompatIntVector::iterator CompatIntVector::begin() const { + return iterator(this, 0); +} + +template +CompatIntVector::iterator CompatIntVector::end() const { + return iterator(this, length); +} + template size_t CompatIntVector::width() const { return bit_width; @@ -1860,6 +1879,7 @@ CompatIntVector::ConstProxy::operator uint64_t () const { template auto CompatIntVector::at(size_t index) -> Proxy { if (index > size()) { + assert(false); throw std::out_of_range("Accessing index " + std::to_string(index) + " in integer vector of length " + std::to_string(size())); } @@ -1869,6 +1889,7 @@ auto CompatIntVector::at(size_t index) -> Proxy { template auto CompatIntVector::at(size_t index) const -> ConstProxy { if (index > size()) { + assert(false); throw std::out_of_range("Accessing index " + std::to_string(index) + " in integer vector of length " + std::to_string(size())); } diff --git a/bdsg/include/bdsg/internal/packed_structs.hpp b/bdsg/include/bdsg/internal/packed_structs.hpp index de99df43..152693c4 100644 --- a/bdsg/include/bdsg/internal/packed_structs.hpp +++ b/bdsg/include/bdsg/internal/packed_structs.hpp @@ -16,6 +16,7 @@ #include #include +#include #include namespace bdsg { @@ -29,103 +30,6 @@ template inline void repack(IntVector& target, size_t new_width, size_t new_size); -/** - * A forward iterator for anything vector-like (PackedVector, PagedVector, - * etc.) that provides read-only access to elements and internally uses integer - * indexes. - * - * This iterator is invalidated if the backing object changes size or moves. 
- */ -template -class IndexingIterator { -public: - // Iterator traits for standard library compatibility - using iterator_category = std::random_access_iterator_tag; - using value_type = uint64_t; - using difference_type = typename std::make_signed::type; - using pointer = void; - using reference = uint64_t; - - // Standard iterator operations - IndexingIterator(const IndexingIterator& other) = default; - IndexingIterator& operator=(const IndexingIterator& other) = default; - ~IndexingIterator() = default; - - /// Pre-increment operator - IndexingIterator& operator++(); - - /// Post-increment operator - IndexingIterator operator++(int); - - /// Dereference operator - returns value at current position - uint64_t operator*() const; - - /// Equality comparison - bool operator==(const IndexingIterator& other) const; - - /// Inequality comparison - bool operator!=(const IndexingIterator& other) const; - - // Bidirectional methods - - /// Pre-decrement operator - IndexingIterator& operator--(); - - /// Post-decrement operator - IndexingIterator operator--(int); - - // Random-access methods - - /// Addition of offset in place - IndexingIterator& operator+=(difference_type offset); - - /// Addition of offset - IndexingIterator operator+(difference_type offset) const; - - /// Subtraction of offset in place - IndexingIterator& operator-=(difference_type offset); - - /// Subtraction of offset - IndexingIterator operator-(difference_type offset) const; - - /// Subtraction of two iterators - difference_type operator-(const IndexingIterator& other) const; - - /// Indexing into iterator. Even though we type this as reference, remember - /// that we don't actually implement writing to our "references" and just - /// use the value type. - /// Result is undefined if itrators are to different collecitons. - reference operator[](difference_type offset) const; - - // Comaprable iterator methods (TODO: Is there an STL concept name for this?) 
- - /// Determine if this iterator is strictly before another. - /// Result is undefined if iterators are to different collecitons. - bool operator<(const IndexingIterator& other) const; - - /// Determine if this iterator is before or at another. - /// Result is undefined if iterators are to different collecitons. - bool operator<=(const IndexingIterator& other) const; - - /// Determine if this iterator is strictly after another. - /// Result is undefined if iterators are to different collecitons. - bool operator>(const IndexingIterator& other) const; - - /// Determine if this iterator is at or after another. - /// Result is undefined if itrators are to different collecitons. - bool operator>=(const IndexingIterator& other) const; - -private: - // Private constructor - only associated class can create iterators - IndexingIterator(const VectorLike* vec, size_t idx); - - const VectorLike* vec_ptr = nullptr; - size_t index = 0; - - // We're not allowed to use "class" when befriending a template parameter. - // See - friend VectorLike; -}; /* * A dynamic integer vector that maintains integers in bit-compressed form. 
@@ -745,111 +649,6 @@ inline void repack>(sdsl::int_vector<>& target, size_t new_wi target = std::move(tmp); } -///////////////////// -/// IndexingIterator -///////////////////// - -template -IndexingIterator::IndexingIterator(const VectorLike* vec, size_t idx) - : vec_ptr(vec), index(idx) { - // Constructor -} - -template -IndexingIterator& IndexingIterator::operator++() { - ++index; - return *this; -} - -template -IndexingIterator IndexingIterator::operator++(int) { - IndexingIterator tmp = *this; - ++index; - return tmp; -} - -template -uint64_t IndexingIterator::operator*() const { - return vec_ptr->get(index); -} - -template -bool IndexingIterator::operator==(const IndexingIterator& other) const { - return vec_ptr == other.vec_ptr && index == other.index; -} - -template -bool IndexingIterator::operator!=(const IndexingIterator& other) const { - return !(*this == other); -} - -template -IndexingIterator& IndexingIterator::operator--() { - --index; - return *this; -} - -template -IndexingIterator IndexingIterator::operator--(int) { - IndexingIterator tmp = *this; - --index; - return tmp; -} - -template -IndexingIterator& IndexingIterator::operator+=(difference_type offset) { - index += offset; - return *this; -} - -template -IndexingIterator IndexingIterator::operator+(difference_type offset) const { - return IndexingIterator(vec_ptr, index + offset); -} - -template -IndexingIterator& IndexingIterator::operator-=(difference_type offset) { - index -= offset; - return *this; -} - -template -IndexingIterator IndexingIterator::operator-(difference_type offset) const { - return IndexingIterator(vec_ptr, index - offset); -} - -template -typename IndexingIterator::difference_type IndexingIterator::operator-(const IndexingIterator& other) const { - // TODO: I don't know a way to subtract two unsigned values and get the - // signed difference in a single operation as long as that difference - // itself fits the signed type. So we cast and hope. 
- return (difference_type) index - (difference_type) other.index; -} - -template -typename IndexingIterator::reference IndexingIterator::operator[](difference_type offset) const { - return *(*this + offset); -} - -template -bool IndexingIterator::operator<(const IndexingIterator& other) const { - return index < other.index; -} - -template -bool IndexingIterator::operator<=(const IndexingIterator& other) const { - return index <= other.index; -} - -template -bool IndexingIterator::operator>(const IndexingIterator& other) const { - return index > other.index; -} - -template -bool IndexingIterator::operator>=(const IndexingIterator& other) const { - return index >= other.index; -} ///////////////////// diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 2f93902f..513c6ddd 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -1692,6 +1692,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab vector temp_chain_records; vector temp_snarl_records; vector temp_node_records; + //whether the entire index has any oversized snarls or not bool use_oversized_snarls = false; size_t most_oversized_snarl_size = 0; friend class SnarlDistanceIndex; diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index daa997b3..6a5681d1 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -413,7 +413,7 @@ void make_contraction_hierarchy(CHOverlay& ov) { cerr << "left over: " << num_vertices(ov) - num_con << endl; //std::fill(skip.begin(), skip.end(), false); //for (auto n: arti_pts) { skip[n] = true; } - + vector> queue_objs; queue_objs.reserve(num_vertices(ov)/2); for (int i = 0; i < num_vertices(ov); i+=1) { if (ov[i].contracted) { continue; } @@ -426,9 +426,9 @@ void make_contraction_hierarchy(CHOverlay& ov) { } make_heap(queue_objs.begin(), queue_objs.end(), greater>()); pop_heap(queue_objs.begin(), queue_objs.end(), greater>()); - //size_t init_qsize = 
queue_objs.size(); - while (queue_objs.size() > 4) { + + while (queue_objs.size() > 2) { auto [pri, node] = queue_objs.back(); queue_objs.pop_back(); //preparing for next pop pop_heap(queue_objs.begin(), queue_objs.end(), greater>()); @@ -445,11 +445,12 @@ void make_contraction_hierarchy(CHOverlay& ov) { continue; } ov[node].level += 1; - if (queue_objs.size() % 100 == 1) { - cerr << "remaining: " << queue_objs.size() << ", deg: " << (double)num_edges(ov)/num_vertices(ov) << endl; - cerr << "lv: " << ov[node].level << endl; - } - + + //if (queue_objs.size() % 100 == 1) { + cerr << "remaining: " << queue_objs.size() << ", deg: " << (double)num_edges(ov)/num_vertices(ov) << endl; + cerr << "lv: " << ov[node].level << endl; + //} + ov[node].new_id = num_vertices(ov)-1-num_con; contract(node, contracted_g, ov, node_dists, skip, hop_limit); num_con += 1; @@ -474,11 +475,12 @@ void make_contraction_hierarchy(CHOverlay& ov) { for (auto i = 0u; i < num_vertices(ov); i+=1) { v2.emplace_back(in_degree(i,ov)*out_degree(i,ov), i); } - sort(v2.rbegin(), v2.rend()); + sort(v2.rbegin(), v2.rend()); + /* for (int i: {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) { auto [p, n] = v2[i]; cerr << n << " (" << ov[n].new_id << "): " << in_degree(n,ov) << " " << out_degree(n,ov) << endl; - } + } */ } DIST_UINT binary_intersection_ch(vector& v1, vector& v2) { @@ -500,7 +502,7 @@ DIST_UINT binary_intersection_ch(vector& v1, vector& v2) { } return min_dist; } - +/* template ItrType get_dist_itr(ItrType start_itr, ItrType hub_itr) { auto node_count = *start_itr; @@ -519,56 +521,11 @@ ItrType get_dist_itr(ItrType start_itr, ItrType hub_itr) { return next(hub_itr, jump_to_dist); } } - -/* -start_bound_index variables are relative to start_offset */ -template -DIST_UINT binary_intersection_ch(ItrType start_itr, size_t v1_start_bound_index, size_t v2_start_bound_index) { - auto v1_start_bound_itr = next(start_itr, v1_start_bound_index); - auto v1_end_bound_itr = next(v1_start_bound_itr, 1); - auto 
v2_start_bound_itr = next(start_itr, v2_start_bound_index); - auto v2_end_bound_itr = next(v2_start_bound_itr, 1); - - auto v1_start_itr = next(start_itr, *v1_start_bound_itr); - auto v1_end_itr = next(start_itr, *v1_end_bound_itr); - auto v2_start_itr = next(start_itr, *v2_start_bound_itr); - auto v2_end_itr = next(start_itr, *v2_end_bound_itr); - auto v1_range = ranges::subrange(v1_start_itr, v1_end_itr); - auto v2_range = ranges::subrange(v2_start_itr, v2_end_itr); - - auto& key_vec = v1_range.size() < v2_range.size() ? v1_range : v2_range; - auto& search_vec = v1_range.size() < v2_range.size() ? v2_range : v1_range; - - auto search_start_itr = search_vec.begin(); - auto search_end_itr = search_vec.end(); - DIST_UINT min_dist = INF_INT; - for (auto k: key_vec) { - search_start_itr = lower_bound(search_start_itr, search_end_itr, k); - if (search_start_itr == search_end_itr) { - return min_dist; - } - if (*search_start_itr == k) { - DIST_UINT d = - min_dist = min(min_dist, d); - } - } - return min_dist; -} - -template -DIST_UINT hhl_query(ItrType start_itr, size_t rank1, size_t rank2) { - size_t label_count = *start_itr; - auto start_index_1 = 1+rank1; - auto start_index_2 = 1+label_count+1+rank2; - - DIST_UINT dist = binary_intersection_ch(start_itr, start_index_1, start_index_2); - return dist; -} void down_dijk(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_back) { auto in_node = node; @@ -711,6 +668,7 @@ void test_dijk(int node, CHOverlay& ov, vector& node_dists, vector& node_dists, vect if (cur_node == node) { check_dist = min(check_dist, labels[cur_node].back().dist + labels_back[node].back().dist); } + if (check_dist != node_dists[cur_node]) { cerr << "node " << cur_node << " mismatch: " << check_dist << ", actual: " << node_dists[cur_node] << endl; } + } node_dists[node] = INF_INT; @@ -772,6 +732,7 @@ void test_dijk_back(int node, CHOverlay& ov, vector& node_dists, vect } void create_labels(vector>& labels, vector>& 
labels_back, CHOverlay& ov) { + cerr << "start create labels" << endl; vector node_dists(num_vertices(ov), INF_INT); vector v; v.resize(num_vertices(ov)); for (auto i = 0u; i < num_vertices(ov); i++) { @@ -780,7 +741,9 @@ void create_labels(vector>& labels, vector>& for (auto j = 0u; j < num_vertices(ov); j++) { - if (j % 100 == 1) { cerr << j << "th node, " << v[j] << endl; } + //if (j % 100 == 1) { + cerr << j << "th node, " << v[j] << endl; + //cerr << "starting dijkstra: " << endl; down_dijk_back(v[j], ov, node_dists, labels, labels_back); diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 975f7b34..e8f3c14b 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -1,4 +1,4 @@ -//#define debug_distance_indexing +#define debug_distance_indexing //#define debug_snarl_traversal //#define debug_distances //#define debug_distance_paths @@ -4386,15 +4386,16 @@ void SnarlDistanceIndex::SnarlRecordWriter::set_node_count(size_t node_count) { /* set size of hub labels vector (hub_labels) +putting vec_size in the SNARL_RECORD_SIZE slot due to it being the first one after the header */ void SnarlDistanceIndex::SnarlRecordWriter::set_vec_size(size_t vec_size) { #ifdef debug_distance_indexing - cerr << record_offset + SNARL_NODE_COUNT_OFFSET << " set vec_size " << vec_size << endl; + cerr << record_offset + SNARL_RECORD_SIZE << " set vec_size " << vec_size << endl; assert(vec_size > 0); - assert((*records)->at(record_offset + SNARL_NODE_COUNT_OFFSET) == 0); + assert((*records)->at(record_offset + SNARL_RECORD_SIZE) == 0); #endif - (*records)->at(record_offset + SNARL_NODE_COUNT_OFFSET) = vec_size; + (*records)->at(record_offset + SNARL_RECORD_SIZE) = vec_size; } size_t SnarlDistanceIndex::SnarlRecord::get_child_record_pointer() const { @@ -6384,7 +6385,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector& child_index = temp_snarl_record.children[i]; if( child_index.first == TEMP_CHAIN) { - 
assert(temp_index->temp_chain_records[child_index.second].children.size() == 1); + if (temp_index->temp_chain_records[child_index.second].children.size() != 1) { + throw runtime_error("size of children != 1, child index: "+to_string(child_index.second)+", bounding nodes: "+to_string(temp_snarl_record.start_node_id)+" "+to_string(temp_snarl_record.end_node_id)); + } const pair& node_index = temp_index->temp_chain_records[child_index.second].children.front(); const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index->temp_node_records[node_index.second-min_node_id]; diff --git a/bdsg/src/test_libbdsg.cpp b/bdsg/src/test_libbdsg.cpp index a6df19c9..704ff5ac 100644 --- a/bdsg/src/test_libbdsg.cpp +++ b/bdsg/src/test_libbdsg.cpp @@ -5002,6 +5002,10 @@ void test_hub_labeling() { //test HashGraph -> Boost graph CHOverlay bg = make_boost_graph(test_g); + // + make_contraction_hierarchy(bg); + //cerr << " - made contraction hierarchy" << endl; + vector> labels_fwd; labels_fwd.resize(num_vertices(bg)); vector> labels_back; labels_back.resize(num_vertices(bg)); create_labels(labels_fwd, labels_back, bg); @@ -5009,11 +5013,100 @@ void test_hub_labeling() { //linearization vector packed_labels = pack_labels(labels_fwd, labels_back); //dummy filter + /* + for (auto v: labels_fwd) { + for (auto sz: v) { + cerr << "(" << sz.hub << "," << sz.dist << ") "; + } + cerr << " | "; + } + cerr << endl; + cerr<<"back:" << endl; + for (auto v: labels_back) { + for (auto sz: v) { + cerr << "(" << sz.hub << "," << sz.dist << ") "; + } + cerr << " | "; + } + cerr << endl; + cerr << "pack:" << endl; + for (auto sz: packed_labels) { + cerr << sz << " "; + } + cerr << endl; */ + assert(hhl_query(packed_labels.begin(), 0, 2) == 0); + + //TODO: what to do when node equals itself? 
+ //assert(hhl_query(packed_labels.begin(), 0, 0) == INF_INT); + assert(hhl_query(packed_labels.begin(), 5, 1) == 1); + + assert(hhl_query(packed_labels.begin(), 1, 2) == INF_INT); + //TODO: check that error occurs when nodeside out of range is given + } + { + HashGraph test_g; + vector handles; handles.resize(8); + for (auto n: {0,1,2,3,4,5,6,7}) { + handles[n] = test_g.create_handle(string(n+1, 'A')); + } + //vector> edges={{0,1},{0,2},{1,0},{2,0},{1,3},{1,4},{4,1},{5,5}}; + vector> edges={{1,3}}; + for (auto e: edges) { + auto [s,t] = e; + test_g.create_edge(handles[s], handles[t]); + } + //test HashGraph -> Boost graph + CHOverlay bg = make_boost_graph(test_g); + auto [edges_start, edges_end] = boost::edges(bg); + std::for_each(edges_start, edges_end, [&] (auto e) { + cerr << source(e,bg) << " -> " << target(e,bg) << endl; + }); + make_contraction_hierarchy(bg); + + vector> labels_fwd; labels_fwd.resize(num_vertices(bg)); + vector> labels_back; labels_back.resize(num_vertices(bg)); + create_labels(labels_fwd, labels_back, bg); + + //linearization + vector packed_labels = pack_labels(labels_fwd, labels_back); + for (auto v: labels_fwd) { + for (auto sz: v) { + cerr << "(" << sz.hub << "," << sz.dist << ") "; + } + cerr << " | "; + } + cerr << endl; + cerr<<"back:" << endl; + for (auto v: labels_back) { + for (auto sz: v) { + cerr << "(" << sz.hub << "," << sz.dist << ") "; + } + cerr << " | "; + } + /* + //nonexistent path + assert(hhl_query(packed_labels.begin(), 0, 14) == INF_INT); + //check node lengths are taken into account + assert(hhl_query(packed_labels.begin(), 0, 6) == 2); + + //check u -> v and v -> u are different + assert(hhl_query(packed_labels.begin(), 6, 2) == INF_INT); */ + //need to debug + for (int a = 0; a < 10; a++ ) { + cerr << hhl_query(packed_labels.begin(), 2, a) << endl; + } + assert(hhl_query(packed_labels.begin(), 2, 6) == 0); + /* + //node to itself in the same direction (edge exists) + assert(hhl_query(packed_labels.begin(), 10, 10) 
== 0); + //node to itself in the same direction (edge doesn't exist) + assert(hhl_query(packed_labels.begin(), 6, 6) == INF_INT); + */ } - + cerr << "HubLabeling tests successful!" << endl; } void test_snarl_distance_index() { @@ -5070,7 +5163,7 @@ void test_snarl_distance_index() { } int main(void) { - test_reference_path_overlay(); + /*test_reference_path_overlay(); test_bit_packing(); test_mapped_structs(); test_int_vector(); @@ -5116,6 +5209,7 @@ int main(void) { test_packed_subgraph_overlay(); test_multithreaded_overlay_construction(); test_mapped_packed_graph(); - test_hash_graph(); - test_snarl_distance_index(); + test_hash_graph(); */ + test_hub_labeling(); + //test_snarl_distance_index(); } From 6755f44a2437b7538d416522a3ecc6e4614b94c2 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 23 Jan 2026 16:27:38 -0500 Subject: [PATCH 04/75] Change hub_labels to a size_t vector, which the function to generate it returns, and which is a different size on mac --- bdsg/include/bdsg/snarl_distance_index.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index ca33bd86..5de1a70b 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -1642,7 +1642,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab //vector, pair, size_t>> distances; unordered_map, pair>, size_t> distances; //linearized hub labels (if not empty, this is an oversized snarl) - vector hub_labels; + vector hub_labels; //How long is the record going to be in the distance index? 
size_t get_max_record_length() const ; From 0065e9717c949c878427fe39aacb977fc2508e2d Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 23 Jan 2026 18:10:07 -0500 Subject: [PATCH 05/75] Define a type for the complex temporary X index indexing, and accessors that validate it --- bdsg/deps/libhandlegraph | 2 +- bdsg/include/bdsg/snarl_distance_index.hpp | 63 ++++++++++++++--- bdsg/src/snarl_distance_index.cpp | 81 ++++++++++------------ 3 files changed, 92 insertions(+), 54 deletions(-) diff --git a/bdsg/deps/libhandlegraph b/bdsg/deps/libhandlegraph index ec2da41d..0e70dadb 160000 --- a/bdsg/deps/libhandlegraph +++ b/bdsg/deps/libhandlegraph @@ -1 +1 @@ -Subproject commit ec2da41d955e30366b6366b8760fd8646e2c0000 +Subproject commit 0e70dadb5054568d8071e280b3b7b11b5658937f diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 5c28a2fd..521b8bd9 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -1555,13 +1555,26 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab */ enum temp_record_t {TEMP_CHAIN=0, TEMP_SNARL, TEMP_NODE, TEMP_ROOT}; + /** + * Type for referring to some temporary index (for a node, chain, etc.) in + * a TemporaryDistanceIndex. Holds a tag for the type of object being + * indexed, and then a number used to look it up. + * + * For a node, the number is the node ID. + * + * For anything else, it's the position in the corresponding vector of + * temporary indexes in the TemporaryDistanceIndex where the thing's + * temporary index is stored. 
+ */ + using temp_record_ref_t = std::pair; + class TemporaryDistanceIndex{ public: TemporaryDistanceIndex(); ~TemporaryDistanceIndex(); //Get a string of the start and end of a structure - std::string structure_start_end_as_string(pair index) const; + std::string structure_start_end_as_string(temp_record_ref_t index) const; handlegraph::nid_t min_node_id=0; handlegraph::nid_t max_node_id=0; @@ -1586,7 +1599,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab size_t tree_depth=0; //TODO: This isn't used but I left it because I couldn't get the python bindings to build when I changed it //Type of the parent and offset into the appropriate vector //(TEMP_ROOT, 0) if this is a root level chain - pair parent; + temp_record_ref_t parent; size_t min_length=0;//Including boundary nodes size_t max_length = 0; @@ -1608,7 +1621,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab bool is_tip = false; bool loopable = true; //If this is a looping snarl, this is false if the last snarl is not start-end connected - vector> children; //All children, both nodes and snarls, in order + vector children; //All children, both nodes and snarls, in order //Distances for the chain, one entry per node //TODO This would probably be more efficient as a vector of a struct of five ints vector prefix_sum; @@ -1622,7 +1635,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab size_t get_max_record_length(bool include_distances) const; }; struct TemporarySnarlRecord : TemporaryRecord{ - pair parent; + temp_record_ref_t parent; handlegraph::nid_t start_node_id; size_t start_node_length=0; handlegraph::nid_t end_node_id; @@ -1651,7 +1664,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab bool is_tip = false; bool is_root_snarl = false; bool include_distances = true; - vector> children; //All children, nodes and chains, in arbitrary order + vector children; //All 
children, nodes and chains, in arbitrary order unordered_set tippy_child_ranks; //The ranks of children that are tips //vector, pair, size_t>> distances; unordered_map, pair>, size_t> distances; @@ -1667,7 +1680,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab rank_in_parent(0), reversed_in_parent(false){ } handlegraph::nid_t node_id; - pair parent; + temp_record_ref_t parent; size_t node_length=0; size_t rank_in_parent=0; size_t root_snarl_index = std::numeric_limits::max(); @@ -1686,12 +1699,44 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab return NODE_RECORD_SIZE;} }; - - vector> components; - vector> root_snarl_components; + vector components; + vector root_snarl_components; vector temp_chain_records; vector temp_snarl_records; + /// Holds temporary indexes for all the nodes. + /// + /// While temporary snarl and chain records are stored at more or less + /// arbitrary indexes, temporary node records are laid out by node ID, + /// with the one for the node with ID min_node_id at index 0. This means + /// you can look up the TemporaryNodeRecord for a node by its ID, and + /// that some positions in the vector are empty temporary indexes for + /// nonexistent nodes. vector temp_node_records; + + inline TemporaryChainRecord& get_chain(const temp_record_ref_t& ref) { + if (ref.first != TEMP_CHAIN) { + throw std::invalid_argument("Trying to look up a non-chain as a chain"); + } + return temp_chain_records.at(ref.second); + } + + inline TemporarySnarlRecord& get_snarl(const temp_record_ref_t& ref) { + if (ref.first != TEMP_SNARL) { + throw std::invalid_argument("Trying to look up a non-snarl as a snarl"); + } + return temp_snarl_records.at(ref.second); + } + + inline TemporaryNodeRecord& get_node(const temp_record_ref_t& ref) { + if (ref.first != TEMP_NODE) { + throw std::invalid_argument("Trying to look up a non-node as a node"); + } + // Nodes use a node ID in the ref, not an index. 
+ return temp_node_records.at(ref.second - min_node_id); + } + + // Roots never need to be looked up. + //whether the entire index has any oversized snarls or not bool use_oversized_snarls = false; size_t most_oversized_snarl_size = 0; diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index e8f3c14b..d02efb4c 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -29,21 +29,22 @@ SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryDistanceIndex(){} SnarlDistanceIndex::TemporaryDistanceIndex::~TemporaryDistanceIndex(){} -string SnarlDistanceIndex::TemporaryDistanceIndex::structure_start_end_as_string(pair index) const { +string SnarlDistanceIndex::TemporaryDistanceIndex::structure_start_end_as_string(temp_record_ref_t index) const { if (index.first == TEMP_NODE) { - assert(index.second == temp_node_records[index.second-min_node_id].node_id); - return "node " + std::to_string(temp_node_records[index.second-min_node_id].node_id); + const TemporaryNodeRecord& temp_node_record = get_node(index); + assert(index.second == temp_node_record.node_id); + return "node " + std::to_string(temp_node_record.node_id); } else if (index.first == TEMP_SNARL) { - const TemporarySnarlRecord& temp_snarl_record = temp_snarl_records[index.second]; - return "snarl " + std::to_string( temp_snarl_record.start_node_id) + const TemporarySnarlRecord& temp_snarl_record = get_snarl(index); + return "snarl " + std::to_string(temp_snarl_record.start_node_id) + (temp_snarl_record.start_node_rev ? " rev" : " fd") - + " -> " + std::to_string( temp_snarl_record.end_node_id) + + " -> " + std::to_string(temp_snarl_record.end_node_id) + (temp_snarl_record.end_node_rev ? 
" rev" : " fd"); } else if (index.first == TEMP_CHAIN) { - const TemporaryChainRecord& temp_chain_record = temp_chain_records[index.second]; - return "chain " + std::to_string( temp_chain_record.start_node_id) + const TemporaryChainRecord& temp_chain_record = get_chain(index); + return "chain " + std::to_string(temp_chain_record.start_node_id) + (temp_chain_record.start_node_rev ? " rev" : " fd") - + " -> " + std::to_string( temp_chain_record.end_node_id) + + " -> " + std::to_string(temp_chain_record.end_node_id) + (temp_chain_record.end_node_rev ? " rev" : " fd"); } else if (index.first == TEMP_ROOT) { return (string) "root"; @@ -61,7 +62,7 @@ size_t SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::get_max size_t last_node_count = 0; // How many nodes have we seen in a row? size_t nontrivial_snarl_count = 0; size_t total_node_count = 0; - for (const pair& child : children) { + for (const temp_record_ref_t& child : children) { if (child.first == TEMP_NODE) { if (total_node_count==0 || child != children.front()) { //If this is the last node in the chain, don't do anything @@ -6219,7 +6220,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector> to new offset - unordered_map>, size_t> record_to_offset; + unordered_map, size_t> record_to_offset; //Set the root index for (size_t temp_index_i = 0 ; temp_index_i < temporary_indexes.size() ; temp_index_i++) { //Any root will point to the same root @@ -6231,10 +6232,10 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector> temp_record_stack = temp_index->components; + vector temp_record_stack = temp_index->components; while (!temp_record_stack.empty()) { - pair current_record_index = temp_record_stack.back(); + temp_record_ref_t current_record_index = temp_record_stack.back(); temp_record_stack.pop_back(); #ifdef debug_distance_indexing @@ -6245,8 +6246,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_chain_records[current_record_index.second]; + const 
TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index->get_chain(current_record_index); if (!temp_chain_record.is_trivial) { //If this chain contains at least two nodes #ifdef debug_distance_indexing @@ -6260,8 +6260,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_snarl_records[temp_chain_record.parent.second]; + const TemporaryDistanceIndex::TemporarySnarlRecord& temp_parent_record = temp_index->get_snarl(temp_chain_record.parent); if (temp_parent_record.is_root_snarl) { is_child_of_root_snarl = true; } @@ -6322,7 +6321,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector& child_record_index = temp_chain_record.children[child_record_index_i]; + const temp_record_ref_t& child_record_index = temp_chain_record.children[child_record_index_i]; //Go through each node and snarl in the chain and add them to the index #ifdef debug_distance_indexing cerr << " Adding child of the chain: " << temp_index->structure_start_end_as_string(child_record_index) << endl; @@ -6334,9 +6333,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_node_records[child_record_index.second-min_node_id]; - + const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index->get_node(child_record_index); //Make a new node record size_t new_offset = chain_record_constructor.add_node( @@ -6372,8 +6369,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_snarl_records[child_record_index.second]; + const TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index->get_snarl(child_record_index); if (!temp_snarl_record.is_trivial && !temp_snarl_record.is_simple) { //If this is an actual snarl that we need to make @@ -6436,7 +6432,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector& child : temp_snarl_record.children) { + for (const temp_record_ref_t& child : temp_snarl_record.children) { temp_record_stack.emplace_back(child); #ifdef debug_distance_indexing cerr << " " 
<< temp_index->structure_start_end_as_string(child) << endl; @@ -6498,24 +6494,22 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector& child_index = temp_snarl_record.children[i]; + const temp_record_ref_t& child_index = temp_snarl_record.children[i]; if( child_index.first == TEMP_CHAIN) { - if (temp_index->temp_chain_records[child_index.second].children.size() != 1) { + if (temp_index->get_chain(child_index).children.size() != 1) { throw runtime_error("size of children != 1, child index: "+to_string(child_index.second)+", bounding nodes: "+to_string(temp_snarl_record.start_node_id)+" "+to_string(temp_snarl_record.end_node_id)); } - const pair& node_index = temp_index->temp_chain_records[child_index.second].children.front(); - const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = - temp_index->temp_node_records[node_index.second-min_node_id]; + const temp_record_ref_t& node_index = temp_index->get_chain(child_index).children.front(); + const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index->get_node(node_index); //If there is a way to go from the node forward to the start node, //then it is reversed - size_t rank =temp_index->temp_chain_records[child_index.second].rank_in_parent; + size_t rank = temp_index->get_chain(child_index).rank_in_parent; snarl_record_constructor.add_child(i+2, temp_node_record.node_id, temp_node_record.node_length, temp_node_record.reversed_in_parent); } else { assert(child_index.first == TEMP_NODE); - const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = - temp_index->temp_node_records[child_index.second-min_node_id]; + const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index->get_node(child_index); size_t rank =temp_node_record.rank_in_parent; snarl_record_constructor.add_child(i+2, temp_node_record.node_id, temp_node_record.node_length, temp_node_record.reversed_in_parent); @@ -6543,8 +6537,7 @@ void 
SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_node_records[temp_chain_record.children[0].second-min_node_id]; + const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index->get_node(temp_chain_record.children[0]); bool ignore_distances = (snarl_size_limit == 0) || only_top_level_chain_distances; @@ -6575,7 +6568,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_snarl_records[current_record_index.second]; + const TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index->get_snarl(current_record_index); record_to_offset.emplace(make_pair(temp_index_i,current_record_index), snarl_tree_records->size()); SnarlRecordWriter snarl_record_constructor (temp_snarl_record.node_count, &snarl_tree_records, record_type, temp_snarl_record.hub_labels.size()); @@ -6612,7 +6605,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector& child : temp_snarl_record.children) { + for (const temp_record_ref_t& child : temp_snarl_record.children) { temp_record_stack.emplace_back(child); } @@ -6623,8 +6616,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectorstructure_start_end_as_string(current_record_index) << endl; #endif - const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = - temp_index->temp_node_records[current_record_index.second-min_node_id]; + const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index->get_node(current_record_index); bool ignore_distances = (snarl_size_limit == 0) || only_top_level_chain_distances; record_t record_type = ignore_distances ? 
NODE : DISTANCED_NODE; @@ -6652,7 +6644,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectorcomponents.size() ; component_num++){ - const pair& component_index = temp_index->components[component_num]; + const temp_record_ref_t& component_index = temp_index->components[component_num]; //Let the root record know that it has another root root_record.add_component(component_num,record_to_offset[make_pair(temp_index_i,component_index)]); @@ -6709,20 +6701,21 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_snarl_records.size() ; temp_snarl_i ++) { + TemporaryDistanceIndex::temp_record_ref_t temp_snarl_ref = make_pair(TEMP_SNARL, temp_snarl_i); //Get the temporary index for this snarl - const TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index->temp_snarl_records[temp_snarl_i]; + const TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index->get_snarl(temp_snarl_ref); if (!temp_snarl_record.is_trivial && !temp_snarl_record.is_simple) { //And a constructor for the permanent record, which we've already created SnarlRecordWriter snarl_record_constructor (&snarl_tree_records, - record_to_offset[make_pair(temp_index_i, make_pair(TEMP_SNARL, temp_snarl_i))]); + record_to_offset[make_pair(temp_index_i, temp_snarl_ref)]); //Now add the children and tell the record where to find them snarl_record_constructor.set_child_record_pointer(snarl_tree_records->size()); - for (pair child : temp_snarl_record.children) { + for (temp_record_ref_t child : temp_snarl_record.children) { snarl_record_constructor.add_child(record_to_offset[make_pair(temp_index_i, child)]); //Check if the child is a tip, and if so set start/end_tip connectivity of parent snarl if (child.first == TEMP_NODE) { - auto temp_node_record = temp_index->temp_node_records[child.second-min_node_id]; + auto& temp_node_record = temp_index->get_node(child); if (temp_node_record.is_tip) { if (temp_node_record.distance_left_start != 
std::numeric_limits::max() || temp_node_record.distance_right_start != std::numeric_limits::max()){ @@ -6734,7 +6727,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_chain_records[child.second]; + auto& temp_chain_record = temp_index->get_chain(child); if (temp_chain_record.is_tip) { if (temp_chain_record.distance_left_start != std::numeric_limits::max() || temp_chain_record.distance_right_start != std::numeric_limits::max()){ From 420c9e9c118d764e3b095ade9cc710f6bba602ed Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 23 Jan 2026 18:18:12 -0500 Subject: [PATCH 06/75] Use indexing accessors in make_boost_graph() --- bdsg/src/ch.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index 6a5681d1..2fd29f78 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -54,15 +54,15 @@ CHOverlay make_boost_graph(bdsg::HashGraph& hg) { return g; } -CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, pair& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, vector>& all_children, const HandleGraph* hgraph) { +CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, vector& all_children, const HandleGraph* hgraph) { CHOverlay ov(all_children.size()*4); //maps edge destination handle to id in Boost graph unordered_map handle_bgnid_map; for (size_t child_num = 0; child_num < all_children.size(); child_num++) { - auto [rec_type, rec_index] = all_children[child_num]; - if (rec_type == bdsg::SnarlDistanceIndex::TEMP_CHAIN) { - auto& record = temp_index.temp_chain_records.at(rec_index); + auto child = all_children[child_num]; + if (child.first == bdsg::SnarlDistanceIndex::TEMP_CHAIN) { + auto& record = temp_index.get_chain(child); handle_t 
start_handle = hgraph->get_handle(record.start_node_id, record.start_node_rev); handle_t end_handle = hgraph->get_handle(record.end_node_id, record.end_node_rev); //chain representation as node ids (numbers are offsets from child_num*4) @@ -79,7 +79,7 @@ CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_inde //add looping distances (thanks Xian!) auto& first_child = record.children.front(); assert(first_child.first == bdsg::SnarlDistanceIndex::TEMP_NODE); - DIST_UINT start_node_length = temp_index.temp_node_records.at(first_child.second).node_length; + DIST_UINT start_node_length = temp_index.get_node(first_child).node_length; //record.children.front().first.node_length; DIST_UINT start_start_distance = record.forward_loops[0] + (2*start_node_length); DIST_UINT end_end_distance = record.backward_loops.back() + (2*record.end_node_length); @@ -89,8 +89,8 @@ CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_inde new_loop_edge = add_edge(child_num*4+1, child_num*4, ov); ov[new_loop_edge.first].weight = start_start_distance; - } else if (rec_type == bdsg::SnarlDistanceIndex::TEMP_NODE) { - auto& record = temp_index.temp_node_records.at(rec_index); + } else if (child.first == bdsg::SnarlDistanceIndex::TEMP_NODE) { + auto& record = temp_index.get_node(child); handle_t node_handle = hgraph->get_handle(record.node_id, record.reversed_in_parent); ov[child_num*4].seqlen = record.node_length;//hgraph->get_length(node_handle); } else { @@ -101,9 +101,9 @@ CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_inde //add edges between Boost graph nodes of different temp chains / temp nodes for (size_t child_num = 0; child_num < all_children.size(); child_num++) { - auto [rec_type, rec_index] = all_children[child_num]; - if (rec_type == bdsg::SnarlDistanceIndex::TEMP_CHAIN) { - auto& record = temp_index.temp_chain_records.at(rec_index); + auto child = all_children[child_num]; + if (child.first == 
bdsg::SnarlDistanceIndex::TEMP_CHAIN) { + auto& record = temp_index.get_chain(child); const handle_t start_handle = hgraph->get_handle(record.start_node_id, record.start_node_rev); const handle_t end_handle = hgraph->get_handle(record.end_node_id, record.end_node_rev); auto start_id = handle_bgnid_map[start_handle]; @@ -131,8 +131,8 @@ CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_inde } }); } else { - if (rec_type == bdsg::SnarlDistanceIndex::TEMP_NODE) { - auto& record = temp_index.temp_node_records.at(rec_index); + if (child.first == bdsg::SnarlDistanceIndex::TEMP_NODE) { + auto& record = temp_index.get_node(child); handle_t node_handle = hgraph->get_handle(record.node_id, record.reversed_in_parent); const auto node_id = handle_bgnid_map[node_handle]; for (bool direction: {true, false}) { From 5edfaa08d9632108bfb57b3b1d50393b7bfa12b6 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 23 Jan 2026 18:21:26 -0500 Subject: [PATCH 07/75] Add const accessors and fix build errors --- bdsg/include/bdsg/snarl_distance_index.hpp | 22 ++++++++++++++++++++++ bdsg/src/snarl_distance_index.cpp | 4 ++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 521b8bd9..637af506 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -1720,6 +1720,13 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab return temp_chain_records.at(ref.second); } + inline const TemporaryChainRecord& get_chain(const temp_record_ref_t& ref) const { + if (ref.first != TEMP_CHAIN) { + throw std::invalid_argument("Trying to look up a non-chain as a chain"); + } + return temp_chain_records.at(ref.second); + } + inline TemporarySnarlRecord& get_snarl(const temp_record_ref_t& ref) { if (ref.first != TEMP_SNARL) { throw std::invalid_argument("Trying to look up a non-snarl as a snarl"); @@ 
-1727,6 +1734,13 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab return temp_snarl_records.at(ref.second); } + inline const TemporarySnarlRecord& get_snarl(const temp_record_ref_t& ref) const { + if (ref.first != TEMP_SNARL) { + throw std::invalid_argument("Trying to look up a non-snarl as a snarl"); + } + return temp_snarl_records.at(ref.second); + } + inline TemporaryNodeRecord& get_node(const temp_record_ref_t& ref) { if (ref.first != TEMP_NODE) { throw std::invalid_argument("Trying to look up a non-node as a node"); @@ -1735,6 +1749,14 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab return temp_node_records.at(ref.second - min_node_id); } + inline const TemporaryNodeRecord& get_node(const temp_record_ref_t& ref) const { + if (ref.first != TEMP_NODE) { + throw std::invalid_argument("Trying to look up a non-node as a node"); + } + // Nodes use a node ID in the ref, not an index. + return temp_node_records.at(ref.second - min_node_id); + } + // Roots never need to be looked up. 
//whether the entire index has any oversized snarls or not diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index d02efb4c..e92759ac 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -6220,7 +6220,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector> to new offset - unordered_map, size_t> record_to_offset; + unordered_map, size_t> record_to_offset; //Set the root index for (size_t temp_index_i = 0 ; temp_index_i < temporary_indexes.size() ; temp_index_i++) { //Any root will point to the same root @@ -6701,7 +6701,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_snarl_records.size() ; temp_snarl_i ++) { - TemporaryDistanceIndex::temp_record_ref_t temp_snarl_ref = make_pair(TEMP_SNARL, temp_snarl_i); + temp_record_ref_t temp_snarl_ref = make_pair(TEMP_SNARL, temp_snarl_i); //Get the temporary index for this snarl const TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index->get_snarl(temp_snarl_ref); if (!temp_snarl_record.is_trivial && !temp_snarl_record.is_simple) { From b9ba5007d2bd9762f93d0adb32bc5a4540ec3bb4 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Mon, 26 Jan 2026 17:58:36 -0500 Subject: [PATCH 08/75] Write some hub labeling interface docs as I remember it, and add some debugging --- bdsg/include/bdsg/ch.hpp | 60 ++++++++++++++++++++++++++----- bdsg/src/ch.cpp | 9 ----- bdsg/src/snarl_distance_index.cpp | 24 +++++++++++-- 3 files changed, 72 insertions(+), 21 deletions(-) diff --git a/bdsg/include/bdsg/ch.hpp b/bdsg/include/bdsg/ch.hpp index 207a5b83..f738604d 100644 --- a/bdsg/include/bdsg/ch.hpp +++ b/bdsg/include/bdsg/ch.hpp @@ -65,8 +65,15 @@ ItrType get_dist_itr(ItrType start_itr, ItrType hub_itr) { DIST_UINT binary_intersection_ch(vector& v1, vector& v2); /* -start_bound_index variables are relative to start_offset -*/ + * Do binary intersection to find shared labels for two vertices. 
+ * + * start_itr should point to the first slot of the packed label data returned + * by pack_labels(), which is the label count. + * + * start_bound_index variables are relative to start_itr, and give the + * positions of the stored start bounds for the two labels; the stored end + * bounds will be in the slots after. + */ template DIST_UINT binary_intersection_ch(ItrType start_itr, size_t v1_start_bound_index, size_t v2_start_bound_index) { auto v1_start_bound_itr = next(start_itr, v1_start_bound_index); @@ -76,8 +83,14 @@ DIST_UINT binary_intersection_ch(ItrType start_itr, size_t v1_start_bound_index, auto v1_start_itr = next(start_itr, *v1_start_bound_itr); auto v1_end_itr = next(start_itr, *v1_end_bound_itr); + + std::cerr << "Found " << v1_end_itr - v1_start_itr << " labels for vertex 1" << std::endl; + auto v2_start_itr = next(start_itr, *v2_start_bound_itr); - auto v2_end_itr = next(start_itr, *v2_end_bound_itr); + auto v2_end_itr = next(start_itr, *v2_end_bound_itr); + + std::cerr << "Found " << v2_end_itr - v2_start_itr << " labels for vertex 2" << std::endl; + auto v1_range = ranges::subrange(v1_start_itr, v1_end_itr); auto v2_range = ranges::subrange(v2_start_itr, v2_end_itr); @@ -88,33 +101,51 @@ DIST_UINT binary_intersection_ch(ItrType start_itr, size_t v1_start_bound_index, auto search_end_itr = search_vec.end(); DIST_UINT min_dist = INF_INT; for (auto it = key_vec.begin(); it < key_vec.end(); it++) { - //cerr << "loop top" << endl; + cerr << "Performing key query" << endl; auto k = *it; auto k_dist_itr = get_dist_itr(start_itr, it); - //cerr << "dist for k " << k << " is " << *k_dist_itr << ", at: " << distance(start_itr,k_dist_itr) << endl; - //cerr << "searching for " << k << " between " << distance(start_itr,search_start_itr) << " & " << distance(start_itr,search_end_itr) << endl; + cerr << "Distance for k " << k << " is " << *k_dist_itr << ", at: " << distance(start_itr,k_dist_itr) << endl; + cerr << "searching for " << k << " between " << 
distance(start_itr,search_start_itr) << " & " << distance(start_itr,search_end_itr) << endl; search_start_itr = lower_bound(search_start_itr, search_end_itr, k); if (search_start_itr == search_end_itr) { + std::cerr << "No more search results possible" << std::endl; return min_dist; } if (*search_start_itr == k) { - //cerr << "match found, key: " << *search_start_itr << ", at " << distance(start_itr,search_start_itr) << endl; + cerr << "match found, key: " << *search_start_itr << ", at " << distance(start_itr,search_start_itr) << endl; auto dist_itr = get_dist_itr(start_itr, search_start_itr); DIST_UINT d = *(dist_itr) + *(k_dist_itr); - //cerr << "dist for key is: " << *dist_itr << ", at " << distance(start_itr,dist_itr) << endl; - //cerr << "total dist is: " << d << endl; + cerr << "dist for key is: " << *dist_itr << ", at " << distance(start_itr,dist_itr) << endl; + cerr << "total dist is: " << d << endl; min_dist = min(min_dist, d); } } return min_dist; } +/** + * Query stored hub label data for a minimum distance. + * + * start_itr should point to the first slot of the packed label data returned + * by pack_labels(), which is the label count. + */ template DIST_UINT hhl_query(ItrType start_itr, size_t rank1, size_t rank2) { size_t label_count = *start_itr; + std::cerr << "Making hub label query on " << label_count << " labels" << std::endl; + + // Bounds start after the label count, and at the rank of the first + // vertex past there we find the start bound for the first vertex. auto start_index_1 = 1+rank1; + + std::cerr << "Start bound for forward label for rank " << rank1 << " is at index " << start_index_1 << " past there" << std::endl; + + // And there's a final end value for the first set of labels before we go on + // to the bounds where we would find the start bound for the second vertex. 
auto start_index_2 = 1+label_count+1+rank2; + + std::cerr << "Start bound for reverse label for rank " << rank2 << " is at index " << start_index_2 << " past there" << std::endl; DIST_UINT dist = binary_intersection_ch(start_itr, start_index_1, start_index_2); @@ -132,6 +163,17 @@ void test_dijk_rev(int node, CHOverlay& ov, vector& node_dists, vecto void create_labels(vector>& labels, vector>& labels_rev, CHOverlay& ov); +/** + * Puts hub labels in a flat vector form + * + * Structure: + * - offsets are relative to start of flat vector + * - extra offset in each of fwd and back offset sets at the end so that end of ranges can be found + * -- subtracting the extra offset by the first offset of its set gets the distance to the corresponding dist of a hub + * + * The layout is: + * label count | start offsets (fwd) | start offsets (back) | fwd label hubs | fwd label dists | back label hubs | back label dists +*/ vector pack_labels(const vector>& labels, const vector>& labels_back); //not necessary stuff diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index 2fd29f78..d4ebe4fa 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -752,15 +752,6 @@ void create_labels(vector>& labels, vector>& } } -/* -Puts hub labels in a flat vector form - -Structure: -- offsets are relative to start of flat vector -- extra offset in each of fwd and back offset sets at the end so that end of ranges can be found --- subtracting the extra offset by the first offset of its set gets the distance to the corresponding dist of a hub -label count | start offsets (fwd) | start offsets (back) | fwd label hubs | fwd label dists | back label hubs | back label dists -*/ vector pack_labels(const vector>& labels, const vector>& labels_back) { auto label_count = labels.size(); //label_count+1 is so we can look at the next offset to determine end of range diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index e92759ac..4947ca02 100644 --- 
a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -1,6 +1,6 @@ #define debug_distance_indexing //#define debug_snarl_traversal -//#define debug_distances +#define debug_distances //#define debug_distance_paths #include "bdsg/snarl_distance_index.hpp" @@ -1318,8 +1318,26 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == DISTANCED_SIMPLE_SNARL) { return SimpleSnarlRecord(parent, &snarl_tree_records).get_distance(rank1, rev1, rank2, rev2); - } else if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == OVERSIZED_SNARL) { - size_t distance = hhl_query(snarl_tree_records->begin() + get_record_offset(parent) + SNARL_RECORD_SIZE + 1, rank1, rank2); + } else if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == OVERSIZED_SNARL) { +#ifdef debug_distances + cerr << " Performing HHL query" << endl; +#endif + // We need to point at the hub labeling data, which lives after the fixed-size snarl record header and the length value + // This points to the whole record, including the fixed-size header + auto record_it = snarl_tree_records->begin() + get_record_offset(parent); + // This points to the length and the variable-sized data + auto length_data_it = record_it + SNARL_RECORD_SIZE; + std::cerr << " Hub label data length: " << *length_data_it << endl; + std::cerr << " Hub label data: "; + for (size_t i = 0; i < *length_data_it; i++) { + // Dump the hub label data as retrieved + if (i > 0) { + std::cerr << " | "; + } + std::cerr << *(length_data_it + 1 + i); + } + std::cerr << std::endl; + size_t distance = hhl_query(length_data_it + 1, rank1, rank2); return distance; } else if (rank1 == 0 && rank2 == 0 && !snarl_is_root) { From 23e67454e9bbaf525a04851fbc42f791acb36b32 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 27 Jan 2026 13:50:06 -0500 Subject: [PATCH 09/75] Add synthetic debug logging and synthetic 
analysis results to make_boost_graph --- bdsg/include/bdsg/snarl_distance_index.hpp | 24 +++- bdsg/src/ch.cpp | 152 ++++++++++++++++++--- bdsg/src/snarl_distance_index.cpp | 72 ++++++---- 3 files changed, 202 insertions(+), 46 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 637af506..99ac281b 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -1304,10 +1304,30 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab SnarlRecordWriter (size_t node_count, bdsg::yomo::UniqueMappedPointer* records, record_t type, size_t vec_size); SnarlRecordWriter(bdsg::yomo::UniqueMappedPointer* records, size_t pointer); - //sets size of hub label flat vector (only used for oversized snarls) - //TODO: Make separate SnarlRecordWriter for oversized snarls + /** + * Set size of hub labels flat vector (hub_labels). + * Only used for oversized snarls. + * + * May only be called once. + * + * TODO: Make separate SnarlRecordWriter for oversized snarls? + * + * Putting vec_size in the SNARL_RECORD_SIZE'th slot due to it being the first one after the header + */ void set_vec_size(size_t vec_size); + + /** + * Set an entry in the vector holding the hub label data. + * Only used for oversized snarls. + * + * set_vec_size() must be called first. + */ + void set_vec_entry(size_t index, size_t value); + /** + * Set a distance matrix entry. + * Not used for oversized snarls. 
+ */ void set_distance(size_t rank1, bool right_side1, size_t rank2, bool right_side2, size_t distance); void set_distance_start_start(size_t value); diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index d4ebe4fa..ec19aecd 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -55,95 +55,203 @@ CHOverlay make_boost_graph(bdsg::HashGraph& hg) { } CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, vector& all_children, const HandleGraph* hgraph) { + // Boost graph numbering principle (hypothesis): + // - Each child gets 4 Boost vertices at child_num*4, child_num*4+1, child_num*4+2, child_num*4+3 + // - For chains: 0=start_fwd, 1=start_rev, 2=end_fwd, 3=end_rev (where rev_bgid flips lowest bit) + // - For nodes: Unclear - only child_num*4 seems to be used? + // - rev_bgid(n) = n ^ 1 swaps between forward/reverse of same endpoint + + cerr << "=== make_boost_graph: Building net graph ===" << endl; + cerr << "Number of children: " << all_children.size() << endl; + cerr << "Allocating " << all_children.size()*4 << " Boost vertices (4 per child)" << endl; + CHOverlay ov(all_children.size()*4); //maps edge destination handle to id in Boost graph unordered_map handle_bgnid_map; - + + cerr << "--- Phase 1: Creating Boost vertices for each net graph child ---" << endl; for (size_t child_num = 0; child_num < all_children.size(); child_num++) { auto child = all_children[child_num]; if (child.first == bdsg::SnarlDistanceIndex::TEMP_CHAIN) { auto& record = temp_index.get_chain(child); handle_t start_handle = hgraph->get_handle(record.start_node_id, record.start_node_rev); - handle_t end_handle = hgraph->get_handle(record.end_node_id, record.end_node_rev); + handle_t end_handle = hgraph->get_handle(record.end_node_id, record.end_node_rev); + + cerr << "Child " << child_num << " is CHAIN: start_node=" << 
record.start_node_id + << " (rev=" << record.start_node_rev << "), end_node=" << record.end_node_id + << " (rev=" << record.end_node_rev << "), min_length=" << record.min_length << endl; + cerr << " start_handle: id=" << hgraph->get_id(start_handle) << " rev=" << hgraph->get_is_reverse(start_handle) << endl; + cerr << " end_handle: id=" << hgraph->get_id(end_handle) << " rev=" << hgraph->get_is_reverse(end_handle) << endl; + cerr << " Boost vertices: " << child_num*4 << " (start_fwd), " << child_num*4+1 << " (start_rev), " + << child_num*4+2 << " (end_fwd), " << child_num*4+3 << " (end_rev)" << endl; + //chain representation as node ids (numbers are offsets from child_num*4) // 1<-3 - // 0->2 + // 0->2 handle_bgnid_map[start_handle] = child_num*4; handle_bgnid_map[end_handle] = child_num*4+2; + + cerr << " Mapping start_handle -> Boost " << child_num*4 << endl; + cerr << " Mapping end_handle -> Boost " << child_num*4+2 << endl; + // TODO: Should we also map the REVERSE of start_handle and end_handle? + // Currently only forward orientations are mapped. If follow_edges returns + // a reversed handle, it won't be found in handle_bgnid_map. + //add edges representing distance across chain - auto new_edge = add_edge(child_num*4, child_num*4+2, ov); + auto new_edge = add_edge(child_num*4, child_num*4+2, ov); + ov[new_edge.first].weight = record.min_length; + cerr << " Edge " << child_num*4 << " -> " << child_num*4+2 << " (fwd traversal, weight=" << record.min_length << ")" << endl; + + new_edge = add_edge(child_num*4+3, child_num*4+1, ov); ov[new_edge.first].weight = record.min_length; - new_edge = add_edge(child_num*4+3, child_num*4+1, ov); - ov[new_edge.first].weight = record.min_length; + cerr << " Edge " << child_num*4+3 << " -> " << child_num*4+1 << " (rev traversal, weight=" << record.min_length << ")" << endl; //add looping distances (thanks Xian!) 
- auto& first_child = record.children.front(); - assert(first_child.first == bdsg::SnarlDistanceIndex::TEMP_NODE); + auto& first_child = record.children.front(); + assert(first_child.first == bdsg::SnarlDistanceIndex::TEMP_NODE); DIST_UINT start_node_length = temp_index.get_node(first_child).node_length; //record.children.front().first.node_length; DIST_UINT start_start_distance = record.forward_loops[0] + (2*start_node_length); DIST_UINT end_end_distance = record.backward_loops.back() + (2*record.end_node_length); + cerr << " Loop distances: start_start=" << start_start_distance << ", end_end=" << end_end_distance << endl; + //loops are edges between different orientations of the same node auto new_loop_edge = add_edge(child_num*4+2, child_num*4+3, ov); ov[new_loop_edge.first].weight = end_end_distance; + cerr << " Edge " << child_num*4+2 << " -> " << child_num*4+3 << " (end loop, weight=" << end_end_distance << ")" << endl; + new_loop_edge = add_edge(child_num*4+1, child_num*4, ov); - ov[new_loop_edge.first].weight = start_start_distance; - + ov[new_loop_edge.first].weight = start_start_distance; + cerr << " Edge " << child_num*4+1 << " -> " << child_num*4 << " (start loop, weight=" << start_start_distance << ")" << endl; + } else if (child.first == bdsg::SnarlDistanceIndex::TEMP_NODE) { auto& record = temp_index.get_node(child); handle_t node_handle = hgraph->get_handle(record.node_id, record.reversed_in_parent); + + cerr << "Child " << child_num << " is NODE: node_id=" << record.node_id + << " (reversed_in_parent=" << record.reversed_in_parent << "), length=" << record.node_length << endl; + cerr << " node_handle: id=" << hgraph->get_id(node_handle) << " rev=" << hgraph->get_is_reverse(node_handle) << endl; + cerr << " Boost vertices allocated: " << child_num*4 << "-" << child_num*4+3 << endl; + cerr << " Only setting seqlen on Boost vertex " << child_num*4 << endl; + ov[child_num*4].seqlen = record.node_length;//hgraph->get_length(node_handle); + + // TODO: 
Node is NOT added to handle_bgnid_map here! This seems like a bug. + // The edge-adding phase below tries to look up handles in the map, but nodes + // won't be found. Should we add: + // handle_bgnid_map[node_handle] = child_num*4; + // handle_bgnid_map[hgraph->flip(node_handle)] = child_num*4+1; // for reverse? + // Also: no edges are created for nodes (like the loop edges for chains). + // Should there be a "through the node" edge from one side to the other? + + cerr << " WARNING: node_handle NOT added to handle_bgnid_map!" << endl; } else { //uh oh cerr << "unexpected rec_type" << endl; } } + cerr << "--- Phase 2: Adding edges between children based on handle graph edges ---" << endl; + cerr << "Handle map contents:" << endl; + for (const auto& [h, bg_id] : handle_bgnid_map) { + cerr << " handle(id=" << hgraph->get_id(h) << ", rev=" << hgraph->get_is_reverse(h) << ") -> Boost " << bg_id << endl; + } + //add edges between Boost graph nodes of different temp chains / temp nodes for (size_t child_num = 0; child_num < all_children.size(); child_num++) { - auto child = all_children[child_num]; + auto child = all_children[child_num]; if (child.first == bdsg::SnarlDistanceIndex::TEMP_CHAIN) { - auto& record = temp_index.get_chain(child); + auto& record = temp_index.get_chain(child); const handle_t start_handle = hgraph->get_handle(record.start_node_id, record.start_node_rev); - const handle_t end_handle = hgraph->get_handle(record.end_node_id, record.end_node_rev); + const handle_t end_handle = hgraph->get_handle(record.end_node_id, record.end_node_rev); auto start_id = handle_bgnid_map[start_handle]; auto end_id = handle_bgnid_map[end_handle]; - //traverse edges going out of start and end nodes of the chain (thanks Xian!) 
- hgraph->follow_edges(start_handle, false, [&] (const handle_t& next) { - if (!handle_bgnid_map.contains(next)) { return; } + + cerr << "Child " << child_num << " (CHAIN): Finding edges from start_handle and end_handle" << endl; + cerr << " start_handle(id=" << hgraph->get_id(start_handle) << ", rev=" << hgraph->get_is_reverse(start_handle) + << ") -> Boost " << start_id << endl; + cerr << " end_handle(id=" << hgraph->get_id(end_handle) << ", rev=" << hgraph->get_is_reverse(end_handle) + << ") -> Boost " << end_id << endl; + + //traverse edges going out of start and end nodes of the chain (thanks Xian!) + cerr << " Following edges from start_handle (go_left=false):" << endl; + hgraph->follow_edges(start_handle, false, [&] (const handle_t& next) { + cerr << " Found edge to next(id=" << hgraph->get_id(next) << ", rev=" << hgraph->get_is_reverse(next) << ")" << endl; + if (!handle_bgnid_map.contains(next)) { + cerr << " NOT in handle_bgnid_map - skipping" << endl; + // TODO: This could be a problem. If the next handle is the reverse of something + // in the map, we won't find it. Should we check for flip(next) too? 
+ return; + } const auto next_id = handle_bgnid_map[next]; + cerr << " Maps to Boost " << next_id << endl; //pair of edge_descriptor and bool of it exists or not auto edge_info = edge(start_id, next_id, ov); if (!edge_info.second) { + cerr << " Adding edge " << start_id << " -> " << next_id << endl; + cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(start_id) << endl; add_edge(start_id, next_id, ov); add_edge(rev_bgid(next_id), rev_bgid(start_id), ov); + } else { + cerr << " Edge already exists" << endl; } }); + cerr << " Following edges from end_handle (go_left=false):" << endl; hgraph->follow_edges(end_handle, false, [&] (const handle_t& next) { - if (!handle_bgnid_map.contains(next)) { return; } + cerr << " Found edge to next(id=" << hgraph->get_id(next) << ", rev=" << hgraph->get_is_reverse(next) << ")" << endl; + if (!handle_bgnid_map.contains(next)) { + cerr << " NOT in handle_bgnid_map - skipping" << endl; + return; + } const auto next_id = handle_bgnid_map[next]; + cerr << " Maps to Boost " << next_id << endl; //pair of edge_descriptor and bool of it exists or not auto edge_info = edge(end_id, next_id, ov); if (!edge_info.second) { + cerr << " Adding edge " << end_id << " -> " << next_id << endl; + cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(end_id) << endl; add_edge(end_id, next_id, ov); add_edge(rev_bgid(next_id), rev_bgid(end_id), ov); + } else { + cerr << " Edge already exists" << endl; } - }); + }); } else { - if (child.first == bdsg::SnarlDistanceIndex::TEMP_NODE) { + if (child.first == bdsg::SnarlDistanceIndex::TEMP_NODE) { auto& record = temp_index.get_node(child); - handle_t node_handle = hgraph->get_handle(record.node_id, record.reversed_in_parent); + handle_t node_handle = hgraph->get_handle(record.node_id, record.reversed_in_parent); + + cerr << "Child " << child_num << " (NODE): Finding edges from node_handle" << endl; + cerr << " node_handle(id=" << hgraph->get_id(node_handle) << ", 
rev=" << hgraph->get_is_reverse(node_handle) << ")" << endl; + + // TODO: This lookup will FAIL because nodes were never added to handle_bgnid_map! + // The map lookup below will return 0 (default) which is wrong. + if (!handle_bgnid_map.contains(node_handle)) { + cerr << " ERROR: node_handle NOT in handle_bgnid_map! Lookup will return garbage." << endl; + } const auto node_id = handle_bgnid_map[node_handle]; + cerr << " Looked up Boost ID: " << node_id << " (expected: " << child_num*4 << ")" << endl; + for (bool direction: {true, false}) { + cerr << " Following edges (go_left=" << (direction ? "true" : "false") << "):" << endl; hgraph->follow_edges(node_handle, direction, [&] (const handle_t& next) { - if (!handle_bgnid_map.contains(next)) { return; } + cerr << " Found edge to next(id=" << hgraph->get_id(next) << ", rev=" << hgraph->get_is_reverse(next) << ")" << endl; + if (!handle_bgnid_map.contains(next)) { + cerr << " NOT in handle_bgnid_map - skipping" << endl; + return; + } const auto next_id = handle_bgnid_map[next]; + cerr << " Maps to Boost " << next_id << endl; //pair of edge_descriptor and bool of it exists or not auto edge_info = edge(node_id, next_id, ov); if (!edge_info.second) { + cerr << " Adding edge " << node_id << " -> " << next_id << endl; + cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(node_id) << endl; add_edge(node_id, next_id, ov); add_edge(rev_bgid(next_id), rev_bgid(node_id), ov); + } else { + cerr << " Edge already exists" << endl; } }); } @@ -151,6 +259,8 @@ CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_inde } } } + + cerr << "=== make_boost_graph complete ===" << endl; return ov; } diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 4947ca02..bbf141d0 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -4203,8 +4203,15 @@ size_t SnarlDistanceIndex::SnarlRecord::distance_vector_size(record_t type, size 
size_t SnarlDistanceIndex::SnarlRecord::record_size (record_t type, size_t node_count, size_t vec_size) { if (type == OVERSIZED_SNARL) { - return SNARL_RECORD_SIZE + vec_size; + // Oversized snarls need the fixed-size header, the slot for the length + // of the packed hub label vector, and the packed hub label vector + // itself. + + // TODO: Can we stop storing the packed hub label vector length? Do we + // ever use it??? + return SNARL_RECORD_SIZE + 1 + vec_size; } else { + // Normal snarl records need the fixed-size header and the distance matrix return SNARL_RECORD_SIZE + distance_vector_size(type, node_count); } } @@ -4403,10 +4410,6 @@ void SnarlDistanceIndex::SnarlRecordWriter::set_node_count(size_t node_count) { (*records)->at(record_offset + SNARL_NODE_COUNT_OFFSET) = node_count; } -/* -set size of hub labels vector (hub_labels) -putting vec_size in the SNARL_RECORD_SIZE slot due to it being the first one after the header -*/ void SnarlDistanceIndex::SnarlRecordWriter::set_vec_size(size_t vec_size) { #ifdef debug_distance_indexing cerr << record_offset + SNARL_RECORD_SIZE << " set vec_size " << vec_size << endl; @@ -4415,8 +4418,19 @@ void SnarlDistanceIndex::SnarlRecordWriter::set_vec_size(size_t vec_size) { #endif (*records)->at(record_offset + SNARL_RECORD_SIZE) = vec_size; +} + +void SnarlDistanceIndex::SnarlRecordWriter::set_vec_entry(size_t index, size_t value) { +#ifdef debug_distance_indexing + cerr << record_offset + SNARL_RECORD_SIZE + 1 + index << " set vec entry " << value << endl; + assert(index < (*records)->at(record_offset + SNARL_RECORD_SIZE)); + assert((*records)->at(record_offset + SNARL_RECORD_SIZE + 1 + index) == 0); +#endif + // The hub label data sits right after its size, after the end of the fixed-size header. 
+ (*records)->at(record_offset + SNARL_RECORD_SIZE + 1 + index) = value; } + size_t SnarlDistanceIndex::SnarlRecord::get_child_record_pointer() const { return (*records)->at(record_offset+SNARL_CHILD_RECORD_OFFSET) ; } @@ -6413,26 +6427,38 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector node_rank1 = it.first.first; - pair node_rank2 = it.first.second; - const size_t distance = it.second; - - if (!ignore_distances) { - //If we are keeping track of distances - //If the distance exceeded the limit, then it wasn't found in the first place - snarl_record_constructor.set_distance(node_rank1.first, node_rank1.second, - node_rank2.first, node_rank2.second, distance); - - if (temp_snarl_record.tippy_child_ranks.count(node_rank1.first) - && temp_snarl_record.tippy_child_ranks.count(node_rank2.first)) { - snarl_record_constructor.set_tip_tip_connected(); - } + if (record_type == OVERSIZED_SNARL) { + // We need to copy the packed hub label vector into place. + for (size_t i = 0; i < temp_snarl_record.hub_labels.size(); i++) { + // TODO: Make this an std::copy or something. + snarl_record_constructor.set_vec_entry(i, temp_snarl_record.hub_labels.at(i)); + } + // TODO: When should we call + // snarl_record_constructor.set_tip_tip_connected()? + // Add code to determine that somewhere! + } else { + // Store individual distance entries. 
+ for (const auto& it : temp_snarl_record.distances) { + pair node_rank1 = it.first.first; + pair node_rank2 = it.first.second; + const size_t distance = it.second; + + if (!ignore_distances) { + //If we are keeping track of distances + //If the distance exceeded the limit, then it wasn't found in the first place + snarl_record_constructor.set_distance(node_rank1.first, node_rank1.second, + node_rank2.first, node_rank2.second, distance); + + if (temp_snarl_record.tippy_child_ranks.count(node_rank1.first) + && temp_snarl_record.tippy_child_ranks.count(node_rank2.first)) { + snarl_record_constructor.set_tip_tip_connected(); + } #ifdef debug_distance_indexing - assert(distance <= temp_snarl_record.max_distance); - assert(snarl_record_constructor.get_distance(node_rank1.first, node_rank1.second, - node_rank2.first, node_rank2.second) == distance); + assert(distance <= temp_snarl_record.max_distance); + assert(snarl_record_constructor.get_distance(node_rank1.first, node_rank1.second, + node_rank2.first, node_rank2.second) == distance); #endif + } } } //Now set the connectivity of this snarl From ad438827fc9706d93f9778353ff76f5a1752721e Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 27 Jan 2026 14:32:54 -0500 Subject: [PATCH 10/75] Synthesize code to allocate vertices and avoid wasting them, and to hopefully query off the right orientations --- bdsg/src/ch.cpp | 358 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 240 insertions(+), 118 deletions(-) diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index ec19aecd..6e0a2168 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -1,8 +1,10 @@ /* -file for quickly playing around with stuff +file for quickly playing around with stuff */ #include "bdsg/ch.hpp" +#define debug_boost_graph + namespace bdsg { bdsg::HashGraph make_test() { bdsg::HashGraph g; @@ -55,21 +57,47 @@ CHOverlay make_boost_graph(bdsg::HashGraph& hg) { } CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, 
SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, vector& all_children, const HandleGraph* hgraph) { - // Boost graph numbering principle (hypothesis): - // - Each child gets 4 Boost vertices at child_num*4, child_num*4+1, child_num*4+2, child_num*4+3 - // - For chains: 0=start_fwd, 1=start_rev, 2=end_fwd, 3=end_rev (where rev_bgid flips lowest bit) - // - For nodes: Unclear - only child_num*4 seems to be used? - // - rev_bgid(n) = n ^ 1 swaps between forward/reverse of same endpoint + // Boost graph vertex allocation: + // - Chains get 4 vertices: start_fwd, start_rev, end_fwd, end_rev + // - Nodes get 2 vertices: fwd, rev (like in the HashGraph overload) + // - rev_bgid(n) = n ^ 1 swaps between forward/reverse orientations + // + // For chains, the diagram is: + // 1<-3 (reverse traversal: end_rev -> start_rev) + // 0->2 (forward traversal: start_fwd -> end_fwd) + // With loop edges: 2->3 (end loop), 1->0 (start loop) + // + // For nodes, we follow the HashGraph pattern: + // base = fwd orientation, base+1 = rev orientation + // No through-edge (sequence length is on seqlen property) + + // First pass: count how many vertices we need + size_t total_vertices = 0; + for (const auto& child : all_children) { + if (child.first == bdsg::SnarlDistanceIndex::TEMP_CHAIN) { + total_vertices += 4; + } else if (child.first == bdsg::SnarlDistanceIndex::TEMP_NODE) { + total_vertices += 2; + } + } +#ifdef debug_boost_graph cerr << "=== make_boost_graph: Building net graph ===" << endl; cerr << "Number of children: " << all_children.size() << endl; - cerr << "Allocating " << all_children.size()*4 << " Boost vertices (4 per child)" << endl; + cerr << "Allocating " << total_vertices << " Boost vertices (4 per chain, 2 per node)" << endl; +#endif - CHOverlay ov(all_children.size()*4); - //maps edge destination handle to id in Boost graph + CHOverlay ov(total_vertices); + // Maps handle to Boost 
graph vertex ID unordered_map handle_bgnid_map; + // Track current vertex offset as we iterate (chains get 4, nodes get 2) + size_t vertex_offset = 0; + +#ifdef debug_boost_graph cerr << "--- Phase 1: Creating Boost vertices for each net graph child ---" << endl; +#endif + for (size_t child_num = 0; child_num < all_children.size(); child_num++) { auto child = all_children[child_num]; if (child.first == bdsg::SnarlDistanceIndex::TEMP_CHAIN) { @@ -77,190 +105,284 @@ CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_inde handle_t start_handle = hgraph->get_handle(record.start_node_id, record.start_node_rev); handle_t end_handle = hgraph->get_handle(record.end_node_id, record.end_node_rev); +#ifdef debug_boost_graph cerr << "Child " << child_num << " is CHAIN: start_node=" << record.start_node_id << " (rev=" << record.start_node_rev << "), end_node=" << record.end_node_id << " (rev=" << record.end_node_rev << "), min_length=" << record.min_length << endl; cerr << " start_handle: id=" << hgraph->get_id(start_handle) << " rev=" << hgraph->get_is_reverse(start_handle) << endl; cerr << " end_handle: id=" << hgraph->get_id(end_handle) << " rev=" << hgraph->get_is_reverse(end_handle) << endl; - cerr << " Boost vertices: " << child_num*4 << " (start_fwd), " << child_num*4+1 << " (start_rev), " - << child_num*4+2 << " (end_fwd), " << child_num*4+3 << " (end_rev)" << endl; - - //chain representation as node ids (numbers are offsets from child_num*4) - // 1<-3 - // 0->2 - handle_bgnid_map[start_handle] = child_num*4; - handle_bgnid_map[end_handle] = child_num*4+2; - - cerr << " Mapping start_handle -> Boost " << child_num*4 << endl; - cerr << " Mapping end_handle -> Boost " << child_num*4+2 << endl; - // TODO: Should we also map the REVERSE of start_handle and end_handle? - // Currently only forward orientations are mapped. If follow_edges returns - // a reversed handle, it won't be found in handle_bgnid_map. 
- - //add edges representing distance across chain - auto new_edge = add_edge(child_num*4, child_num*4+2, ov); + cerr << " Boost vertices: " << vertex_offset << " (start_fwd), " << vertex_offset+1 << " (start_rev), " + << vertex_offset+2 << " (end_fwd), " << vertex_offset+3 << " (end_rev)" << endl; +#endif + + // Chain vertex layout (offsets from vertex_offset): + // Both start_handle and end_handle point left-to-right along the chain. + // At start (left): left-to-right = inward. At end (right): left-to-right = outward. + // 0 = start_handle (inward-facing, receives edges from outside) + // 1 = flip(start_handle) (outward-facing, sends edges to outside) + // 2 = end_handle (outward-facing, sends edges to outside) + // 3 = flip(end_handle) (inward-facing, receives edges from outside) + + // Map both orientations of start and end handles + handle_bgnid_map[start_handle] = vertex_offset; // start inward + handle_bgnid_map[hgraph->flip(start_handle)] = vertex_offset + 1; // start outward + handle_bgnid_map[end_handle] = vertex_offset + 2; // end outward + handle_bgnid_map[hgraph->flip(end_handle)] = vertex_offset + 3; // end inward + +#ifdef debug_boost_graph + cerr << " Mapping start_handle (inward) -> Boost " << vertex_offset << endl; + cerr << " Mapping flip(start_handle) (outward) -> Boost " << vertex_offset + 1 << endl; + cerr << " Mapping end_handle (outward) -> Boost " << vertex_offset + 2 << endl; + cerr << " Mapping flip(end_handle) (inward) -> Boost " << vertex_offset + 3 << endl; +#endif + + // Add edges representing distance across chain + auto new_edge = add_edge(vertex_offset, vertex_offset + 2, ov); ov[new_edge.first].weight = record.min_length; - cerr << " Edge " << child_num*4 << " -> " << child_num*4+2 << " (fwd traversal, weight=" << record.min_length << ")" << endl; +#ifdef debug_boost_graph + cerr << " Edge " << vertex_offset << " -> " << vertex_offset + 2 << " (fwd traversal, weight=" << record.min_length << ")" << endl; +#endif - new_edge = 
add_edge(child_num*4+3, child_num*4+1, ov); + new_edge = add_edge(vertex_offset + 3, vertex_offset + 1, ov); ov[new_edge.first].weight = record.min_length; - cerr << " Edge " << child_num*4+3 << " -> " << child_num*4+1 << " (rev traversal, weight=" << record.min_length << ")" << endl; +#ifdef debug_boost_graph + cerr << " Edge " << vertex_offset + 3 << " -> " << vertex_offset + 1 << " (rev traversal, weight=" << record.min_length << ")" << endl; +#endif - //add looping distances (thanks Xian!) + // Add looping distances (thanks Xian!) auto& first_child = record.children.front(); assert(first_child.first == bdsg::SnarlDistanceIndex::TEMP_NODE); DIST_UINT start_node_length = temp_index.get_node(first_child).node_length; - //record.children.front().first.node_length; - DIST_UINT start_start_distance = record.forward_loops[0] + (2*start_node_length); - DIST_UINT end_end_distance = record.backward_loops.back() + (2*record.end_node_length); + DIST_UINT start_start_distance = record.forward_loops[0] + (2 * start_node_length); + DIST_UINT end_end_distance = record.backward_loops.back() + (2 * record.end_node_length); + +#ifdef debug_boost_graph cerr << " Loop distances: start_start=" << start_start_distance << ", end_end=" << end_end_distance << endl; +#endif - //loops are edges between different orientations of the same node - auto new_loop_edge = add_edge(child_num*4+2, child_num*4+3, ov); + // Loops are edges between different orientations of the same endpoint + auto new_loop_edge = add_edge(vertex_offset + 2, vertex_offset + 3, ov); ov[new_loop_edge.first].weight = end_end_distance; - cerr << " Edge " << child_num*4+2 << " -> " << child_num*4+3 << " (end loop, weight=" << end_end_distance << ")" << endl; +#ifdef debug_boost_graph + cerr << " Edge " << vertex_offset + 2 << " -> " << vertex_offset + 3 << " (end loop, weight=" << end_end_distance << ")" << endl; +#endif - new_loop_edge = add_edge(child_num*4+1, child_num*4, ov); + new_loop_edge = add_edge(vertex_offset + 
1, vertex_offset, ov); ov[new_loop_edge.first].weight = start_start_distance; - cerr << " Edge " << child_num*4+1 << " -> " << child_num*4 << " (start loop, weight=" << start_start_distance << ")" << endl; +#ifdef debug_boost_graph + cerr << " Edge " << vertex_offset + 1 << " -> " << vertex_offset << " (start loop, weight=" << start_start_distance << ")" << endl; +#endif + + vertex_offset += 4; } else if (child.first == bdsg::SnarlDistanceIndex::TEMP_NODE) { auto& record = temp_index.get_node(child); handle_t node_handle = hgraph->get_handle(record.node_id, record.reversed_in_parent); +#ifdef debug_boost_graph cerr << "Child " << child_num << " is NODE: node_id=" << record.node_id << " (reversed_in_parent=" << record.reversed_in_parent << "), length=" << record.node_length << endl; cerr << " node_handle: id=" << hgraph->get_id(node_handle) << " rev=" << hgraph->get_is_reverse(node_handle) << endl; - cerr << " Boost vertices allocated: " << child_num*4 << "-" << child_num*4+3 << endl; - cerr << " Only setting seqlen on Boost vertex " << child_num*4 << endl; + cerr << " Boost vertices: " << vertex_offset << " (fwd), " << vertex_offset + 1 << " (rev)" << endl; +#endif + + // Node vertex layout (like HashGraph overload): + // base = forward orientation + // base+1 = reverse orientation + // Both get seqlen set, no through-edge between them + + // Map both orientations + handle_bgnid_map[node_handle] = vertex_offset; + handle_bgnid_map[hgraph->flip(node_handle)] = vertex_offset + 1; + +#ifdef debug_boost_graph + cerr << " Mapping node_handle -> Boost " << vertex_offset << endl; + cerr << " Mapping flip(node_handle) -> Boost " << vertex_offset + 1 << endl; +#endif + + // Set seqlen on both orientations (like HashGraph overload) + ov[vertex_offset].seqlen = record.node_length; + ov[vertex_offset + 1].seqlen = record.node_length; - ov[child_num*4].seqlen = record.node_length;//hgraph->get_length(node_handle); +#ifdef debug_boost_graph + cerr << " Setting seqlen=" << 
record.node_length << " on both Boost vertices " << vertex_offset << " and " << vertex_offset + 1 << endl; +#endif - // TODO: Node is NOT added to handle_bgnid_map here! This seems like a bug. - // The edge-adding phase below tries to look up handles in the map, but nodes - // won't be found. Should we add: - // handle_bgnid_map[node_handle] = child_num*4; - // handle_bgnid_map[hgraph->flip(node_handle)] = child_num*4+1; // for reverse? - // Also: no edges are created for nodes (like the loop edges for chains). - // Should there be a "through the node" edge from one side to the other? + vertex_offset += 2; - cerr << " WARNING: node_handle NOT added to handle_bgnid_map!" << endl; } else { - //uh oh cerr << "unexpected rec_type" << endl; } } +#ifdef debug_boost_graph cerr << "--- Phase 2: Adding edges between children based on handle graph edges ---" << endl; cerr << "Handle map contents:" << endl; for (const auto& [h, bg_id] : handle_bgnid_map) { cerr << " handle(id=" << hgraph->get_id(h) << ", rev=" << hgraph->get_is_reverse(h) << ") -> Boost " << bg_id << endl; } +#endif - //add edges between Boost graph nodes of different temp chains / temp nodes + // Reset vertex_offset for second pass + vertex_offset = 0; + + // Add edges between Boost graph nodes of different temp chains / temp nodes for (size_t child_num = 0; child_num < all_children.size(); child_num++) { auto child = all_children[child_num]; if (child.first == bdsg::SnarlDistanceIndex::TEMP_CHAIN) { auto& record = temp_index.get_chain(child); - const handle_t start_handle = hgraph->get_handle(record.start_node_id, record.start_node_rev); - const handle_t end_handle = hgraph->get_handle(record.end_node_id, record.end_node_rev); - auto start_id = handle_bgnid_map[start_handle]; - auto end_id = handle_bgnid_map[end_handle]; - - cerr << "Child " << child_num << " (CHAIN): Finding edges from start_handle and end_handle" << endl; - cerr << " start_handle(id=" << hgraph->get_id(start_handle) << ", rev=" << 
hgraph->get_is_reverse(start_handle) - << ") -> Boost " << start_id << endl; - cerr << " end_handle(id=" << hgraph->get_id(end_handle) << ", rev=" << hgraph->get_is_reverse(end_handle) - << ") -> Boost " << end_id << endl; - - //traverse edges going out of start and end nodes of the chain (thanks Xian!) - cerr << " Following edges from start_handle (go_left=false):" << endl; - hgraph->follow_edges(start_handle, false, [&] (const handle_t& next) { + // Both handles point left-to-right along the chain: + // - At start (left side): left-to-right = INWARD (pointing into chain) + // - At end (right side): left-to-right = OUTWARD (pointing out of chain) + const handle_t start_handle_inward = hgraph->get_handle(record.start_node_id, record.start_node_rev); + const handle_t start_handle_outward = hgraph->flip(start_handle_inward); + const handle_t end_handle_outward = hgraph->get_handle(record.end_node_id, record.end_node_rev); + const handle_t end_handle_inward = hgraph->flip(end_handle_outward); + + // Outward-facing vertices are used as edge sources when connecting to other children + auto start_id_outward = handle_bgnid_map[start_handle_outward]; // vertex_offset + 1 + auto end_id_outward = handle_bgnid_map[end_handle_outward]; // vertex_offset + 2 + +#ifdef debug_boost_graph + cerr << "Child " << child_num << " (CHAIN): Finding edges from chain endpoints" << endl; + cerr << " start_handle_outward(id=" << hgraph->get_id(start_handle_outward) << ", rev=" << hgraph->get_is_reverse(start_handle_outward) + << ") -> Boost " << start_id_outward << endl; + cerr << " end_handle_outward(id=" << hgraph->get_id(end_handle_outward) << ", rev=" << hgraph->get_is_reverse(end_handle_outward) + << ") -> Boost " << end_id_outward << endl; +#endif + + // For start: look LEFT (go_left=true) to find edges going outside the chain + // start_handle_inward points into the chain, so looking left from it goes outside +#ifdef debug_boost_graph + cerr << " Following edges from 
start_handle_inward (go_left=true to look outside chain):" << endl; +#endif + hgraph->follow_edges(start_handle_inward, true, [&] (const handle_t& next) { +#ifdef debug_boost_graph cerr << " Found edge to next(id=" << hgraph->get_id(next) << ", rev=" << hgraph->get_is_reverse(next) << ")" << endl; +#endif if (!handle_bgnid_map.contains(next)) { +#ifdef debug_boost_graph cerr << " NOT in handle_bgnid_map - skipping" << endl; - // TODO: This could be a problem. If the next handle is the reverse of something - // in the map, we won't find it. Should we check for flip(next) too? +#endif return; } const auto next_id = handle_bgnid_map[next]; +#ifdef debug_boost_graph cerr << " Maps to Boost " << next_id << endl; - //pair of edge_descriptor and bool of it exists or not - auto edge_info = edge(start_id, next_id, ov); +#endif + // Edge from our outward vertex to the destination + auto edge_info = edge(start_id_outward, next_id, ov); if (!edge_info.second) { - cerr << " Adding edge " << start_id << " -> " << next_id << endl; - cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(start_id) << endl; - add_edge(start_id, next_id, ov); - add_edge(rev_bgid(next_id), rev_bgid(start_id), ov); +#ifdef debug_boost_graph + cerr << " Adding edge " << start_id_outward << " -> " << next_id << endl; + cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(start_id_outward) << endl; +#endif + add_edge(start_id_outward, next_id, ov); + add_edge(rev_bgid(next_id), rev_bgid(start_id_outward), ov); } else { +#ifdef debug_boost_graph cerr << " Edge already exists" << endl; +#endif } }); - cerr << " Following edges from end_handle (go_left=false):" << endl; - hgraph->follow_edges(end_handle, false, [&] (const handle_t& next) { + // For end: look RIGHT (go_left=false) to find edges going outside the chain + // end_handle_outward already points out of the chain, so looking right from it goes outside +#ifdef debug_boost_graph + cerr << " Following edges 
from end_handle_outward (go_left=false to look outside chain):" << endl; +#endif + hgraph->follow_edges(end_handle_outward, false, [&] (const handle_t& next) { +#ifdef debug_boost_graph cerr << " Found edge to next(id=" << hgraph->get_id(next) << ", rev=" << hgraph->get_is_reverse(next) << ")" << endl; +#endif if (!handle_bgnid_map.contains(next)) { +#ifdef debug_boost_graph cerr << " NOT in handle_bgnid_map - skipping" << endl; +#endif return; } const auto next_id = handle_bgnid_map[next]; +#ifdef debug_boost_graph cerr << " Maps to Boost " << next_id << endl; - //pair of edge_descriptor and bool of it exists or not - auto edge_info = edge(end_id, next_id, ov); +#endif + auto edge_info = edge(end_id_outward, next_id, ov); if (!edge_info.second) { - cerr << " Adding edge " << end_id << " -> " << next_id << endl; - cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(end_id) << endl; - add_edge(end_id, next_id, ov); - add_edge(rev_bgid(next_id), rev_bgid(end_id), ov); +#ifdef debug_boost_graph + cerr << " Adding edge " << end_id_outward << " -> " << next_id << endl; + cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(end_id_outward) << endl; +#endif + add_edge(end_id_outward, next_id, ov); + add_edge(rev_bgid(next_id), rev_bgid(end_id_outward), ov); } else { +#ifdef debug_boost_graph cerr << " Edge already exists" << endl; +#endif } }); - } else { - if (child.first == bdsg::SnarlDistanceIndex::TEMP_NODE) { - auto& record = temp_index.get_node(child); - handle_t node_handle = hgraph->get_handle(record.node_id, record.reversed_in_parent); - - cerr << "Child " << child_num << " (NODE): Finding edges from node_handle" << endl; - cerr << " node_handle(id=" << hgraph->get_id(node_handle) << ", rev=" << hgraph->get_is_reverse(node_handle) << ")" << endl; - // TODO: This lookup will FAIL because nodes were never added to handle_bgnid_map! - // The map lookup below will return 0 (default) which is wrong. 
- if (!handle_bgnid_map.contains(node_handle)) { - cerr << " ERROR: node_handle NOT in handle_bgnid_map! Lookup will return garbage." << endl; - } - const auto node_id = handle_bgnid_map[node_handle]; - cerr << " Looked up Boost ID: " << node_id << " (expected: " << child_num*4 << ")" << endl; - - for (bool direction: {true, false}) { - cerr << " Following edges (go_left=" << (direction ? "true" : "false") << "):" << endl; - hgraph->follow_edges(node_handle, direction, [&] (const handle_t& next) { - cerr << " Found edge to next(id=" << hgraph->get_id(next) << ", rev=" << hgraph->get_is_reverse(next) << ")" << endl; - if (!handle_bgnid_map.contains(next)) { - cerr << " NOT in handle_bgnid_map - skipping" << endl; - return; - } - const auto next_id = handle_bgnid_map[next]; - cerr << " Maps to Boost " << next_id << endl; - //pair of edge_descriptor and bool of it exists or not - auto edge_info = edge(node_id, next_id, ov); - if (!edge_info.second) { - cerr << " Adding edge " << node_id << " -> " << next_id << endl; - cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(node_id) << endl; - add_edge(node_id, next_id, ov); - add_edge(rev_bgid(next_id), rev_bgid(node_id), ov); - } else { - cerr << " Edge already exists" << endl; - } - }); - } + vertex_offset += 4; + } else if (child.first == bdsg::SnarlDistanceIndex::TEMP_NODE) { + auto& record = temp_index.get_node(child); + handle_t node_handle = hgraph->get_handle(record.node_id, record.reversed_in_parent); + const auto node_id_fwd = handle_bgnid_map[node_handle]; + const auto node_id_rev = handle_bgnid_map[hgraph->flip(node_handle)]; + +#ifdef debug_boost_graph + cerr << "Child " << child_num << " (NODE): Finding edges from node" << endl; + cerr << " node_handle(id=" << hgraph->get_id(node_handle) << ", rev=" << hgraph->get_is_reverse(node_handle) + << ") -> Boost " << node_id_fwd << " (fwd), " << node_id_rev << " (rev)" << endl; +#endif + + // For nodes, we look both directions from each handle 
+ // go_left=false from node_handle finds edges leaving the right side + // go_left=true from node_handle finds edges leaving the left side + for (bool go_left : {false, true}) { + // Determine which Boost vertex is the source based on direction + // If go_left=false (looking right), edges leave from the forward vertex + // If go_left=true (looking left), edges leave from the reverse vertex + auto source_id = go_left ? node_id_rev : node_id_fwd; + +#ifdef debug_boost_graph + cerr << " Following edges from node_handle (go_left=" << (go_left ? "true" : "false") << "), source Boost=" << source_id << ":" << endl; +#endif + hgraph->follow_edges(node_handle, go_left, [&] (const handle_t& next) { +#ifdef debug_boost_graph + cerr << " Found edge to next(id=" << hgraph->get_id(next) << ", rev=" << hgraph->get_is_reverse(next) << ")" << endl; +#endif + if (!handle_bgnid_map.contains(next)) { +#ifdef debug_boost_graph + cerr << " NOT in handle_bgnid_map - skipping" << endl; +#endif + return; + } + const auto next_id = handle_bgnid_map[next]; +#ifdef debug_boost_graph + cerr << " Maps to Boost " << next_id << endl; +#endif + auto edge_info = edge(source_id, next_id, ov); + if (!edge_info.second) { +#ifdef debug_boost_graph + cerr << " Adding edge " << source_id << " -> " << next_id << endl; + cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(source_id) << endl; +#endif + add_edge(source_id, next_id, ov); + add_edge(rev_bgid(next_id), rev_bgid(source_id), ov); + } else { +#ifdef debug_boost_graph + cerr << " Edge already exists" << endl; +#endif + } + }); } + + vertex_offset += 2; } } +#ifdef debug_boost_graph cerr << "=== make_boost_graph complete ===" << endl; +#endif return ov; } From 0e6aa47e59f482664da4b83628cdc5f7f2659c03 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 27 Jan 2026 16:07:13 -0500 Subject: [PATCH 11/75] Redesign to only think of indexing inward-facing handles for converting the net graph --- bdsg/src/ch.cpp | 192 
++++++++++-------------------------------------- 1 file changed, 37 insertions(+), 155 deletions(-) diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index 6e0a2168..691c2a83 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -69,7 +69,6 @@ CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_inde // // For nodes, we follow the HashGraph pattern: // base = fwd orientation, base+1 = rev orientation - // No through-edge (sequence length is on seqlen property) // First pass: count how many vertices we need size_t total_vertices = 0; @@ -88,7 +87,8 @@ CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_inde #endif CHOverlay ov(total_vertices); - // Maps handle to Boost graph vertex ID + // Maps inward-facing handle to Boost graph vertex ID. + // Doesn't include outward-facing handles. unordered_map handle_bgnid_map; // Track current vertex offset as we iterate (chains get 4, nodes get 2) @@ -122,17 +122,15 @@ CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_inde // 1 = flip(start_handle) (outward-facing, sends edges to outside) // 2 = end_handle (outward-facing, sends edges to outside) // 3 = flip(end_handle) (inward-facing, receives edges from outside) + // + // But only the inward-facing versions can be arrived at, so only they need to be indexed directly. 
- // Map both orientations of start and end handles + // Map inward orientations of start and end handles handle_bgnid_map[start_handle] = vertex_offset; // start inward - handle_bgnid_map[hgraph->flip(start_handle)] = vertex_offset + 1; // start outward - handle_bgnid_map[end_handle] = vertex_offset + 2; // end outward handle_bgnid_map[hgraph->flip(end_handle)] = vertex_offset + 3; // end inward #ifdef debug_boost_graph cerr << " Mapping start_handle (inward) -> Boost " << vertex_offset << endl; - cerr << " Mapping flip(start_handle) (outward) -> Boost " << vertex_offset + 1 << endl; - cerr << " Mapping end_handle (outward) -> Boost " << vertex_offset + 2 << endl; cerr << " Mapping flip(end_handle) (inward) -> Boost " << vertex_offset + 3 << endl; #endif @@ -156,6 +154,10 @@ CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_inde DIST_UINT start_start_distance = record.forward_loops[0] + (2 * start_node_length); DIST_UINT end_end_distance = record.backward_loops.back() + (2 * record.end_node_length); + // TODO: Shouldn't we not make the loop edges if the loops don't exist + // or are unreachable distance sentinels or whatever? Are forward_loops + // and backward_loops always nonempty? + #ifdef debug_boost_graph cerr << " Loop distances: start_start=" << start_start_distance << ", end_end=" << end_end_distance << endl; #endif @@ -189,9 +191,10 @@ CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_inde // Node vertex layout (like HashGraph overload): // base = forward orientation // base+1 = reverse orientation - // Both get seqlen set, no through-edge between them + // Both get seqlen set - // Map both orientations + // Map both orientations; both orientations of a node count as + // "inward-facing" since both can be arrived at. 
handle_bgnid_map[node_handle] = vertex_offset; handle_bgnid_map[hgraph->flip(node_handle)] = vertex_offset + 1; @@ -223,163 +226,42 @@ CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_inde } #endif - // Reset vertex_offset for second pass - vertex_offset = 0; - - // Add edges between Boost graph nodes of different temp chains / temp nodes - for (size_t child_num = 0; child_num < all_children.size(); child_num++) { - auto child = all_children[child_num]; - if (child.first == bdsg::SnarlDistanceIndex::TEMP_CHAIN) { - auto& record = temp_index.get_chain(child); - // Both handles point left-to-right along the chain: - // - At start (left side): left-to-right = INWARD (pointing into chain) - // - At end (right side): left-to-right = OUTWARD (pointing out of chain) - const handle_t start_handle_inward = hgraph->get_handle(record.start_node_id, record.start_node_rev); - const handle_t start_handle_outward = hgraph->flip(start_handle_inward); - const handle_t end_handle_outward = hgraph->get_handle(record.end_node_id, record.end_node_rev); - const handle_t end_handle_inward = hgraph->flip(end_handle_outward); - - // Outward-facing vertices are used as edge sources when connecting to other children - auto start_id_outward = handle_bgnid_map[start_handle_outward]; // vertex_offset + 1 - auto end_id_outward = handle_bgnid_map[end_handle_outward]; // vertex_offset + 2 - -#ifdef debug_boost_graph - cerr << "Child " << child_num << " (CHAIN): Finding edges from chain endpoints" << endl; - cerr << " start_handle_outward(id=" << hgraph->get_id(start_handle_outward) << ", rev=" << hgraph->get_is_reverse(start_handle_outward) - << ") -> Boost " << start_id_outward << endl; - cerr << " end_handle_outward(id=" << hgraph->get_id(end_handle_outward) << ", rev=" << hgraph->get_is_reverse(end_handle_outward) - << ") -> Boost " << end_id_outward << endl; -#endif - - // For start: look LEFT (go_left=true) to find edges going outside the chain - // 
start_handle_inward points into the chain, so looking left from it goes outside -#ifdef debug_boost_graph - cerr << " Following edges from start_handle_inward (go_left=true to look outside chain):" << endl; -#endif - hgraph->follow_edges(start_handle_inward, true, [&] (const handle_t& next) { -#ifdef debug_boost_graph - cerr << " Found edge to next(id=" << hgraph->get_id(next) << ", rev=" << hgraph->get_is_reverse(next) << ")" << endl; -#endif - if (!handle_bgnid_map.contains(next)) { -#ifdef debug_boost_graph - cerr << " NOT in handle_bgnid_map - skipping" << endl; -#endif - return; - } - const auto next_id = handle_bgnid_map[next]; -#ifdef debug_boost_graph - cerr << " Maps to Boost " << next_id << endl; -#endif - // Edge from our outward vertex to the destination - auto edge_info = edge(start_id_outward, next_id, ov); - if (!edge_info.second) { -#ifdef debug_boost_graph - cerr << " Adding edge " << start_id_outward << " -> " << next_id << endl; - cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(start_id_outward) << endl; -#endif - add_edge(start_id_outward, next_id, ov); - add_edge(rev_bgid(next_id), rev_bgid(start_id_outward), ov); - } else { -#ifdef debug_boost_graph - cerr << " Edge already exists" << endl; -#endif + for (auto [handle_in, vertex_id_in] : handle_bgnid_map) { + // The map contains inward-facing orientations of every handle. + // So get the outward-facing versioin. + + handle_t handle = hgraph->flip(handle_in); + NODE_UINT vertex_id = rev_bgid(vertex_id_in); + + // We need to get all the edges off the right side of this outward-facing + // handle and create them if they don't already exist. + hgraph->follow_edges(handle, false, [&] (const handle_t& next) { + auto found = handle_bgnid_map.find(next); + if (found == handle_bgnid_map.end()) { + // We're looking outside our net graph, or have reached something + // not inward-facing (like across the inside of a chain). + // Don't add the edge. 
+ return; } - }); + NODE_UINT next_id = found->second; - // For end: look RIGHT (go_left=false) to find edges going outside the chain - // end_handle_outward already points out of the chain, so looking right from it goes outside -#ifdef debug_boost_graph - cerr << " Following edges from end_handle_outward (go_left=false to look outside chain):" << endl; -#endif - hgraph->follow_edges(end_handle_outward, false, [&] (const handle_t& next) { -#ifdef debug_boost_graph - cerr << " Found edge to next(id=" << hgraph->get_id(next) << ", rev=" << hgraph->get_is_reverse(next) << ")" << endl; -#endif - if (!handle_bgnid_map.contains(next)) { -#ifdef debug_boost_graph - cerr << " NOT in handle_bgnid_map - skipping" << endl; -#endif - return; - } - const auto next_id = handle_bgnid_map[next]; -#ifdef debug_boost_graph - cerr << " Maps to Boost " << next_id << endl; -#endif - auto edge_info = edge(end_id_outward, next_id, ov); + auto edge_info = edge(vertex_id, next_id, ov); if (!edge_info.second) { #ifdef debug_boost_graph - cerr << " Adding edge " << end_id_outward << " -> " << next_id << endl; - cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(end_id_outward) << endl; + cerr << " Adding edge " << vertex_id << " -> " << next_id << endl; + cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(vertex_id) << endl; #endif - add_edge(end_id_outward, next_id, ov); - add_edge(rev_bgid(next_id), rev_bgid(end_id_outward), ov); + add_edge(vertex_id, next_id, ov); + add_edge(rev_bgid(next_id), rev_bgid(vertex_id), ov); } else { #ifdef debug_boost_graph - cerr << " Edge already exists" << endl; + cerr << " Edge already exists" << endl; #endif } - }); - - vertex_offset += 4; - - } else if (child.first == bdsg::SnarlDistanceIndex::TEMP_NODE) { - auto& record = temp_index.get_node(child); - handle_t node_handle = hgraph->get_handle(record.node_id, record.reversed_in_parent); - const auto node_id_fwd = handle_bgnid_map[node_handle]; - const auto 
node_id_rev = handle_bgnid_map[hgraph->flip(node_handle)]; - -#ifdef debug_boost_graph - cerr << "Child " << child_num << " (NODE): Finding edges from node" << endl; - cerr << " node_handle(id=" << hgraph->get_id(node_handle) << ", rev=" << hgraph->get_is_reverse(node_handle) - << ") -> Boost " << node_id_fwd << " (fwd), " << node_id_rev << " (rev)" << endl; -#endif - - // For nodes, we look both directions from each handle - // go_left=false from node_handle finds edges leaving the right side - // go_left=true from node_handle finds edges leaving the left side - for (bool go_left : {false, true}) { - // Determine which Boost vertex is the source based on direction - // If go_left=false (looking right), edges leave from the forward vertex - // If go_left=true (looking left), edges leave from the reverse vertex - auto source_id = go_left ? node_id_rev : node_id_fwd; - -#ifdef debug_boost_graph - cerr << " Following edges from node_handle (go_left=" << (go_left ? "true" : "false") << "), source Boost=" << source_id << ":" << endl; -#endif - hgraph->follow_edges(node_handle, go_left, [&] (const handle_t& next) { -#ifdef debug_boost_graph - cerr << " Found edge to next(id=" << hgraph->get_id(next) << ", rev=" << hgraph->get_is_reverse(next) << ")" << endl; -#endif - if (!handle_bgnid_map.contains(next)) { -#ifdef debug_boost_graph - cerr << " NOT in handle_bgnid_map - skipping" << endl; -#endif - return; - } - const auto next_id = handle_bgnid_map[next]; -#ifdef debug_boost_graph - cerr << " Maps to Boost " << next_id << endl; -#endif - auto edge_info = edge(source_id, next_id, ov); - if (!edge_info.second) { -#ifdef debug_boost_graph - cerr << " Adding edge " << source_id << " -> " << next_id << endl; - cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(source_id) << endl; -#endif - add_edge(source_id, next_id, ov); - add_edge(rev_bgid(next_id), rev_bgid(source_id), ov); - } else { -#ifdef debug_boost_graph - cerr << " Edge already exists" << 
endl; -#endif - } - }); - } - - vertex_offset += 2; - } + }); } + #ifdef debug_boost_graph cerr << "=== make_boost_graph complete ===" << endl; #endif From 3619491f9dfad76286495964e3f1242684c92288 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 27 Jan 2026 16:41:11 -0500 Subject: [PATCH 12/75] Teach CMake about Boost dependency --- CMakeLists.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 45998cbc..8104de01 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -143,6 +143,8 @@ endif() # Find other system dependencies pkg_check_modules(Jansson REQUIRED IMPORTED_TARGET jansson) +find_package(Boost REQUIRED) + # Find our bdsg package directory where input sources and dependencies are set(bdsg_DIR "${CMAKE_CURRENT_SOURCE_DIR}/bdsg") @@ -325,7 +327,8 @@ set(bdsg_TARGET_DEPS bbhash sparsepp mio::mio - PkgConfig::Jansson) + PkgConfig::Jansson + Boost::boost) set(bdsg_LIBS ${bdsg_TARGET_DEPS} @@ -339,7 +342,7 @@ target_include_directories(bdsg_objs PUBLIC ${bdsg_INCLUDES}) set_target_properties(bdsg_objs PROPERTIES POSITION_INDEPENDENT_CODE TRUE) if (CMAKE_MAJOR_VERSION EQUAL "3" AND (CMAKE_MINOR_VERSION EQUAL "10" OR CMAKE_MINOR_VERSION EQUAL "11")) - # Before CMake 3.12 we can't ise target_link_libraries on an object library to convey the need to use depencies' include directories + # Before CMake 3.12 we can't use target_link_libraries on an object library to convey the need to use depencies' include directories get_target_property(sdsl_INCLUDE sdsl INTERFACE_INCLUDE_DIRECTORIES) target_include_directories(bdsg_objs PUBLIC ${sdsl_INCLUDE}) get_target_property(hopscotch_map_INCLUDE tsl::hopscotch_map INTERFACE_INCLUDE_DIRECTORIES) From 7e3325737954844eae49c8fa382617abcf0ef1f9 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 27 Jan 2026 16:52:38 -0500 Subject: [PATCH 13/75] Stop putting negative-unsigned entries in the hub labels --- bdsg/src/ch.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 
deletion(-) diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index 691c2a83..6fdb7ad5 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -644,7 +644,13 @@ ItrType get_dist_itr(ItrType start_itr, ItrType hub_itr) { void down_dijk(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_back) { auto in_node = node; - labels_back[node].emplace_back(ov[node].new_id, -ov[node].seqlen); + // TODO: We used to add -ov[node].seqlen to labels_back[node] for the hub + // ov[node].new_id. But this involved doing unsigned overflow shenanigans, + // and gave us values in the labels that are maximally wide and can't later + // be packed into the reduced bit width in a SnarlDistanceIndex. + // + // The tests didn't seem to cover a case where these entries were needed, so + // we just don't do that anymore. std::priority_queue, vector>, greater>> q; auto [_, __] = out_edges(in_node, ov); From 9a88f8f42432aea6e416f08d57c877cc8e5f967c Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 27 Jan 2026 17:01:50 -0500 Subject: [PATCH 14/75] Define what to do for asking about a node to itself, and test it --- bdsg/include/bdsg/ch.hpp | 10 ++++++++++ bdsg/src/test_libbdsg.cpp | 15 +++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/bdsg/include/bdsg/ch.hpp b/bdsg/include/bdsg/ch.hpp index f738604d..a5f116f5 100644 --- a/bdsg/include/bdsg/ch.hpp +++ b/bdsg/include/bdsg/ch.hpp @@ -128,6 +128,16 @@ DIST_UINT binary_intersection_ch(ItrType start_itr, size_t v1_start_bound_index, * * start_itr should point to the first slot of the packed label data returned * by pack_labels(), which is the label count. + * + * The rank space covers both orientations of each node. + * + * Returns the minimum distance from the end of the node orientation at rank1 + * to the start of the node orientation at rank2. (If working in a net graph in + * a SnarlDistanceIndex, these "nodes" may really be child chains.) 
+ * + * If rank1 == rank2, returns the minimum distance around that cycle, if any. + * + * If there is no known path between the given nodes, returns INF_INT. */ template DIST_UINT hhl_query(ItrType start_itr, size_t rank1, size_t rank2) { diff --git a/bdsg/src/test_libbdsg.cpp b/bdsg/src/test_libbdsg.cpp index 704ff5ac..7961a247 100644 --- a/bdsg/src/test_libbdsg.cpp +++ b/bdsg/src/test_libbdsg.cpp @@ -4991,6 +4991,7 @@ void test_hash_graph() { void test_hub_labeling() { { + // Simple stick graph of 3 nodes HashGraph test_g; vector handles; handles.resize(3); for (auto n: {0,1,2}) { @@ -5034,14 +5035,20 @@ void test_hub_labeling() { cerr << sz << " "; } cerr << endl; */ - + + // 0th forward to 1st forward: no intervening bases assert(hhl_query(packed_labels.begin(), 0, 2) == 0); - //TODO: what to do when node equals itself? - //assert(hhl_query(packed_labels.begin(), 0, 0) == INF_INT); + // When asking about the same node twice, we look for self loops. + // Here there aren't any. + assert(hhl_query(packed_labels.begin(), 0, 0) == INF_INT); + + // 2nd reverse to 1st reverse: 1 intervening base assert(hhl_query(packed_labels.begin(), 5, 1) == 1); - assert(hhl_query(packed_labels.begin(), 1, 2) == INF_INT); + // 0th reverse to 1st forward: no connection + assert(hhl_query(packed_labels.begin(), 1, 2) == INF_INT); + //TODO: check that error occurs when nodeside out of range is given } { From ac07468d06bf63d00d56f29db6e31a20449adf5e Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 27 Jan 2026 17:20:57 -0500 Subject: [PATCH 15/75] Qualify std::move calls to appease compiler --- bdsg/include/bdsg/landmark.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bdsg/include/bdsg/landmark.hpp b/bdsg/include/bdsg/landmark.hpp index 06bf7c0c..b2c0052f 100644 --- a/bdsg/include/bdsg/landmark.hpp +++ b/bdsg/include/bdsg/landmark.hpp @@ -49,7 +49,7 @@ class SdslArray2D { arr2d.resize(row_count); for (DIST_UINT i = 0; i < row_count; i++) { sdsl::int_vector 
sdsl_row(col_count, Inf_UInt, DIST_NBITS); - arr2d[i] = move(sdsl_row); + arr2d[i] = std::move(sdsl_row); } } @@ -67,7 +67,7 @@ class SdslArray2D { sdsl_row[j] = static_cast(entry)+offset; } } - arr2d[i] = move(sdsl_row); + arr2d[i] = std::move(sdsl_row); } } From 3b84fb02a6be64da259a59f3928dd73a4c61473e Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 27 Jan 2026 17:21:37 -0500 Subject: [PATCH 16/75] Const-ify arguments to label building --- bdsg/include/bdsg/ch.hpp | 6 +++--- bdsg/src/ch.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bdsg/include/bdsg/ch.hpp b/bdsg/include/bdsg/ch.hpp index a5f116f5..7534aabd 100644 --- a/bdsg/include/bdsg/ch.hpp +++ b/bdsg/include/bdsg/ch.hpp @@ -11,7 +11,7 @@ file for quickly playing around with stuff #include namespace bdsg { -NODE_UINT bgid(const handle_t& h, bdsg::HashGraph& hg); +NODE_UINT bgid(const handle_t& h, const bdsg::HashGraph& hg); NODE_UINT rev_bgid(NODE_UINT n); @@ -35,8 +35,8 @@ typedef struct EdgeProp { typedef boost::adjacency_list CHOverlay; typedef boost::filtered_graph> ContractedGraph; -CHOverlay make_boost_graph(bdsg::HashGraph& hg); -CHOverlay make_boost_graph(bdsg::SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, pair& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, vector>& all_children, const HandleGraph* graph); +CHOverlay make_boost_graph(const bdsg::HashGraph& hg); +CHOverlay make_boost_graph(const bdsg::SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const pair& snarl_index, const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, vector>& all_children, const HandleGraph* graph); int edge_diff(ContractedGraph::vertex_descriptor nid, ContractedGraph& ch, CHOverlay& ov, vector& node_dists, int hop_limit); diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index 6fdb7ad5..93db0e40 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -25,7 +25,7 @@ bdsg::HashGraph 
make_test() { return g; } -NODE_UINT bgid(const handle_t& h, bdsg::HashGraph& hg) { +NODE_UINT bgid(const handle_t& h, const bdsg::HashGraph& hg) { auto nid = hg.get_id(h); return hg.get_is_reverse(h) ? (nid-1)*2+1 : (nid-1)*2; } @@ -34,7 +34,7 @@ NODE_UINT rev_bgid(NODE_UINT n) { return n ^ 1; } -CHOverlay make_boost_graph(bdsg::HashGraph& hg) { +CHOverlay make_boost_graph(const bdsg::HashGraph& hg) { NODE_UINT node_count = hg.get_node_count(); CHOverlay g(node_count*2); hg.for_each_edge([&](const edge_t& edge_h) { @@ -56,7 +56,7 @@ CHOverlay make_boost_graph(bdsg::HashGraph& hg) { return g; } -CHOverlay make_boost_graph(SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, SnarlDistanceIndex::temp_record_ref_t& snarl_index, SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, vector& all_children, const HandleGraph* hgraph) { +CHOverlay make_boost_graph(const SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector& all_children, const HandleGraph* hgraph) { // Boost graph vertex allocation: // - Chains get 4 vertices: start_fwd, start_rev, end_fwd, end_rev // - Nodes get 2 vertices: fwd, rev (like in the HashGraph overload) From 227e4d19ddefb1ea9aaeea31163f667fee4b51db Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 27 Jan 2026 17:22:30 -0500 Subject: [PATCH 17/75] De-comment the existing hub labeling test cases into distinct test cases that all can run --- bdsg/src/test_libbdsg.cpp | 115 +++++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 52 deletions(-) diff --git a/bdsg/src/test_libbdsg.cpp b/bdsg/src/test_libbdsg.cpp index 7961a247..95f3c7ea 100644 --- a/bdsg/src/test_libbdsg.cpp +++ b/bdsg/src/test_libbdsg.cpp @@ -4989,31 +4989,25 @@ void test_hash_graph() { cerr << "HashGraph tests successful!" 
<< endl; } -void test_hub_labeling() { - { - // Simple stick graph of 3 nodes - HashGraph test_g; - vector handles; handles.resize(3); - for (auto n: {0,1,2}) { - handles[n] = test_g.create_handle("A"); - } - test_g.create_edge(handles[0], handles[1]); - test_g.create_edge(handles[1], handles[2]); - +void test_hub_labeling() { + + // To make the tests easier to write we have a widget that does the full dance + // to build a packed label vector. + auto get_packed_labels = [](const HashGraph& test_g) { //test HashGraph -> Boost graph CHOverlay bg = make_boost_graph(test_g); - // + auto [edges_start, edges_end] = boost::edges(bg); + std::for_each(edges_start, edges_end, [&] (auto e) { + cerr << source(e,bg) << " -> " << target(e,bg) << endl; + }); + make_contraction_hierarchy(bg); //cerr << " - made contraction hierarchy" << endl; vector> labels_fwd; labels_fwd.resize(num_vertices(bg)); vector> labels_back; labels_back.resize(num_vertices(bg)); - create_labels(labels_fwd, labels_back, bg); - //linearization - vector packed_labels = pack_labels(labels_fwd, labels_back); - //dummy filter /* for (auto v: labels_fwd) { for (auto sz: v) { @@ -5035,6 +5029,24 @@ void test_hub_labeling() { cerr << sz << " "; } cerr << endl; */ + + create_labels(labels_fwd, labels_back, bg); + + //linearization + return pack_labels(labels_fwd, labels_back); + }; + + { + // Simple stick graph of 3 nodes + HashGraph test_g; + vector handles; handles.resize(3); + for (auto n: {0,1,2}) { + handles[n] = test_g.create_handle("A"); + } + test_g.create_edge(handles[0], handles[1]); + test_g.create_edge(handles[1], handles[2]); + + vector packed_labels = get_packed_labels(test_g); // 0th forward to 1st forward: no intervening bases assert(hhl_query(packed_labels.begin(), 0, 2) == 0); @@ -5052,65 +5064,64 @@ void test_hub_labeling() { //TODO: check that error occurs when nodeside out of range is given } { + // Graph with several nodes but only one edge HashGraph test_g; vector handles; 
handles.resize(8); for (auto n: {0,1,2,3,4,5,6,7}) { handles[n] = test_g.create_handle(string(n+1, 'A')); - } - //vector> edges={{0,1},{0,2},{1,0},{2,0},{1,3},{1,4},{4,1},{5,5}}; + } vector> edges={{1,3}}; for (auto e: edges) { auto [s,t] = e; test_g.create_edge(handles[s], handles[t]); } - //test HashGraph -> Boost graph - CHOverlay bg = make_boost_graph(test_g); - auto [edges_start, edges_end] = boost::edges(bg); - std::for_each(edges_start, edges_end, [&] (auto e) { - cerr << source(e,bg) << " -> " << target(e,bg) << endl; - }); - make_contraction_hierarchy(bg); - - vector> labels_fwd; labels_fwd.resize(num_vertices(bg)); - vector> labels_back; labels_back.resize(num_vertices(bg)); - create_labels(labels_fwd, labels_back, bg); - //linearization - vector packed_labels = pack_labels(labels_fwd, labels_back); - for (auto v: labels_fwd) { - for (auto sz: v) { - cerr << "(" << sz.hub << "," << sz.dist << ") "; - } - cerr << " | "; - } - cerr << endl; - cerr<<"back:" << endl; - for (auto v: labels_back) { - for (auto sz: v) { - cerr << "(" << sz.hub << "," << sz.dist << ") "; - } - cerr << " | "; - } - /* + vector packed_labels = get_packed_labels(test_g); + + // 1st forward to 3rd forward: the only edge there is + assert(hhl_query(packed_labels.begin(), 2, 6) == 0); //nonexistent path assert(hhl_query(packed_labels.begin(), 0, 14) == INF_INT); + } + { + // Graph with several nodes and several edges + HashGraph test_g; + vector handles; handles.resize(8); + for (auto n: {0,1,2,3,4,5,6,7}) { + handles[n] = test_g.create_handle(string(n+1, 'A')); + } + vector> edges={{0,1},{0,2},{1,0},{2,0},{1,3},{1,4},{4,1},{5,5}}; + for (auto e: edges) { + auto [s,t] = e; + test_g.create_edge(handles[s], handles[t]); + } + + vector packed_labels = get_packed_labels(test_g); - //check node lengths are taken into account + // 1st forward to 3rd forward: direct connection + assert(hhl_query(packed_labels.begin(), 2, 6) == 0); + // 1st forward to 7th forward: nonexistent path + 
assert(hhl_query(packed_labels.begin(), 0, 14) == INF_INT); + + // check node lengths are taken into account + // 0th forward to 3rd forward: should need to go through 1st which has length 2 assert(hhl_query(packed_labels.begin(), 0, 6) == 2); //check u -> v and v -> u are different - assert(hhl_query(packed_labels.begin(), 6, 2) == INF_INT); */ + // 3rd forward to 2nd forward: shouldn't connect because nothing is downstream of 3rd + assert(hhl_query(packed_labels.begin(), 6, 2) == INF_INT); + // 1st forward to 3rd forward: direct connection + assert(hhl_query(packed_labels.begin(), 2, 6) == 0); + //need to debug - for (int a = 0; a < 10; a++ ) { + for (int a = 0; a < handles.size() * 2; a++ ) { cerr << hhl_query(packed_labels.begin(), 2, a) << endl; } - assert(hhl_query(packed_labels.begin(), 2, 6) == 0); - /* + //node to itself in the same direction (edge exists) assert(hhl_query(packed_labels.begin(), 10, 10) == 0); //node to itself in the same direction (edge doesn't exist) assert(hhl_query(packed_labels.begin(), 6, 6) == INF_INT); - */ } cerr << "HubLabeling tests successful!" << endl; From 9b33554208b80efb619a3d1291f3f0b6ca4e51ef Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 27 Jan 2026 17:53:28 -0500 Subject: [PATCH 18/75] Add some synthetic unit tests, with revisions, for some more graph shapes and cases --- bdsg/src/test_libbdsg.cpp | 226 +++++++++++++++++++++++++++++++++----- 1 file changed, 196 insertions(+), 30 deletions(-) diff --git a/bdsg/src/test_libbdsg.cpp b/bdsg/src/test_libbdsg.cpp index 95f3c7ea..38d16ec6 100644 --- a/bdsg/src/test_libbdsg.cpp +++ b/bdsg/src/test_libbdsg.cpp @@ -5036,6 +5036,13 @@ void test_hub_labeling() { return pack_labels(labels_fwd, labels_back); }; + // We also use this to let us write our tests in therms of index into the + // handles vector, and orientation. This converts that to a hub labeling + // rank. + auto rank = [](size_t node_index, bool is_reverse) -> size_t { + return node_index * 2 + (is_reverse ? 
1 : 0); + }; + { // Simple stick graph of 3 nodes HashGraph test_g; @@ -5045,22 +5052,22 @@ void test_hub_labeling() { } test_g.create_edge(handles[0], handles[1]); test_g.create_edge(handles[1], handles[2]); - + vector packed_labels = get_packed_labels(test_g); - + // 0th forward to 1st forward: no intervening bases - assert(hhl_query(packed_labels.begin(), 0, 2) == 0); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, false)) == 0); // When asking about the same node twice, we look for self loops. // Here there aren't any. - assert(hhl_query(packed_labels.begin(), 0, 0) == INF_INT); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(0, false)) == INF_INT); // 2nd reverse to 1st reverse: 1 intervening base - assert(hhl_query(packed_labels.begin(), 5, 1) == 1); - + assert(hhl_query(packed_labels.begin(), rank(2, true), rank(0, true)) == 1); + // 0th reverse to 1st forward: no connection - assert(hhl_query(packed_labels.begin(), 1, 2) == INF_INT); - + assert(hhl_query(packed_labels.begin(), rank(0, true), rank(1, false)) == INF_INT); + //TODO: check that error occurs when nodeside out of range is given } { @@ -5077,11 +5084,11 @@ void test_hub_labeling() { } vector packed_labels = get_packed_labels(test_g); - + // 1st forward to 3rd forward: the only edge there is - assert(hhl_query(packed_labels.begin(), 2, 6) == 0); - //nonexistent path - assert(hhl_query(packed_labels.begin(), 0, 14) == INF_INT); + assert(hhl_query(packed_labels.begin(), rank(1, false), rank(3, false)) == 0); + // nonexistent path + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(7, false)) == INF_INT); } { // Graph with several nodes and several edges @@ -5097,33 +5104,192 @@ void test_hub_labeling() { } vector packed_labels = get_packed_labels(test_g); - + // 1st forward to 3rd forward: direct connection - assert(hhl_query(packed_labels.begin(), 2, 6) == 0); - // 1st forward to 7th forward: nonexistent path - assert(hhl_query(packed_labels.begin(), 0, 
14) == INF_INT); + assert(hhl_query(packed_labels.begin(), rank(1, false), rank(3, false)) == 0); + // 0th forward to 7th forward: nonexistent path + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(7, false)) == INF_INT); // check node lengths are taken into account // 0th forward to 3rd forward: should need to go through 1st which has length 2 - assert(hhl_query(packed_labels.begin(), 0, 6) == 2); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(3, false)) == 2); - //check u -> v and v -> u are different - // 3rd forward to 2nd forward: shouldn't connect because nothing is downstream of 3rd - assert(hhl_query(packed_labels.begin(), 6, 2) == INF_INT); + // check u -> v and v -> u are different + // 3rd forward to 1st forward: shouldn't connect because nothing is downstream of 3rd + assert(hhl_query(packed_labels.begin(), rank(3, false), rank(1, false)) == INF_INT); // 1st forward to 3rd forward: direct connection - assert(hhl_query(packed_labels.begin(), 2, 6) == 0); + assert(hhl_query(packed_labels.begin(), rank(1, false), rank(3, false)) == 0); + + // need to debug + for (size_t a = 0; a < handles.size() * 2; a++) { + cerr << hhl_query(packed_labels.begin(), rank(1, false), a) << endl; + } + + // node to itself in the same direction (edge exists) + assert(hhl_query(packed_labels.begin(), rank(5, false), rank(5, false)) == 0); + // node to itself in the same direction (edge doesn't exist) + assert(hhl_query(packed_labels.begin(), rank(3, false), rank(3, false)) == INF_INT); + } + { + // Test case: Cycle back to the same node with minimum distance > 0 + // Creates a triangle: 0 -> 1 -> 2 -> 0 + // Node lengths: 0=1, 1=2, 2=3 + HashGraph test_g; + vector handles; handles.resize(3); + for (auto n : {0, 1, 2}) { + handles[n] = test_g.create_handle(string(n + 1, 'A')); + } + test_g.create_edge(handles[0], handles[1]); + test_g.create_edge(handles[1], handles[2]); + test_g.create_edge(handles[2], handles[0]); + + vector packed_labels = 
get_packed_labels(test_g); + + // Forward cycle: 0->1->2->0 + // 0_fwd to 0_fwd via cycle: intermediate nodes 1 and 2, lengths 2+3=5 + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(0, false)) == 5); + // 1_fwd to 1_fwd via cycle: intermediate nodes 2 and 0, lengths 3+1=4 + assert(hhl_query(packed_labels.begin(), rank(1, false), rank(1, false)) == 4); + // 2_fwd to 2_fwd via cycle: intermediate nodes 0 and 1, lengths 1+2=3 + assert(hhl_query(packed_labels.begin(), rank(2, false), rank(2, false)) == 3); + + // The same cycle is visible in reverse. + assert(hhl_query(packed_labels.begin(), rank(0, true), rank(0, true)) == 5); + assert(hhl_query(packed_labels.begin(), rank(1, true), rank(1, true)) == 4); + assert(hhl_query(packed_labels.begin(), rank(2, true), rank(2, true)) == 3); + } + { + // Test case: Forward and reverse orientations of different nodes reaching each other + // Node 0 (len 1) and Node 1 (len 2) + // Edges: 0_fwd -> 1_rev, 2_fwd -> 1_fwd + // This creates a "reversing" pattern where you enter one side and exit the other + HashGraph test_g; + vector handles; handles.resize(3); + handles[0] = test_g.create_handle("A"); + handles[1] = test_g.create_handle("AA"); + handles[2] = test_g.create_handle("AAA"); - //need to debug - for (int a = 0; a < handles.size() * 2; a++ ) { - cerr << hhl_query(packed_labels.begin(), 2, a) << endl; - } + test_g.create_edge(handles[0], test_g.flip(handles[1])); + test_g.create_edge(handles[2], handles[1]); + + vector packed_labels = get_packed_labels(test_g); - //node to itself in the same direction (edge exists) - assert(hhl_query(packed_labels.begin(), 10, 10) == 0); - //node to itself in the same direction (edge doesn't exist) - assert(hhl_query(packed_labels.begin(), 6, 6) == INF_INT); + // We see the 1st node attached the right way around + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, true)) == 0); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, false)) == INF_INT); + 
+ // We see the 0th node connected to the 2nd node the right way around + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, true)) == 2); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, false)) == INF_INT); + + // We see the same thing looking the other way + assert(hhl_query(packed_labels.begin(), rank(2, false), rank(0, true)) == 2); + assert(hhl_query(packed_labels.begin(), rank(2, false), rank(0, false)) == INF_INT); + } + { + // Test case: Diamond graph with multiple paths of different lengths + // Tests that we find the minimum distance, not just any path + // + // 1 (len 2) + // / \ + // / \ + // 0 3 + // \ / + // \ / + // 2 (len 10) + // + // Node lengths: 0=1, 1=2, 2=10, 3=1 + // Path 0->1->3 has intermediate length 2 + // Path 0->2->3 has intermediate length 10 + // Should find minimum = 2 + HashGraph test_g; + vector handles; handles.resize(4); + handles[0] = test_g.create_handle("A"); + handles[1] = test_g.create_handle("AA"); + handles[2] = test_g.create_handle("AAAAAAAAAA"); + handles[3] = test_g.create_handle("A"); + + test_g.create_edge(handles[0], handles[1]); + test_g.create_edge(handles[0], handles[2]); + test_g.create_edge(handles[1], handles[3]); + test_g.create_edge(handles[2], handles[3]); + + vector packed_labels = get_packed_labels(test_g); + + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(3, false)) == 2); + + // Same paths in reverse, should also be 2 + assert(hhl_query(packed_labels.begin(), rank(3, true), rank(0, true)) == 2); + } + { + // Test case: Graph requiring traversal through a node and back again + // + // 0_fwd -> 1_rev (entering 1 from the right) + // 1_fwd -> 2_fwd (exiting 1 from the right) + // 1_rev -> 1_fwd (turning around) + // This means you can go 0_fwd -> 1_rev -> (through 1) -> 1_fwd -> 2_fwd + HashGraph test_g; + vector handles; handles.resize(3); + handles[0] = test_g.create_handle("A"); + handles[1] = test_g.create_handle("AAA"); + handles[2] = 
test_g.create_handle("AA"); + + test_g.create_edge(handles[0], test_g.flip(handles[1])); + test_g.create_edge(handles[1], handles[2]); + test_g.create_edge(test_g.flip(handles[1]), handles[1]); + + vector packed_labels = get_packed_labels(test_g); + + // Must go through 1, turn around, and come back through 1 + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, false)) == 6); + } + { + // Test case: Graph requiring traversal through a node and back again, but + // without the hairpin edge. + HashGraph test_g; + vector handles; handles.resize(3); + handles[0] = test_g.create_handle("A"); + handles[1] = test_g.create_handle("AAA"); + handles[2] = test_g.create_handle("AA"); + + test_g.create_edge(handles[0], test_g.flip(handles[1])); + test_g.create_edge(handles[1], handles[2]); + + vector packed_labels = get_packed_labels(test_g); + + // We can't turn around inside 1, so we can't make it. + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, false)) == INF_INT); + } + { + // Test case: Disconnected components + // Nodes 0,1 are connected; Nodes 2,3 are connected; No edges between components + HashGraph test_g; + vector handles; handles.resize(4); + handles[0] = test_g.create_handle("A"); + handles[1] = test_g.create_handle("AA"); + handles[2] = test_g.create_handle("AAA"); + handles[3] = test_g.create_handle("AAAA"); + + test_g.create_edge(handles[0], handles[1]); + test_g.create_edge(handles[2], handles[3]); + + vector packed_labels = get_packed_labels(test_g); + + // Within first component + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, false)) == 0); // 0_fwd to 1_fwd + assert(hhl_query(packed_labels.begin(), rank(1, true), rank(0, true)) == 0); // 1_rev to 0_rev + + // Within second component + assert(hhl_query(packed_labels.begin(), rank(2, false), rank(3, false)) == 0); // 2_fwd to 3_fwd + assert(hhl_query(packed_labels.begin(), rank(3, true), rank(2, true)) == 0); // 3_rev to 2_rev + + // Between components: no path 
+ assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, false)) == INF_INT); // 0_fwd to 2_fwd + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(3, false)) == INF_INT); // 0_fwd to 3_fwd + assert(hhl_query(packed_labels.begin(), rank(1, false), rank(2, false)) == INF_INT); // 1_fwd to 2_fwd + assert(hhl_query(packed_labels.begin(), rank(2, true), rank(0, true)) == INF_INT); // 2_rev to 0_rev } - + cerr << "HubLabeling tests successful!" << endl; } From 385efa8e3bb3270835d777d6115065c1074f89f3 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 27 Jan 2026 18:19:00 -0500 Subject: [PATCH 19/75] Fix signature mismatch --- bdsg/include/bdsg/ch.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdsg/include/bdsg/ch.hpp b/bdsg/include/bdsg/ch.hpp index 7534aabd..b54958d3 100644 --- a/bdsg/include/bdsg/ch.hpp +++ b/bdsg/include/bdsg/ch.hpp @@ -36,7 +36,7 @@ typedef boost::adjacency_list> ContractedGraph; CHOverlay make_boost_graph(const bdsg::HashGraph& hg); -CHOverlay make_boost_graph(const bdsg::SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const pair& snarl_index, const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, vector>& all_children, const HandleGraph* graph); +CHOverlay make_boost_graph(const bdsg::SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlIndex::temp_record_ref_t& snarl_index, const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector>& all_children, const HandleGraph* graph); int edge_diff(ContractedGraph::vertex_descriptor nid, ContractedGraph& ch, CHOverlay& ov, vector& node_dists, int hop_limit); From 9ae76d4007bd07a2d14ce34a47d97871d6d93318 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 27 Jan 2026 18:21:23 -0500 Subject: [PATCH 20/75] Fix type name --- bdsg/include/bdsg/ch.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdsg/include/bdsg/ch.hpp b/bdsg/include/bdsg/ch.hpp 
index b54958d3..36c81bec 100644 --- a/bdsg/include/bdsg/ch.hpp +++ b/bdsg/include/bdsg/ch.hpp @@ -36,7 +36,7 @@ typedef boost::adjacency_list> ContractedGraph; CHOverlay make_boost_graph(const bdsg::HashGraph& hg); -CHOverlay make_boost_graph(const bdsg::SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlIndex::temp_record_ref_t& snarl_index, const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector>& all_children, const HandleGraph* graph); +CHOverlay make_boost_graph(const bdsg::SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector>& all_children, const HandleGraph* graph); int edge_diff(ContractedGraph::vertex_descriptor nid, ContractedGraph& ch, CHOverlay& ov, vector& node_dists, int hop_limit); From fb774f20fe382a3c70155d47f8d2bdc0144e51cd Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 28 Jan 2026 17:57:27 -0500 Subject: [PATCH 21/75] Rework HHL vertex ID computation so we can actually find the right vertices from a net graph child. Add a bunch of comments explaining why I am confused by the distance index orientation bookkeeping. --- bdsg/include/bdsg/ch.hpp | 86 ++++++++- bdsg/src/ch.cpp | 284 ++++++++++++++++++------------ bdsg/src/snarl_distance_index.cpp | 51 +++++- 3 files changed, 293 insertions(+), 128 deletions(-) diff --git a/bdsg/include/bdsg/ch.hpp b/bdsg/include/bdsg/ch.hpp index 36c81bec..283296b0 100644 --- a/bdsg/include/bdsg/ch.hpp +++ b/bdsg/include/bdsg/ch.hpp @@ -10,11 +10,50 @@ file for quickly playing around with stuff #include #include -namespace bdsg { +//#define debug_binary_intersection +//#define debug_hhl_query + +namespace bdsg { + +/** + * For a handle graph indexed with HHL, get the HHL rank ("Boost graph ID") for + * an orientation of a node, as a source or destination. 
+ */ NODE_UINT bgid(const handle_t& h, const bdsg::HashGraph& hg); - + +/** + * For a net graph indexed with HHL, get the HHL rank for an orientation of a + * net graph element (snarl start node, snarl end node, child node, child + * chain), as either the source or destination of a query. + * + * Snarl start nodes and snarl end nodes are handled so that "forward" + * orientation runs along the snarl, regardless of the orientation that the + * underlying handle graph node is in as a snarl boundary. + * + * Child chains and nodes are also handled so that "forward" orientation is the + * orientation the thing has in the snarl. So if a node is reversed in the + * snarl, asking about forward is actually asking about that node in its local + * reverse orientation. + * + * For net graphs, we need to distinguish between source and destination status + * to allow turning around within a child chain without traversing the full + * length of the chain. Each child chain needs to be represented by a subgraph + * with different in and out "port" nodes in each orientation. + */ +NODE_UINT bgid(size_t net_rank, bool is_reverse, bool is_source); + +/** + * For a handle or net graph indexed with HHL, take the HHL rank of an orientation of + * a node and get that of the opposite orientation of a node. + * + * For handle graphs, ranks are the same for source and destination. + * + * For net graphs, ranks differ between source and destination "ports" for a + * net graph element; this also swaps source and destination status. + */ NODE_UINT rev_bgid(NODE_UINT n); + typedef struct NodeProp { DIST_UINT seqlen; DIST_UINT max_out = 0; @@ -35,7 +74,26 @@ typedef struct EdgeProp { typedef boost::adjacency_list CHOverlay; typedef boost::filtered_graph> ContractedGraph; +/** + * Build the intermediate hub labeling computation data structure ("Boost + * graph") from a HashGraph. + * + * The nodes in the graph must have dense node IDs starting at 1. 
+ * + * For later queries, orientations of nodes are assigned ranks as provided by + * the bgid() function. + */ CHOverlay make_boost_graph(const bdsg::HashGraph& hg); +/** + * Build the intermediate hub labeling computation data structure ("Boost + * graph") for the net graph of a snarl in a TemporaryDistanceIndex. + * + * all_children must contain the child chains and nodes of the snarl, as well as the bounding nodes of the snarl, in any order. + * + * For later queries, orientations of children or the snarl boundary nodes are assigned query ranks based on their snarl distance index rank. + * + * The snarl distance index ranks are 0 and 1 for the start and end nodes of the snarl, and the rank_in_parent field of the temporary index for each child. + */ CHOverlay make_boost_graph(const bdsg::SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector>& all_children, const HandleGraph* graph); int edge_diff(ContractedGraph::vertex_descriptor nid, ContractedGraph& ch, CHOverlay& ov, vector& node_dists, int hop_limit); @@ -84,12 +142,16 @@ DIST_UINT binary_intersection_ch(ItrType start_itr, size_t v1_start_bound_index, auto v1_start_itr = next(start_itr, *v1_start_bound_itr); auto v1_end_itr = next(start_itr, *v1_end_bound_itr); +#ifdef debug_binary_intersection std::cerr << "Found " << v1_end_itr - v1_start_itr << " labels for vertex 1" << std::endl; +#endif auto v2_start_itr = next(start_itr, *v2_start_bound_itr); auto v2_end_itr = next(start_itr, *v2_end_bound_itr); +#ifdef debug_binary_intersection std::cerr << "Found " << v2_end_itr - v2_start_itr << " labels for vertex 2" << std::endl; +#endif auto v1_range = ranges::subrange(v1_start_itr, v1_end_itr); auto v2_range = ranges::subrange(v2_start_itr, v2_end_itr); @@ -101,22 +163,32 @@ DIST_UINT binary_intersection_ch(ItrType start_itr, size_t 
v1_start_bound_index, auto search_end_itr = search_vec.end(); DIST_UINT min_dist = INF_INT; for (auto it = key_vec.begin(); it < key_vec.end(); it++) { +#ifdef debug_binary_intersection cerr << "Performing key query" << endl; +#endif auto k = *it; - auto k_dist_itr = get_dist_itr(start_itr, it); + auto k_dist_itr = get_dist_itr(start_itr, it); +#ifdef debug_binary_intersection cerr << "Distance for k " << k << " is " << *k_dist_itr << ", at: " << distance(start_itr,k_dist_itr) << endl; cerr << "searching for " << k << " between " << distance(start_itr,search_start_itr) << " & " << distance(start_itr,search_end_itr) << endl; +#endif search_start_itr = lower_bound(search_start_itr, search_end_itr, k); if (search_start_itr == search_end_itr) { - std::cerr << "No more search results possible" << std::endl; +#ifdef debug_binary_intersection + std::cerr << "No more search results possible" << std::endl; +#endif return min_dist; } if (*search_start_itr == k) { +#ifdef debug_binary_intersection cerr << "match found, key: " << *search_start_itr << ", at " << distance(start_itr,search_start_itr) << endl; +#endif auto dist_itr = get_dist_itr(start_itr, search_start_itr); DIST_UINT d = *(dist_itr) + *(k_dist_itr); +#ifdef debug_binary_intersection cerr << "dist for key is: " << *dist_itr << ", at " << distance(start_itr,dist_itr) << endl; cerr << "total dist is: " << d << endl; +#endif min_dist = min(min_dist, d); } } @@ -143,19 +215,25 @@ template DIST_UINT hhl_query(ItrType start_itr, size_t rank1, size_t rank2) { size_t label_count = *start_itr; +#ifdef debug_hhl_query std::cerr << "Making hub label query on " << label_count << " labels" << std::endl; +#endif // Bounds start after the label count, and at the rank of the first // vertex past there we find the start bound for the first vertex. 
auto start_index_1 = 1+rank1; +#ifdef debug_hhl_query std::cerr << "Start bound for forward label for rank " << rank1 << " is at index " << start_index_1 << " past there" << std::endl; +#endif // And there's a final end value for the first set of labels before we go on // to the bounds where we would find the start bound for the second vertex. auto start_index_2 = 1+label_count+1+rank2; +#ifdef debug_hhl_query std::cerr << "Start bound for reverse label for rank " << rank2 << " is at index " << start_index_2 << " past there" << std::endl; +#endif DIST_UINT dist = binary_intersection_ch(start_itr, start_index_1, start_index_2); diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index 93db0e40..dcfb143e 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -30,6 +30,13 @@ NODE_UINT bgid(const handle_t& h, const bdsg::HashGraph& hg) { return hg.get_is_reverse(h) ? (nid-1)*2+1 : (nid-1)*2; } +NODE_UINT bgid(size_t net_rank, bool is_reverse, bool is_source) { + // The diagram is: + // 1<-3 (reverse traversal: start_rev (source) <- end_rev (not source)) + // 0->2 (forward traversal: start_fwd (not source) -> end_fwd (source)) + return net_rank * 4 + ((is_source ^ is_reverse) ? 2 : 0) + (is_reverse ? 
1 : 0); +} + NODE_UINT rev_bgid(NODE_UINT n) { return n ^ 1; } @@ -57,182 +64,211 @@ CHOverlay make_boost_graph(const bdsg::HashGraph& hg) { } CHOverlay make_boost_graph(const SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector& all_children, const HandleGraph* hgraph) { - // Boost graph vertex allocation: - // - Chains get 4 vertices: start_fwd, start_rev, end_fwd, end_rev - // - Nodes get 2 vertices: fwd, rev (like in the HashGraph overload) - // - rev_bgid(n) = n ^ 1 swaps between forward/reverse orientations + + // Every net graph element (start node at rank 0, end node at rank 1, each + // child nose/chain) needs to have 4 Boost graph nodes. We need separate + // representations for forward and reverse orientations, and within each + // orientation we need an in "port" and an out "port" so that we can draw the + // subgraphs describing internal reversals inside chains. We use the in ports + // to represent each element as a destination, and the out ports to represent + // each element as a source. // - // For chains, the diagram is: - // 1<-3 (reverse traversal: end_rev -> start_rev) - // 0->2 (forward traversal: start_fwd -> end_fwd) - // With loop edges: 2->3 (end loop), 1->0 (start loop) + // We wouldn't really need all 4 vertices to represent node children, or the + // start and end, but we need O(1) mapping from net graph rank. 
// - // For nodes, we follow the HashGraph pattern: - // base = fwd orientation, base+1 = rev orientation - - // First pass: count how many vertices we need - size_t total_vertices = 0; - for (const auto& child : all_children) { - if (child.first == bdsg::SnarlDistanceIndex::TEMP_CHAIN) { - total_vertices += 4; - } else if (child.first == bdsg::SnarlDistanceIndex::TEMP_NODE) { - total_vertices += 2; - } - } + // TODO: Can we reassign the net graph ranks so all the snarls are first and + // store a number of snarl children to let us throw out a bunch of + // never-queried labels? + + size_t total_vertices = all_children.size() * 4; #ifdef debug_boost_graph cerr << "=== make_boost_graph: Building net graph ===" << endl; cerr << "Number of children: " << all_children.size() << endl; - cerr << "Allocating " << total_vertices << " Boost vertices (4 per chain, 2 per node)" << endl; + cerr << "Allocating " << total_vertices << " Boost vertices" << endl; #endif CHOverlay ov(total_vertices); + // Maps inward-facing handle to Boost graph vertex ID. // Doesn't include outward-facing handles. unordered_map handle_bgnid_map; - // Track current vertex offset as we iterate (chains get 4, nodes get 2) - size_t vertex_offset = 0; - #ifdef debug_boost_graph cerr << "--- Phase 1: Creating Boost vertices for each net graph child ---" << endl; #endif - for (size_t child_num = 0; child_num < all_children.size(); child_num++) { - auto child = all_children[child_num]; + for (auto& child : all_children) { + // Ignore index in all_children and use whether the ID matches the + // start/end or else use the stored rank to determine the child number. + + // Depending on the child type we need to load these different ways and some might be INF_INT. + // The start and end handles point forward, not inward. 
+ size_t child_net_rank; + handle_t start_handle; + handle_t end_handle; + DIST_UINT start_end_distance; + DIST_UINT start_start_distance; + DIST_UINT end_end_distance; + if (child.first == bdsg::SnarlDistanceIndex::TEMP_CHAIN) { + // This is a child chain auto& record = temp_index.get_chain(child); - handle_t start_handle = hgraph->get_handle(record.start_node_id, record.start_node_rev); - handle_t end_handle = hgraph->get_handle(record.end_node_id, record.end_node_rev); + + // A child chain can never be the start or end boundary + child_net_rank = record.rank_in_parent; + + start_handle = hgraph->get_handle(record.start_node_id, record.start_node_rev); + end_handle = hgraph->get_handle(record.end_node_id, record.end_node_rev); + + // Fetch straight-through distance. + // TODO: What value does this have if straight-through is unreachable? Then we want INF_INT. + start_end_distance = record.min_length; + + // Fetch looping distances (thanks Xian!) + // TODO: What's the representation for "not connected"? Is it not having a value or is it having a sentinel value we need to translate to INF_INT here? + if (!record.forward_loops.empty()) { + // We know a chain always has a first child that's a node, so we can + // get the start node length. + auto& first_child = record.children.front(); + assert(first_child.first == bdsg::SnarlDistanceIndex::TEMP_NODE); + DIST_UINT start_node_length = temp_index.get_node(first_child).node_length; + start_start_distance = record.forward_loops[0] + (2 * start_node_length); + } else { + start_start_distance = INF_INT; + } + if (!record.backward_loops.empty()) { + // The end node length is already helpfully stored for us. + end_end_distance = record.backward_loops.back() + (2 * record.end_node_length); + } else { + end_end_distance = INF_INT; + } + + if (record.reversed_in_parent) { + // Fix up everything so we're thinking of the orientation of the chain + // in its parent, rather than its local forward orientation. 
+ auto temp = start_handle; + start_handle = hgraph->flip(end_handle); + end_handle = hgraph->flip(temp); + std::swap(start_start_distance, end_end_distance); + } #ifdef debug_boost_graph - cerr << "Child " << child_num << " is CHAIN: start_node=" << record.start_node_id + cerr << "Child " << child_net_rank << " is CHAIN: start_node=" << record.start_node_id << " (rev=" << record.start_node_rev << "), end_node=" << record.end_node_id << " (rev=" << record.end_node_rev << "), min_length=" << record.min_length << endl; cerr << " start_handle: id=" << hgraph->get_id(start_handle) << " rev=" << hgraph->get_is_reverse(start_handle) << endl; cerr << " end_handle: id=" << hgraph->get_id(end_handle) << " rev=" << hgraph->get_is_reverse(end_handle) << endl; - cerr << " Boost vertices: " << vertex_offset << " (start_fwd), " << vertex_offset+1 << " (start_rev), " - << vertex_offset+2 << " (end_fwd), " << vertex_offset+3 << " (end_rev)" << endl; + cerr << " (reversed_in_parent=" << record.reversed_in_parent << ")" << endl; + cerr << " Boost vertices: " << bgid(child_net_rank, false, false) << " (start_fwd), " + << bgid(child_net_rank, true, true) << " (start_rev), " + << bgid(child_net_rank, false, true) << " (end_fwd), " + << bgid(child_net_rank, true, false) << " (end_rev)" << endl; #endif - // Chain vertex layout (offsets from vertex_offset): - // Both start_handle and end_handle point left-to-right along the chain. - // At start (left): left-to-right = inward. At end (right): left-to-right = outward. - // 0 = start_handle (inward-facing, receives edges from outside) - // 1 = flip(start_handle) (outward-facing, sends edges to outside) - // 2 = end_handle (outward-facing, sends edges to outside) - // 3 = flip(end_handle) (inward-facing, receives edges from outside) - // - // But only the inward-facing versions can be arrived at, so only they need to be indexed directly. 
- - // Map inward orientations of start and end handles - handle_bgnid_map[start_handle] = vertex_offset; // start inward - handle_bgnid_map[hgraph->flip(end_handle)] = vertex_offset + 3; // end inward + } else if (child.first == bdsg::SnarlDistanceIndex::TEMP_NODE) { + // This is a child node + auto& record = temp_index.get_node(child); -#ifdef debug_boost_graph - cerr << " Mapping start_handle (inward) -> Boost " << vertex_offset << endl; - cerr << " Mapping flip(end_handle) (inward) -> Boost " << vertex_offset + 3 << endl; -#endif + // The rank may need to be 0 or 1 if we are a start or end bound. + if (record.node_id == temp_snarl_record.start_node_id) { + // TODO: Don't we need to handle having the same node as a start and an end bound??? + child_net_rank = 0; + // Handles need to point along snarl + start_handle = hgraph->get_handle(temp_snarl_record.start_node_id, temp_snarl_record.start_node_rev); + } else if (record.node_id == temp_snarl_record.end_node_id) { + child_net_rank = 1; + // Handles need to point along snarl + start_handle = hgraph->get_handle(temp_snarl_record.end_node_id, temp_snarl_record.end_node_rev); + } else { + child_net_rank = record.rank_in_parent; + // Handle needs to represent the thing in the orientation we have it in in the snarl. 
+ start_handle = hgraph->get_handle(record.node_id, record.reversed_in_parent); + } + + // Node is potentially reachable in both directions (though we only want to index one of these for bounds) + end_handle = start_handle; - // Add edges representing distance across chain - auto new_edge = add_edge(vertex_offset, vertex_offset + 2, ov); - ov[new_edge.first].weight = record.min_length; -#ifdef debug_boost_graph - cerr << " Edge " << vertex_offset << " -> " << vertex_offset + 2 << " (fwd traversal, weight=" << record.min_length << ")" << endl; -#endif + start_end_distance = record.node_length; + start_start_distance = INF_INT; + end_end_distance = INF_INT; - new_edge = add_edge(vertex_offset + 3, vertex_offset + 1, ov); - ov[new_edge.first].weight = record.min_length; #ifdef debug_boost_graph - cerr << " Edge " << vertex_offset + 3 << " -> " << vertex_offset + 1 << " (rev traversal, weight=" << record.min_length << ")" << endl; + cerr << "Child " << child_net_rank << " is NODE: node_id=" << record.node_id + << " (reversed_in_parent=" << record.reversed_in_parent << "), length=" << record.node_length << endl; + cerr << " id=" << hgraph->get_id(start_handle) << " rev=" << hgraph->get_is_reverse(start_handle) << endl; #endif + } else { + throw std::runtime_error("unexpected rec_type: " + std::to_string(child.first)); + } + - // Add looping distances (thanks Xian!) - auto& first_child = record.children.front(); - assert(first_child.first == bdsg::SnarlDistanceIndex::TEMP_NODE); - DIST_UINT start_node_length = temp_index.get_node(first_child).node_length; - DIST_UINT start_start_distance = record.forward_loops[0] + (2 * start_node_length); - DIST_UINT end_end_distance = record.backward_loops.back() + (2 * record.end_node_length); - - // TODO: Shouldn't we not make the loop edges if the loops don't exist - // or are unreachable distance sentinels or whatever? Are forward_loops - // and backward_loops always nonempty? 
+ // Map inward orientations of start and end handles + if (child_net_rank != 0) { + // We can arrive at the start of everything but our own start. + handle_bgnid_map[start_handle] = bgid(child_net_rank, false, false); + } + if (child_net_rank != 1) { + // We can arrive at the end of everything but our own end. + handle_bgnid_map[hgraph->flip(end_handle)] = bgid(child_net_rank, true, false); + } #ifdef debug_boost_graph - cerr << " Loop distances: start_start=" << start_start_distance << ", end_end=" << end_end_distance << endl; + cerr << " Mapping start_handle (inward) -> Boost " << handle_bgnid_map[start_handle] << endl; + cerr << " Mapping flip(end_handle) (inward) -> Boost " << handle_bgnid_map[hgraph->flip(end_handle)] << endl; #endif - // Loops are edges between different orientations of the same endpoint - auto new_loop_edge = add_edge(vertex_offset + 2, vertex_offset + 3, ov); - ov[new_loop_edge.first].weight = end_end_distance; + if (start_end_distance != INF_INT) { + // Add edges representing distance across chain + auto new_edge = add_edge(bgid(child_net_rank, false, false), bgid(child_net_rank, false, true), ov); + ov[new_edge.first].weight = start_end_distance; #ifdef debug_boost_graph - cerr << " Edge " << vertex_offset + 2 << " -> " << vertex_offset + 3 << " (end loop, weight=" << end_end_distance << ")" << endl; + cerr << " Edge " << bgid(child_net_rank, false, false) << " -> " << bgid(child_net_rank, false, true) << " (fwd traversal, weight=" << start_end_distance << ")" << endl; #endif - new_loop_edge = add_edge(vertex_offset + 1, vertex_offset, ov); - ov[new_loop_edge.first].weight = start_start_distance; + new_edge = add_edge(bgid(child_net_rank, true, false), bgid(child_net_rank, true, true), ov); + ov[new_edge.first].weight = start_end_distance; #ifdef debug_boost_graph - cerr << " Edge " << vertex_offset + 1 << " -> " << vertex_offset << " (start loop, weight=" << start_start_distance << ")" << endl; + cerr << " Edge " << bgid(child_net_rank, 
true, false) << " -> " << bgid(child_net_rank, true, true) << " (rev traversal, weight=" << start_end_distance << ")" << endl; #endif - - vertex_offset += 4; - - } else if (child.first == bdsg::SnarlDistanceIndex::TEMP_NODE) { - auto& record = temp_index.get_node(child); - handle_t node_handle = hgraph->get_handle(record.node_id, record.reversed_in_parent); + } #ifdef debug_boost_graph - cerr << "Child " << child_num << " is NODE: node_id=" << record.node_id - << " (reversed_in_parent=" << record.reversed_in_parent << "), length=" << record.node_length << endl; - cerr << " node_handle: id=" << hgraph->get_id(node_handle) << " rev=" << hgraph->get_is_reverse(node_handle) << endl; - cerr << " Boost vertices: " << vertex_offset << " (fwd), " << vertex_offset + 1 << " (rev)" << endl; + cerr << " Loop distances: start_start=" << start_start_distance << ", end_end=" << end_end_distance << endl; #endif - // Node vertex layout (like HashGraph overload): - // base = forward orientation - // base+1 = reverse orientation - // Both get seqlen set - - // Map both orientations; both orientations of a node count as - // "inward-facing" since both can be arrived at. 
- handle_bgnid_map[node_handle] = vertex_offset; - handle_bgnid_map[hgraph->flip(node_handle)] = vertex_offset + 1; - + if (end_end_distance != INF_INT) { + // Loops are edges between different orientations of the same endpoint + auto new_loop_edge = add_edge(bgid(child_net_rank, true, false), bgid(child_net_rank, false, true), ov); + ov[new_loop_edge.first].weight = end_end_distance; #ifdef debug_boost_graph - cerr << " Mapping node_handle -> Boost " << vertex_offset << endl; - cerr << " Mapping flip(node_handle) -> Boost " << vertex_offset + 1 << endl; + cerr << " Edge " << bgid(child_net_rank, true, false) << " -> " << bgid(child_net_rank, false, true) << " (end loop, weight=" << end_end_distance << ")" << endl; #endif + } - // Set seqlen on both orientations (like HashGraph overload) - ov[vertex_offset].seqlen = record.node_length; - ov[vertex_offset + 1].seqlen = record.node_length; - + if (start_start_distance != INF_INT) { + auto new_loop_edge = add_edge(bgid(child_net_rank, false, false), bgid(child_net_rank, true, true), ov); + ov[new_loop_edge.first].weight = start_start_distance; #ifdef debug_boost_graph - cerr << " Setting seqlen=" << record.node_length << " on both Boost vertices " << vertex_offset << " and " << vertex_offset + 1 << endl; + cerr << " Edge " << bgid(child_net_rank, false, false) << " -> " << bgid(child_net_rank, true, true) << " (start loop, weight=" << start_start_distance << ")" << endl; #endif - - vertex_offset += 2; - - } else { - cerr << "unexpected rec_type" << endl; } } #ifdef debug_boost_graph cerr << "--- Phase 2: Adding edges between children based on handle graph edges ---" << endl; - cerr << "Handle map contents:" << endl; - for (const auto& [h, bg_id] : handle_bgnid_map) { - cerr << " handle(id=" << hgraph->get_id(h) << ", rev=" << hgraph->get_is_reverse(h) << ") -> Boost " << bg_id << endl; - } #endif for (auto [handle_in, vertex_id_in] : handle_bgnid_map) { // The map contains inward-facing orientations of every handle. 
// So get the outward-facing versioin. - handle_t handle = hgraph->flip(handle_in); NODE_UINT vertex_id = rev_bgid(vertex_id_in); +#ifdef debug_boost_graph + cerr << " handle(id=" << hgraph->get_id(handle) << ", rev=" << hgraph->get_is_reverse(handle) << ") -> Boost " << vertex_id << endl; +#endif + // We need to get all the edges off the right side of this outward-facing // handle and create them if they don't already exist. hgraph->follow_edges(handle, false, [&] (const handle_t& next) { @@ -245,17 +281,21 @@ CHOverlay make_boost_graph(const SnarlDistanceIndex::TemporaryDistanceIndex& tem } NODE_UINT next_id = found->second; +#ifdef debug_boost_graph + cerr << " Connects to handle(id=" << hgraph->get_id(next) << ", rev=" << hgraph->get_is_reverse(next) << ") -> Boost " << next_id << endl; +#endif + auto edge_info = edge(vertex_id, next_id, ov); if (!edge_info.second) { #ifdef debug_boost_graph - cerr << " Adding edge " << vertex_id << " -> " << next_id << endl; - cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(vertex_id) << endl; + cerr << " Adding edge " << vertex_id << " -> " << next_id << endl; + cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(vertex_id) << endl; #endif add_edge(vertex_id, next_id, ov); add_edge(rev_bgid(next_id), rev_bgid(vertex_id), ov); } else { #ifdef debug_boost_graph - cerr << " Edge already exists" << endl; + cerr << " Edge already exists" << endl; #endif } }); @@ -528,6 +568,10 @@ void make_contraction_hierarchy(CHOverlay& ov) { //std::fill(skip.begin(), skip.end(), false); //for (auto n: arti_pts) { skip[n] = true; } + // We maintain a priority queue that lest us find the smallest-priority item. + // + // We keep all but the last item heap-ified, and the smallest-priority item + // last, as our invariant. 
vector> queue_objs; queue_objs.reserve(num_vertices(ov)/2); for (int i = 0; i < num_vertices(ov); i+=1) { if (ov[i].contracted) { continue; } @@ -552,9 +596,15 @@ void make_contraction_hierarchy(CHOverlay& ov) { int new_pri = ((2*edif)+ (1*ov[node].contracted_neighbors)) + (5*(ov[node].level+1)) + ov[node].arc_cover; - if (new_pri > get<0>(queue_objs.back())) { + if (new_pri > get<0>(queue_objs.back())) { + // After recomputing priority, the priority is actually greater than the nex-tlowest-priority entry. + // Put this back so we can get that one instead. + // First we need to put what's the current last item back in its proper place. + push_heap(queue_objs.begin(), queue_objs.end(), greater>()); + // Then we put this item back and heapify everything queue_objs.emplace_back(new_pri, node); - push_heap(queue_objs.begin(), queue_objs.end(), greater>()); + push_heap(queue_objs.begin(), queue_objs.end(), greater>()); + // Then we find the new smallest-priority item. pop_heap(queue_objs.begin(), queue_objs.end(), greater>()); continue; } diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index af7da36a..fe36213f 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -1119,8 +1119,8 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, #ifdef debug_distances cerr << "\t\tFind distance between " << net_handle_as_string(child1) - << " and " << net_handle_as_string(child2) - << "\tin parent " << net_handle_as_string(parent) << endl; + << " and " << net_handle_as_string(child2) + << " facing back toward it in parent " << net_handle_as_string(parent) << endl; assert(canonical(parent) == canonical(get_parent(child1))); assert(canonical(parent) == canonical(get_parent(child2))); #endif @@ -1315,7 +1315,7 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, } #ifdef debug_distances - cerr << " between ranks " << rank1 << " " << rev1 << " " << rank2 << " " << rev2 << 
endl; + cerr << " between ranks " << rank1 << " " << rev1 << " " << rank2 << " " << rev2 << endl; #endif if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == DISTANCED_SIMPLE_SNARL) { @@ -1329,6 +1329,7 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, auto record_it = snarl_tree_records->begin() + get_record_offset(parent); // This points to the length and the variable-sized data auto length_data_it = record_it + SNARL_RECORD_SIZE; +#ifdef debug_hub_label_storage std::cerr << " Hub label data length: " << *length_data_it << endl; std::cerr << " Hub label data: "; for (size_t i = 0; i < *length_data_it; i++) { @@ -1339,7 +1340,39 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, std::cerr << *(length_data_it + 1 + i); } std::cerr << std::endl; - size_t distance = hhl_query(length_data_it + 1, rank1, rank2); +#endif + // Here we need to convert from child rank (where 0 and 1 represent + // snarl boundaries oriented along the snarl) and child orientation + // to HHL vertex ranks. + // + // The HHL index thinks a child being "reverse" means that we're + // thinking of the child in the opposite orientation form how it + // appears in us. (So even not-reversed won't mean local forward + // orientation if is_reversed_in_parent() is true for that child). + // + // TODO: Probably need to also flip for is_reversed_in_parent() to + // account for this. + // + // TODO: rev1 and rev2 seem to actually be backwards for + // non-sentinel children (rev1 is true if child1's traversal + // *doesn't* end at its start, and thus ends at its end). Right now + // we address this by flipping them. We flip them and query + // going out the end of child 1 if its traversal ends at its end. + // But that's weird and makes no sense that we would need to do + // that! 
+ // + // Because this function takes the destination as facing back + // towards the start, but hhl_query takes it as facing along the + // connecting path, we need to flip the second orientation one more + // time. + size_t distance = hhl_query(length_data_it + 1, bgid(rank1, rev1 ^ !is_sentinel(child1), true), bgid(rank2, !(rev2 ^ !is_sentinel(child2)), false)); + if (distance == bdsg::INF_INT) { + // Promote unreachable sentinel to wider type. + distance = std::numeric_limits::max(); + } +#ifdef debug_distances + cerr << " Resulting distance: " << distance << endl; +#endif return distance; } else if (rank1 == 0 && rank2 == 0 && !snarl_is_root) { @@ -1831,7 +1864,7 @@ size_t SnarlDistanceIndex::minimum_distance(const handlegraph::nid_t id1, const #ifdef debug_distances cerr << endl; - cerr << "Find the minimum distance between " << id1 << " " <" + std::to_string(record.get_end_id()) + (record.get_end_orientation() ? "rev" : "fd")); - result += "traversing "; + result += " traversing "; result += (starts_at(net) == START ? "start" : (starts_at(net) == END ? "end" : "tip")); result += "->"; result += (ends_at(net) == START ? "start" : (ends_at(net) == END ? 
"end" : "tip")); From a5c20a8ee157f117dde5715e514b94e8e8edc815 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 29 Jan 2026 08:56:24 -0800 Subject: [PATCH 22/75] Be clear about who is responsible for initializing NodeProp fields, and do it --- bdsg/include/bdsg/ch.hpp | 9 ++++++++- bdsg/src/ch.cpp | 11 ++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/bdsg/include/bdsg/ch.hpp b/bdsg/include/bdsg/ch.hpp index 283296b0..0b3fda5c 100644 --- a/bdsg/include/bdsg/ch.hpp +++ b/bdsg/include/bdsg/ch.hpp @@ -54,13 +54,15 @@ NODE_UINT bgid(size_t net_rank, bool is_reverse, bool is_source); NODE_UINT rev_bgid(NODE_UINT n); -typedef struct NodeProp { +typedef struct NodeProp { + // This is initialized by make_boost_graph() DIST_UINT seqlen; DIST_UINT max_out = 0; NODE_UINT contracted_neighbors = 0; NODE_UINT level = 0; NODE_UINT arc_cover = 1; bool contracted = false; + // This is left uninitialized until make_contraction_hierarchy() is run. NODE_UINT new_id; } NodeProp; @@ -100,6 +102,11 @@ int edge_diff(ContractedGraph::vertex_descriptor nid, ContractedGraph& ch, CHOve void contract(CHOverlay::vertex_descriptor nid, ContractedGraph& ch, CHOverlay& ov, vector& node_dists, vector& shouldnt_contract, int hop_limit); +/** + * Find the contraction hierarchy order for the graph. + * + * Initializes the new_id field of each NodeProb in the graph. 
+ */ void make_contraction_hierarchy(CHOverlay& ov); template diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index dcfb143e..2fb4e5c5 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -55,6 +55,7 @@ CHOverlay make_boost_graph(const bdsg::HashGraph& hg) { hg.for_each_handle([&](const handle_t& h) { auto nid = bgid(h, hg); + // Initialize all the seqlen fields g[nid].seqlen = hg.get_length(h); g[rev_bgid(nid)].seqlen = g[nid].seqlen; }); @@ -201,8 +202,16 @@ CHOverlay make_boost_graph(const SnarlDistanceIndex::TemporaryDistanceIndex& tem } else { throw std::runtime_error("unexpected rec_type: " + std::to_string(child.first)); } - + // Initialize all the seqlen fields of the vertices to 0; we only use edge + // weights in this mode, but we're still responsible for them. + // TODO: Is it worth doing this just as a separate scan in order instead? + for (bool is_reverse : {false, true}) { + for (bool is_source : {false, true}) { + ov[bgid(child_net_rank, is_reverse, is_source)].seqlen = 0; + } + } + // Map inward orientations of start and end handles if (child_net_rank != 0) { // We can arrive at the start of everything but our own start. 
From 9a7e4c32c0b3c3ddc5fc65df1b5c29a959e68b20 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 29 Jan 2026 10:58:22 -0800 Subject: [PATCH 23/75] Produce more descriptive errors for out of bounds temporary snarl record access attempts --- bdsg/include/bdsg/snarl_distance_index.hpp | 36 +++++++++++++++--- bdsg/src/snarl_distance_index.cpp | 44 ++++++++++++---------- 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 99ac281b..9c8d98a3 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -1737,44 +1737,68 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab if (ref.first != TEMP_CHAIN) { throw std::invalid_argument("Trying to look up a non-chain as a chain"); } - return temp_chain_records.at(ref.second); + if (ref.second >= temp_chain_records.size()) { + throw std::out_of_range("Trying to look up chain " + std::to_string(ref.second) + " but temporary index only has " + std::to_string(temp_chain_records.size()) + " chains"); + } + return temp_chain_records[ref.second]; } inline const TemporaryChainRecord& get_chain(const temp_record_ref_t& ref) const { if (ref.first != TEMP_CHAIN) { throw std::invalid_argument("Trying to look up a non-chain as a chain"); } - return temp_chain_records.at(ref.second); + if (ref.second >= temp_chain_records.size()) { + throw std::out_of_range("Trying to look up chain " + std::to_string(ref.second) + " but temporary index only has " + std::to_string(temp_chain_records.size()) + " chains"); + } + return temp_chain_records[ref.second]; } inline TemporarySnarlRecord& get_snarl(const temp_record_ref_t& ref) { if (ref.first != TEMP_SNARL) { throw std::invalid_argument("Trying to look up a non-snarl as a snarl"); } - return temp_snarl_records.at(ref.second); + if (ref.second >= temp_snarl_records.size()) { + throw std::out_of_range("Trying to look up 
snarl " + std::to_string(ref.second) + " but temporary index only has " + std::to_string(temp_snarl_records.size()) + " snarls"); + } + return temp_snarl_records[ref.second]; } inline const TemporarySnarlRecord& get_snarl(const temp_record_ref_t& ref) const { if (ref.first != TEMP_SNARL) { throw std::invalid_argument("Trying to look up a non-snarl as a snarl"); } - return temp_snarl_records.at(ref.second); + if (ref.second >= temp_snarl_records.size()) { + throw std::out_of_range("Trying to look up snarl " + std::to_string(ref.second) + " but temporary index only has " + std::to_string(temp_snarl_records.size()) + " snarls"); + } + return temp_snarl_records[ref.second]; } inline TemporaryNodeRecord& get_node(const temp_record_ref_t& ref) { if (ref.first != TEMP_NODE) { throw std::invalid_argument("Trying to look up a non-node as a node"); } + if (ref.second < min_node_id) { + throw std::out_of_range("Trying to look up node " + std::to_string(ref.second) + " but temporary index starts at node " + std::to_string(min_node_id)); + } + if (ref.second >= temp_node_records.size() + min_node_id) { + throw std::out_of_range("Trying to look up node " + std::to_string(ref.second) + " but temporary index only goes up until node " + std::to_string(temp_node_records.size() + min_node_id)); + } // Nodes use a node ID in the ref, not an index. 
- return temp_node_records.at(ref.second - min_node_id); + return temp_node_records[ref.second - min_node_id]; } inline const TemporaryNodeRecord& get_node(const temp_record_ref_t& ref) const { if (ref.first != TEMP_NODE) { throw std::invalid_argument("Trying to look up a non-node as a node"); } + if (ref.second < min_node_id) { + throw std::out_of_range("Trying to look up node " + std::to_string(ref.second) + " but temporary index starts at node " + std::to_string(min_node_id)); + } + if (ref.second >= temp_node_records.size() + min_node_id) { + throw std::out_of_range("Trying to look up node " + std::to_string(ref.second) + " but temporary index only goes up until node " + std::to_string(temp_node_records.size() + min_node_id)); + } // Nodes use a node ID in the ref, not an index. - return temp_node_records.at(ref.second - min_node_id); + return temp_node_records[ref.second - min_node_id]; } // Roots never need to be looked up. diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index fe36213f..d8e5f249 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -30,26 +30,30 @@ SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryDistanceIndex(){} SnarlDistanceIndex::TemporaryDistanceIndex::~TemporaryDistanceIndex(){} string SnarlDistanceIndex::TemporaryDistanceIndex::structure_start_end_as_string(temp_record_ref_t index) const { - if (index.first == TEMP_NODE) { - const TemporaryNodeRecord& temp_node_record = get_node(index); - assert(index.second == temp_node_record.node_id); - return "node " + std::to_string(temp_node_record.node_id); - } else if (index.first == TEMP_SNARL) { - const TemporarySnarlRecord& temp_snarl_record = get_snarl(index); - return "snarl " + std::to_string(temp_snarl_record.start_node_id) - + (temp_snarl_record.start_node_rev ? " rev" : " fd") - + " -> " + std::to_string(temp_snarl_record.end_node_id) - + (temp_snarl_record.end_node_rev ? 
" rev" : " fd"); - } else if (index.first == TEMP_CHAIN) { - const TemporaryChainRecord& temp_chain_record = get_chain(index); - return "chain " + std::to_string(temp_chain_record.start_node_id) - + (temp_chain_record.start_node_rev ? " rev" : " fd") - + " -> " + std::to_string(temp_chain_record.end_node_id) - + (temp_chain_record.end_node_rev ? " rev" : " fd"); - } else if (index.first == TEMP_ROOT) { - return (string) "root"; - } else { - return (string)"???" + std::to_string(index.first) + "???"; + try { + if (index.first == TEMP_NODE) { + const TemporaryNodeRecord& temp_node_record = get_node(index); + assert(index.second == temp_node_record.node_id); + return "node " + std::to_string(temp_node_record.node_id); + } else if (index.first == TEMP_SNARL) { + const TemporarySnarlRecord& temp_snarl_record = get_snarl(index); + return "snarl " + std::to_string(temp_snarl_record.start_node_id) + + (temp_snarl_record.start_node_rev ? " rev" : " fd") + + " -> " + std::to_string(temp_snarl_record.end_node_id) + + (temp_snarl_record.end_node_rev ? " rev" : " fd"); + } else if (index.first == TEMP_CHAIN) { + const TemporaryChainRecord& temp_chain_record = get_chain(index); + return "chain " + std::to_string(temp_chain_record.start_node_id) + + (temp_chain_record.start_node_rev ? " rev" : " fd") + + " -> " + std::to_string(temp_chain_record.end_node_id) + + (temp_chain_record.end_node_rev ? " rev" : " fd"); + } else if (index.first == TEMP_ROOT) { + return (string) "root"; + } else { + return (string)"???" 
+ std::to_string(index.first) + "???"; + } + } catch (std::out_of_range& e) { + throw std::out_of_range("Unable to look up (" + std::to_string(index.first) + ", " + std::to_string(index.second) + ") in temporary distance index due to out of range error: " + e.what()); } } //The max record length of this chain From a868f9243b9240090e6bd599cce46f22e39700ae Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 29 Jan 2026 12:03:23 -0800 Subject: [PATCH 24/75] Fix cases where simple snarls could be accessed with the non-simple SnarlRecord --- bdsg/include/bdsg/snarl_distance_index.hpp | 4 +- bdsg/src/snarl_distance_index.cpp | 108 ++++++++++----------- 2 files changed, 56 insertions(+), 56 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 9c8d98a3..f9cb19d8 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -634,6 +634,8 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ROOT_SNARL, DISTANCED_ROOT_SNARL, CHAIN, DISTANCED_CHAIN, MULTICOMPONENT_CHAIN, CHILDREN}; + // TODO: Doesn't this need to be inline? And isn't const not allowed on a + // static method? Is this just making the bool const? 
const static bool has_distances(record_t type) { return type == DISTANCED_NODE || type == DISTANCED_TRIVIAL_SNARL || type == DISTANCED_SIMPLE_SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL || type == DISTANCED_ROOT_SNARL @@ -1085,7 +1087,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab bool get_start_orientation() const; handlegraph::nid_t get_end_id() const; //Return true if the end node is traversed backwards to leave the snarl - handlegraph::nid_t get_end_orientation() const; + bool get_end_orientation() const; }; diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index d8e5f249..73ebbe68 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -449,7 +449,10 @@ net_handle_t SnarlDistanceIndex::get_net(const handle_t& handle, const handlegra } handle_t SnarlDistanceIndex::get_handle(const net_handle_t& net, const handlegraph::HandleGraph* graph) const{ if (get_handle_type(net) == SENTINEL_HANDLE) { - SnarlRecord snarl_record(net, &snarl_tree_records); + // We don't know if this is a trivial or nontrivial snarl, so we need + // to access it with the base class. + // TODO: Make a base class for any kind of snarl. + SnarlTreeRecord snarl_record(net, &snarl_tree_records); if (starts_at(net) == START) { return graph->get_handle(snarl_record.get_start_id(), ends_at(net) == START ? 
!snarl_record.get_start_orientation() //Going out @@ -751,8 +754,15 @@ bool SnarlDistanceIndex::for_each_child_impl(const net_handle_t& traversal, cons throw runtime_error("error: Looking for children of a node or sentinel in a simple snarl"); } } else if (record_type == SNARL_HANDLE) { - SnarlRecord snarl_record(traversal, &snarl_tree_records); - return snarl_record.for_each_child(iteratee); + // This could be a simple or non-simple snarl + net_handle_record_t specific_type = SnarlTreeRecord(traversal, &snarl_tree_records).get_record_handle_type(); + if (specific_type == SIMPLE_SNARL || specific_type == DISTANCED_SIMPLE_SNARL) { + SimpleSnarlRecord snarl_record(traversal, &snarl_tree_records); + return snarl_record.for_each_child(iteratee); + } else { + SnarlRecord snarl_record(traversal, &snarl_tree_records); + return snarl_record.for_each_child(iteratee); + } } else if (record_type == CHAIN_HANDLE) { ChainRecord chain_record(traversal, &snarl_tree_records); return chain_record.for_each_child(iteratee); @@ -1174,7 +1184,8 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, cerr << "=>They are in a snarl, check distance in snarl" << endl; cerr << "\tsnarl at offset " << parent_record_offset1 << " with ranks " << get_rank_in_parent(child1) << " " << get_rank_in_parent(child2) << endl; #endif - //They are in the same root snarl, so find the distance between them + //They are in the same root snarl, so find the distance between them. + // We know this isn't a simple snarl. 
SnarlRecord snarl_record(parent_record_offset1, &snarl_tree_records); return snarl_record.get_distance(get_rank_in_parent(child1), !child_ends_at_start1, @@ -2763,7 +2774,7 @@ void SnarlDistanceIndex::for_each_handle_in_shortest_path_in_snarl(const net_han size_t target_distance = distance_to_traverse; size_t starting_distance = distance_traversed; cerr << "Find shortest path in " << net_handle_as_string(snarl_handle) << " from " << net_handle_as_string(start) << " to " << net_handle_as_string(end) << " with distance " << distance_to_traverse << endl; - if (SnarlRecord(snarl_handle, &snarl_tree_records).get_record_type() != OVERSIZED_SNARL) { + if (SnarlTreeRecord(snarl_handle, &snarl_tree_records).get_record_type() != OVERSIZED_SNARL) { cerr << "\tactual distance is " << distance_in_parent(snarl_handle, start, flip(end)) << endl; assert(distance_in_parent(snarl_handle, start, flip(end)) == distance_to_traverse); } @@ -2775,8 +2786,7 @@ void SnarlDistanceIndex::for_each_handle_in_shortest_path_in_snarl(const net_han * there will always be only one that is on the minimum distance path. */ - SnarlRecord snarl_record (snarl_handle, &snarl_tree_records); - if (snarl_record.get_record_type() == OVERSIZED_SNARL) { + if (SnarlTreeRecord(snarl_handle, &snarl_tree_records).get_record_type() == OVERSIZED_SNARL) { //IF this is an oversized snarl, then we don't have any distance information so use the handlgraph algorithm //for traversing the shortest path @@ -3352,8 +3362,10 @@ nid_t SnarlDistanceIndex::node_id(const net_handle_t& net) const { return TrivialSnarlRecord(get_record_offset(net), &snarl_tree_records).get_node_id(get_node_record_offset(net)); } } else if (is_sentinel(net)) { - SnarlRecord snarl_record(net, &snarl_tree_records); - NodeRecord node_record; + // We don't know if this is a trivial or nontrivial snarl, so we need + // to access it with the base class. + // TODO: Make a base class for any kind of snarl. 
+ SnarlTreeRecord snarl_record(net, &snarl_tree_records); if (get_start_endpoint(net) == START) { return snarl_record.get_start_id(); } else { @@ -3726,7 +3738,7 @@ handlegraph::nid_t SnarlDistanceIndex::SnarlTreeRecord::get_end_id() const { } } -handlegraph::nid_t SnarlDistanceIndex::SnarlTreeRecord::get_end_orientation() const { +bool SnarlDistanceIndex::SnarlTreeRecord::get_end_orientation() const { record_t type = get_record_type(); if (type == ROOT) { throw runtime_error("error: trying to get the end node of the root"); @@ -4212,18 +4224,18 @@ SnarlDistanceIndex::SnarlRecord::SnarlRecord (size_t pointer, const bdsg::yomo:: records = tree_records; #ifdef debug_distance_indexing record_t type = get_record_type(); - assert(type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL || type == ROOT_SNARL - || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL); + if (!(type == SNARL || type == DISTANCED_SNARL || + type == OVERSIZED_SNARL || + type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL)) { + + throw std::runtime_error("SnarlRecord record type " + std::to_string(type) + " at offset " + std::to_string(record_offset) + " is not an acceptable type for a SnarlRecord; maybe SimpleSnarlRecord should be used instead?"); + } #endif } -SnarlDistanceIndex::SnarlRecord::SnarlRecord (net_handle_t net, const bdsg::yomo::UniqueMappedPointer* tree_records){ - record_offset = get_record_offset(net); - records = tree_records; -#ifdef debug_distance_indexing - net_handle_record_t type = get_handle_type(net); - assert(type == SNARL_HANDLE || type == SENTINEL_HANDLE || type == ROOT_HANDLE); -#endif +SnarlDistanceIndex::SnarlRecord::SnarlRecord (net_handle_t net, const bdsg::yomo::UniqueMappedPointer* tree_records) : + SnarlRecord(get_record_offset(net), tree_records) { + // Nothing to do! 
} @@ -5102,36 +5114,24 @@ SnarlDistanceIndex::ChainRecord::ChainRecord (size_t pointer, const bdsg::yomo:: record_offset = pointer; records = tree_records; + +#ifdef debug_distance_indexing net_handle_record_t record_type= get_record_handle_type(); if (record_type == NODE_HANDLE) { net_handle_record_t parent_type = SnarlTreeRecord( NodeRecord(pointer, 0, records).get_parent_record_offset(), records ).get_record_handle_type(); -#ifdef debug_distance_indexing - assert(parent_type == ROOT_HANDLE || parent_type == SNARL_HANDLE); -#endif - } else { -#ifdef debug_distance_indexing - assert(get_record_handle_type() == CHAIN_HANDLE); -#endif - } -} -SnarlDistanceIndex::ChainRecord::ChainRecord (net_handle_t net, const bdsg::yomo::UniqueMappedPointer* tree_records){ - record_offset = get_record_offset(net); - records = tree_records; - - net_handle_record_t record_type = get_record_handle_type(); -#ifdef debug_distance_indexing - if (record_type == NODE_HANDLE) { - net_handle_record_t parent_type = SnarlTreeRecord( - NodeRecord(record_offset, 0, records).get_parent_record_offset(), records - ).get_record_handle_type(); assert(parent_type == ROOT_HANDLE || parent_type == SNARL_HANDLE); - } else { - assert(get_record_handle_type() == CHAIN_HANDLE); + } else if (record_type != CHAIN_HANDLE) { + throw std::runtime_error("ChainRecord record type " + std::to_string(record_type) + " at offset " + std::to_string(record_offset) + " is not a node or a chain"); } #endif +} + +SnarlDistanceIndex::ChainRecord::ChainRecord (net_handle_t net, const bdsg::yomo::UniqueMappedPointer* tree_records) : + ChainRecord(get_record_offset(net), tree_records) { + // Nothing to do! 
} SnarlDistanceIndex::ChainRecord::ChainRecord (net_handle_t net, const bdsg::yomo::UniqueMappedPointer* tree_records, size_t tag){ @@ -5145,9 +5145,9 @@ SnarlDistanceIndex::ChainRecord::ChainRecord (net_handle_t net, const bdsg::yomo NodeRecord(record_offset, 0, records).get_parent_record_offset(), records ).get_record_handle_type(); assert(parent_type == ROOT_HANDLE || parent_type == SNARL_HANDLE); - } else { - assert(get_record_handle_type() == CHAIN_HANDLE); - } + } else if (record_type != CHAIN_HANDLE) { + throw std::runtime_error("ChainRecord record type " + std::to_string(record_type) + " at offset " + std::to_string(record_offset) + " is not a node or a chain"); + } #endif } @@ -5944,17 +5944,16 @@ void SnarlDistanceIndex::print_descendants_of(const net_handle_t net) const { } else { parent = net_handle_as_string(get_parent(net)); if (record_type == CHAIN_HANDLE) { - child_count = ChainRecord(net, &snarl_tree_records).get_node_count(); + child_count = ChainRecord(net, &snarl_tree_records).get_node_count(); } else if (record.get_record_type() == SNARL || - record.get_record_type() == DISTANCED_SNARL|| - record.get_record_type() == OVERSIZED_SNARL - ){ + record.get_record_type() == DISTANCED_SNARL|| + record.get_record_type() == OVERSIZED_SNARL) { child_count = SnarlRecord(net, &snarl_tree_records).get_node_count(); } else if (record.get_record_type() == TRIVIAL_SNARL || record.get_record_type() == DISTANCED_TRIVIAL_SNARL) { child_count = TrivialSnarlRecord(get_record_offset(net), &snarl_tree_records).get_node_count(); - }else if (record.get_record_type() == SIMPLE_SNARL || + } else if (record.get_record_type() == SIMPLE_SNARL || record.get_record_type() == DISTANCED_SIMPLE_SNARL) { child_count = SimpleSnarlRecord(net, &snarl_tree_records).get_node_count(); } else { @@ -5981,10 +5980,9 @@ void SnarlDistanceIndex::print_snarl_stats() const { //Get the number of children depending on the type of record size_t child_count; if (record.get_record_type() == SNARL 
|| - record.get_record_type() == DISTANCED_SNARL|| - record.get_record_type() == OVERSIZED_SNARL - ){ - + record.get_record_type() == DISTANCED_SNARL || + record.get_record_type() == OVERSIZED_SNARL) { + child_count = SnarlRecord(snarl_child, &snarl_tree_records).get_node_count(); } else if (record.get_record_type() == SIMPLE_SNARL || record.get_record_type() == DISTANCED_SIMPLE_SNARL) { @@ -6051,9 +6049,9 @@ void SnarlDistanceIndex::write_snarls_to_json() const { //Get the number of children depending on the type of record if (record.get_record_type() == SNARL || - record.get_record_type() == DISTANCED_SNARL|| - record.get_record_type() == OVERSIZED_SNARL - ){ + record.get_record_type() == DISTANCED_SNARL|| + record.get_record_type() == OVERSIZED_SNARL) { + size_t child_count = SnarlRecord(snarl_child, &snarl_tree_records).get_node_count(); json_object_set_new(out_json, "child_count", json_integer(child_count)); } else if (record.get_record_type() == SIMPLE_SNARL || From f8de1dc505dffb935a492e7133f4985b34af9f51 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 29 Jan 2026 14:17:18 -0800 Subject: [PATCH 25/75] Figure out ChainRecord is used to decode snarls sometimes and document that instead of unpicking it --- bdsg/include/bdsg/snarl_distance_index.hpp | 33 +++--- bdsg/src/snarl_distance_index.cpp | 125 ++++++++++++++++----- 2 files changed, 117 insertions(+), 41 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index f9cb19d8..a58313d3 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -219,8 +219,10 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab END_START, END_END, END_TIP, TIP_START, TIP_END, TIP_TIP}; - ///Type of a net_handle_t, which may not be the type of the record - ///This is to allow a node record to be seen as a chain from the perspective of a handle + /// Type of a net_handle_t, which may not 
be the type of the record + /// This is to allow a node record to be seen as a chain from the perspective of a handle. + /// And to allow a simple snarl record to be seen as a node, a chain, or a snarl. + /// TODO: What does that really mean? Why can that happen? enum net_handle_record_t {ROOT_HANDLE=0, NODE_HANDLE, SNARL_HANDLE, CHAIN_HANDLE, SENTINEL_HANDLE}; ///////////////////////////// functions for distance calculations using net_handle_t's @@ -653,8 +655,9 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab return ROOT_HANDLE; } else if (type == NODE || type == DISTANCED_NODE || type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { return NODE_HANDLE; - } else if (type == SNARL || type == DISTANCED_SNARL || type == SIMPLE_SNARL ||type == OVERSIZED_SNARL - || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL){ + } else if (type == SNARL || type == DISTANCED_SNARL || + type == OVERSIZED_SNARL || + type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { return SNARL_HANDLE; } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { return CHAIN_HANDLE; @@ -943,17 +946,10 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab private: /*Give each of the enum types a name for printing */ - vector record_t_as_string = {"ROOT", "NODE", "DISTANCED_NODE", - "TRIVIAL_SNARL", "DISTANCED_TRIVIAL_SNARL", - "SNARL", "DISTANCED_SNARL", "SIMPLE_SNARL", "OVERSIZED_SNARL", - "ROOT_SNARL", "DISTANCED_ROOT_SNARL", - "CHAIN", "DISTANCED_CHAIN", "MULTICOMPONENT_CHAIN", - "CHILDREN"}; - vector connectivity_t_as_string = { "START_START", "START_END", "START_TIP", - "END_START", "END_END", "END_TIP", - "TIP_START", "TIP_END", "TIP_TIP"}; - vector net_handle_record_t_string = {"ROOT_HANDLE", "NODE_HANDLE", "SNARL_HANDLE", - "CHAIN_HANDLE", "SENTINEL_HANDLE"}; + // TODO: The names can;'t be here unless we give up using them in static methods. 
+ const static vector record_t_as_string; + const static vector connectivity_t_as_string; + const static vector net_handle_record_t_string; /* If this is 0, then don't store distances. @@ -1018,6 +1014,13 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab * * SnarlTreeRecordWriter does the same thing but for writing values to the index. * + * Note that each SnarlTreeRecord class (expecially ChainRecord) sometimes + * knows how to parse/interpret *other* actual record types, to support the + * system where a node can "pretend" to be a chain, or a simple snarl can + * pretend to be either a node (TODO: why?) or a chain. We end up parsing the + * record with the class appropriate to the thing we want to interpret it as, + * not the one you would pick from its stored record type. + * */ struct SnarlTreeRecord { diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 73ebbe68..43825a75 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -12,6 +12,23 @@ using namespace std; using namespace handlegraph; namespace bdsg { +const vector SnarlDistanceIndex::record_t_as_string = { + "ROOT", "NODE", "DISTANCED_NODE", + "TRIVIAL_SNARL", "DISTANCED_TRIVIAL_SNARL", + "SNARL", "DISTANCED_SNARL", "SIMPLE_SNARL", "OVERSIZED_SNARL", + "ROOT_SNARL", "DISTANCED_ROOT_SNARL", + "CHAIN", "DISTANCED_CHAIN", "MULTICOMPONENT_CHAIN", + "CHILDREN" +}; +const vector SnarlDistanceIndex::connectivity_t_as_string = { + "START_START", "START_END", "START_TIP", + "END_START", "END_END", "END_TIP", + "TIP_START", "TIP_END", "TIP_TIP" +}; +const vector SnarlDistanceIndex::net_handle_record_t_string = { + "ROOT_HANDLE", "NODE_HANDLE", "SNARL_HANDLE", + "CHAIN_HANDLE", "SENTINEL_HANDLE" +}; /////////////////////////////////////////////////////////////////////////////////////////////////// //Constructor @@ -495,23 +512,40 @@ net_handle_t SnarlDistanceIndex::get_parent(const net_handle_t& child) const { 
throw runtime_error("error: trying to find the parent of the root"); } else if (get_record_type(snarl_tree_records->at(get_record_offset(child))) == SIMPLE_SNARL || get_record_type(snarl_tree_records->at(get_record_offset(child))) == DISTANCED_SIMPLE_SNARL) { +#ifdef debug_distances + std::cerr << "Child " << net_handle_as_string(child) << " has simple snarl record type " << record_t_as_string.at(get_record_type(snarl_tree_records->at(get_record_offset(child)))) << " and default handle type " << net_handle_record_t_string.at(get_record_handle_type(get_record_type(get_record_offset(child)))) << " and current handle type " << net_handle_record_t_string.at(get_handle_type(child)) << std::endl; +#endif + //If this is a simple snarl and a node or chain, then the parent offset doesn't change if (get_handle_type(child) == NODE_HANDLE) { - //If this is a node, then return it as a chain + // If this is a node, then return it as a chain + // TODO: Why can a simple snarl need to look like a node itself? + // TODO: Why can a simple snarl need to look like a chain? Because the node needs to look like a chain? +#ifdef debug_distances + std::cerr << "We were looking at a simple snarl as a node; project it as a chain." << std::endl; +#endif return get_net_handle_from_values(get_record_offset(child), child_connectivity, CHAIN_HANDLE, get_node_record_offset(child)); } else if (get_handle_type(child) == CHAIN_HANDLE) { //If this is a chain, then return the same thing as a snarl +#ifdef debug_distances + std::cerr << "We were looking at a simple snarl as a chain; project it as a snarl." 
<< std::endl; +#endif return get_net_handle_from_values(get_record_offset(child), START_END, SNARL_HANDLE, 1); } } //Otherwise, we need to move up one level in the snarl tree + SnarlTreeRecord child_record(child, &snarl_tree_records); //Get the pointer to the parent to find its type - size_t parent_pointer = SnarlTreeRecord(child, &snarl_tree_records).get_parent_record_offset(); - net_handle_record_t parent_type = SnarlTreeRecord(parent_pointer, &snarl_tree_records).get_record_handle_type(); + size_t parent_pointer = child_record.get_parent_record_offset(); + SnarlTreeRecord parent_record(parent_pointer, &snarl_tree_records); + net_handle_record_t parent_type = parent_record.get_record_handle_type(); +#ifdef debug_distances + std::cerr << "Parent of " << net_handle_as_string(child) << " has record type " << record_t_as_string.at(parent_record.get_record_type()) << " and default handle type " << net_handle_record_t_string.at(parent_type) << std::endl; +#endif //The connectivity of the parent defaults to start-end connectivity_t parent_connectivity = START_END; @@ -530,8 +564,13 @@ net_handle_t SnarlDistanceIndex::get_parent(const net_handle_t& child) const { } if (get_handle_type(child) == NODE_HANDLE && parent_type != CHAIN_HANDLE) { //If this is a node and it's parent is not a chain, we want to pretend that its - //parent is a chain - return get_net_handle_from_values(get_record_offset(child), child_connectivity, CHAIN_HANDLE, get_node_record_offset(child)); + //parent is a chain version of the child + net_handle_t projected = get_net_handle_from_values(get_record_offset(child), child_connectivity, CHAIN_HANDLE, get_node_record_offset(child)); +#ifdef debug_distances + std::cerr << "Parent of " << net_handle_as_string(child) << " projected as " << net_handle_as_string(projected) << std::endl; +#endif + + return projected; } return get_net_handle(parent_pointer, parent_connectivity); @@ -539,6 +578,10 @@ net_handle_t SnarlDistanceIndex::get_parent(const 
net_handle_t& child) const { net_handle_t SnarlDistanceIndex::get_bound(const net_handle_t& snarl, bool get_end, bool face_in) const { if (get_handle_type(snarl) == CHAIN_HANDLE) { + // This could be a real chain, a node looking like a chain, or a simple + // snarl record looking like a chain (maybe because the node it was + // looking like needs to look like a chain now). ChainRecord promises + // to know how to interpret all of them. ChainRecord chain_record(snarl, &snarl_tree_records); size_t offset; size_t node_offset; @@ -755,7 +798,7 @@ bool SnarlDistanceIndex::for_each_child_impl(const net_handle_t& traversal, cons } } else if (record_type == SNARL_HANDLE) { // This could be a simple or non-simple snarl - net_handle_record_t specific_type = SnarlTreeRecord(traversal, &snarl_tree_records).get_record_handle_type(); + record_t specific_type = SnarlTreeRecord(traversal, &snarl_tree_records).get_record_type(); if (specific_type == SIMPLE_SNARL || specific_type == DISTANCED_SIMPLE_SNARL) { SimpleSnarlRecord snarl_record(traversal, &snarl_tree_records); return snarl_record.for_each_child(iteratee); @@ -4228,7 +4271,7 @@ SnarlDistanceIndex::SnarlRecord::SnarlRecord (size_t pointer, const bdsg::yomo:: type == OVERSIZED_SNARL || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL)) { - throw std::runtime_error("SnarlRecord record type " + std::to_string(type) + " at offset " + std::to_string(record_offset) + " is not an acceptable type for a SnarlRecord; maybe SimpleSnarlRecord should be used instead?"); + throw std::runtime_error("SnarlRecord record type " + std::to_string(type) + (type < record_t_as_string.size() ? 
(" " + record_t_as_string[type]) : "") + " at offset " + std::to_string(record_offset) + " is not an acceptable type for a SnarlRecord; maybe SimpleSnarlRecord should be used instead?"); } #endif } @@ -5116,16 +5159,27 @@ SnarlDistanceIndex::ChainRecord::ChainRecord (size_t pointer, const bdsg::yomo:: records = tree_records; #ifdef debug_distance_indexing - net_handle_record_t record_type= get_record_handle_type(); - if (record_type == NODE_HANDLE) { + net_handle_record_t type = get_record_handle_type(); + if (type == NODE_HANDLE) { net_handle_record_t parent_type = SnarlTreeRecord( - NodeRecord(pointer, 0, records).get_parent_record_offset(), records + NodeRecord(record_offset, 0, records).get_parent_record_offset(), records ).get_record_handle_type(); assert(parent_type == ROOT_HANDLE || parent_type == SNARL_HANDLE); - } else if (record_type != CHAIN_HANDLE) { - throw std::runtime_error("ChainRecord record type " + std::to_string(record_type) + " at offset " + std::to_string(record_offset) + " is not a node or a chain"); + return; + } + record_t record_type = get_record_type(); + if (type == SNARL_HANDLE) { + // Simple snarls are also able to be looked at as chains, and ChainRecord knows how to parse them. + if (record_type == SIMPLE_SNARL || record_type == DISTANCED_SIMPLE_SNARL) { + // This is allowed + return; + } + } else if (type == CHAIN_HANDLE) { + // Chain records as stored are allowed. + return; } + throw std::runtime_error("ChainRecord with handle type " + std::to_string(type) + (type < net_handle_record_t_string.size() ? (" " + net_handle_record_t_string[type]) : "") + " and record type " + std::to_string(record_type) + (record_type < record_t_as_string.size() ? 
(" " + record_t_as_string[record_type]) : "") + " at offset " + std::to_string(record_offset) + " is not a node or a chain or a simple snarl"); #endif } @@ -5139,15 +5193,27 @@ SnarlDistanceIndex::ChainRecord::ChainRecord (net_handle_t net, const bdsg::yomo records = tree_records; #ifdef debug_distance_indexing - net_handle_record_t record_type= SnarlDistanceIndex::get_record_handle_type(SnarlDistanceIndex::get_record_type(tag )); - if (record_type == NODE_HANDLE) { + net_handle_record_t type = SnarlDistanceIndex::get_record_handle_type(SnarlDistanceIndex::get_record_type(tag )); + if (type == NODE_HANDLE) { net_handle_record_t parent_type = SnarlTreeRecord( NodeRecord(record_offset, 0, records).get_parent_record_offset(), records ).get_record_handle_type(); + assert(parent_type == ROOT_HANDLE || parent_type == SNARL_HANDLE); - } else if (record_type != CHAIN_HANDLE) { - throw std::runtime_error("ChainRecord record type " + std::to_string(record_type) + " at offset " + std::to_string(record_offset) + " is not a node or a chain"); - } + return; + } + record_t record_type = get_record_type(); + if (type == SNARL_HANDLE) { + // Simple snarls are also able to be looked at as chains, and ChainRecord knows how to parse them. + if (record_type == SIMPLE_SNARL || record_type == DISTANCED_SIMPLE_SNARL) { + // This is allowed + return; + } + } else if (type == CHAIN_HANDLE) { + // Chain records as stored are allowed. + return; + } + throw std::runtime_error("ChainRecord with handle type " + std::to_string(type) + (type < net_handle_record_t_string.size() ? (" " + net_handle_record_t_string[type]) : "") + " and record type " + std::to_string(record_type) + (record_type < record_t_as_string.size() ? 
(" " + record_t_as_string[record_type]) : "") + " at offset " + std::to_string(record_offset) + " is not a node or a chain or a simple snarl"); #endif } @@ -5400,7 +5466,7 @@ net_handle_t SnarlDistanceIndex::ChainRecord::get_next_child(const net_handle_t& if (get_handle_type(net_handle) == NODE_HANDLE) { //If this is a node in a trivial snarl #ifdef debug_snarl_traversal - cerr << "GEt next in chain after " << TrivialSnarlRecord(get_record_offset(net_handle), records).get_node_id(get_node_record_offset(net_handle)) << endl; + cerr << "Get next in chain after " << TrivialSnarlRecord(get_record_offset(net_handle), records).get_node_id(get_node_record_offset(net_handle)) << endl; #endif if (go_left && get_node_record_offset(net_handle) != 0) { //If we are going left and this is not the first node in the trivial snarl, @@ -5831,21 +5897,26 @@ string SnarlDistanceIndex::net_handle_as_string(const net_handle_t& net) const { net_handle_record_t type = get_handle_type(net); SnarlTreeRecord record (net, &snarl_tree_records); net_handle_record_t record_type = record.get_record_handle_type(); - string result; + string result = "@" + std::to_string(get_record_offset(net)) + "="; if (type == ROOT_HANDLE) { if (record.get_record_type() == ROOT_SNARL || record.get_record_type() == DISTANCED_ROOT_SNARL) { - return "root snarl"; + result += "root snarl"; + return result; } else { - return "root"; + result += "root"; + return result; } } else if (type == NODE_HANDLE) { if (ends_at(net) == starts_at(net)) { - return "node" + std::to_string( node_id(net)) + (ends_at(net) == START ? "rev" : "fd") + " that is the end node of a looping chain"; + result += "node" + std::to_string( node_id(net)) + (ends_at(net) == START ? "rev" : "fd") + " that is the end node of a looping chain"; + return result; } - return "node " + std::to_string( node_id(net)) + (ends_at(net) == START ? "rev" : "fd"); + result += "node " + std::to_string( node_id(net)) + (ends_at(net) == START ? 
"rev" : "fd"); + return result; } else if (type == SNARL_HANDLE) { if (record.get_record_type() == ROOT) { - return "root snarl"; + result += "root snarl"; + return result; } if (get_node_record_offset(net) == 1) { result += "simple snarl "; @@ -5856,11 +5927,13 @@ string SnarlDistanceIndex::net_handle_as_string(const net_handle_t& net) const { result += "snarl "; } } else if (type == CHAIN_HANDLE && record_type == NODE_HANDLE) { - return "node " + std::to_string( NodeRecord(net, &snarl_tree_records).get_node_id()) + result += "node " + std::to_string( NodeRecord(net, &snarl_tree_records).get_node_id()) + (ends_at(net) == START ? "rev" : "fd") + " pretending to be a chain"; + return result; } else if (type == CHAIN_HANDLE && record_type == SNARL_HANDLE) { - return "node " + std::to_string( SimpleSnarlRecord(net, &snarl_tree_records).get_node_id()) + result += "node " + std::to_string( SimpleSnarlRecord(net, &snarl_tree_records).get_node_id()) + (ends_at(net) == START ? "rev" : "fd") + " pretending to be a chain in a simple snarl"; + return result; }else if (type == CHAIN_HANDLE) { result += "chain "; } else if (type == SENTINEL_HANDLE) { From 41ccdf3410a3c4232d9d8aa99190997704fb3e15 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 29 Jan 2026 14:34:57 -0800 Subject: [PATCH 26/75] Stop trying to report types we aren't allowed to compute --- bdsg/src/snarl_distance_index.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 43825a75..d2a5b08c 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -513,7 +513,7 @@ net_handle_t SnarlDistanceIndex::get_parent(const net_handle_t& child) const { } else if (get_record_type(snarl_tree_records->at(get_record_offset(child))) == SIMPLE_SNARL || get_record_type(snarl_tree_records->at(get_record_offset(child))) == DISTANCED_SIMPLE_SNARL) { #ifdef debug_distances - std::cerr << "Child " << 
net_handle_as_string(child) << " has simple snarl record type " << record_t_as_string.at(get_record_type(snarl_tree_records->at(get_record_offset(child)))) << " and default handle type " << net_handle_record_t_string.at(get_record_handle_type(get_record_type(get_record_offset(child)))) << " and current handle type " << net_handle_record_t_string.at(get_handle_type(child)) << std::endl; + std::cerr << "Child " << net_handle_as_string(child) << " has simple snarl record type " << record_t_as_string.at(get_record_type(snarl_tree_records->at(get_record_offset(child)))) << " and current handle type " << net_handle_record_t_string.at(get_handle_type(child)) << std::endl; #endif //If this is a simple snarl and a node or chain, then the parent offset doesn't change @@ -544,7 +544,7 @@ net_handle_t SnarlDistanceIndex::get_parent(const net_handle_t& child) const { net_handle_record_t parent_type = parent_record.get_record_handle_type(); #ifdef debug_distances - std::cerr << "Parent of " << net_handle_as_string(child) << " has record type " << record_t_as_string.at(parent_record.get_record_type()) << " and default handle type " << net_handle_record_t_string.at(parent_type) << std::endl; + std::cerr << "Parent of " << net_handle_as_string(child) << " has record type " << record_t_as_string.at(parent_record.get_record_type()) << std::endl; #endif //The connectivity of the parent defaults to start-end From a3948d447c18ed0515445e2f2e15dc66dc2cdde1 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Mon, 2 Feb 2026 13:46:37 -0800 Subject: [PATCH 27/75] Handle ROOT record type as also needing to canonicalize to the one true root handle --- bdsg/src/snarl_distance_index.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index d2a5b08c..8264dc24 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -667,7 +667,7 @@ net_handle_t 
SnarlDistanceIndex::flip(const net_handle_t& net) const { net_handle_t SnarlDistanceIndex::canonical(const net_handle_t& net) const { SnarlTreeRecord record(net, &snarl_tree_records); record_t type = record.get_record_type(); - if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + if (type == ROOT || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { return get_root(); } @@ -1175,11 +1175,20 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, const net_handle_t& child1, const net_handle_t& child2, const HandleGraph* graph, size_t distance_limit) const { #ifdef debug_distances + auto child1_parent = get_parent(child1); + auto child2_parent = get_parent(child2); cerr << "\t\tFind distance between " << net_handle_as_string(child1) << " and " << net_handle_as_string(child2) - << " facing back toward it in parent " << net_handle_as_string(parent) << endl; - assert(canonical(parent) == canonical(get_parent(child1))); - assert(canonical(parent) == canonical(get_parent(child2))); + << " facing back toward it in parent " << net_handle_as_string(canonical(parent)) << endl; + cerr << "\t\tChild parents are " << net_handle_as_string(canonical(child1_parent)) << " and " << net_handle_as_string(canonical(child2_parent)) << endl; + + if (canonical(parent) != canonical(child1_parent) || canonical(parent) != canonical(child2_parent)) { + std::cerr << "Error: parent mismatch!" << std::endl; + std::cerr << as_integer(canonical(parent)) << " = " << net_handle_as_string(canonical(parent)) << std::endl; + std::cerr << as_integer(canonical(child1_parent)) << " = " << net_handle_as_string(canonical(child1_parent)) << std::endl; + std::cerr << as_integer(canonical(child2_parent)) << " = " << net_handle_as_string(canonical(child2_parent)) << std::endl; + assert(false); + } #endif //Get the orientation of the children. 
This only cares about the end endpoint, and assumes that things that end @@ -5897,7 +5906,7 @@ string SnarlDistanceIndex::net_handle_as_string(const net_handle_t& net) const { net_handle_record_t type = get_handle_type(net); SnarlTreeRecord record (net, &snarl_tree_records); net_handle_record_t record_type = record.get_record_handle_type(); - string result = "@" + std::to_string(get_record_offset(net)) + "="; + string result = net_handle_record_t_string.at(type) + "@" + std::to_string(get_record_offset(net)) + "="; if (type == ROOT_HANDLE) { if (record.get_record_type() == ROOT_SNARL || record.get_record_type() == DISTANCED_ROOT_SNARL) { result += "root snarl"; From 060985545e6d6233de8d701f9cd6ec15ddf21f2b Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Mon, 2 Feb 2026 14:23:32 -0800 Subject: [PATCH 28/75] Use stringification wrappers to avoid mis-indexing the enum name tables --- bdsg/include/bdsg/snarl_distance_index.hpp | 40 ++++++++++++++++++++-- bdsg/src/snarl_distance_index.cpp | 12 +++---- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index a58313d3..18fac0a4 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -946,11 +946,45 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab private: /*Give each of the enum types a name for printing */ - // TODO: The names can;'t be here unless we give up using them in static methods. - const static vector record_t_as_string; - const static vector connectivity_t_as_string; + // TODO: The names can't be here unless we give up using them in static methods. 
+ const static vector record_t_as_string; // Note that the enum for this one is 1-based but the names are still 0-based + const static vector connectivity_t_as_string; // Note that the enum for this one is 1-based but the names are still 0-based const static vector net_handle_record_t_string; + // To deal with different offsets for the different types we use accessors. + // TODO: Should we just make std::to_string overloads instead? + + /** + * Convert a record_t to a string. + */ + inline static std::string stringify(const record_t& v) { + if ((int)v > 0 && v - 1 < record_t_as_string.size()) { + return record_t_as_string[v - 1]; + } + return ""; + } + + /** + * Convert a connectivity_t to a string. + */ + inline static std::string stringify(const connectivity_t& v) { + if ((int)v > 0 && v - 1 < connectivity_t_as_string.size()) { + return connectivity_t_as_string[v - 1]; + } + return ""; + } + + /** + * Convert a net_handle_record_t to a string. + */ + inline static std::string stringify(const net_handle_record_t& v) { + // For this one, 0 is an allowed value. + if ((int)v >= 0 && v < net_handle_record_t_string.size()) { + return net_handle_record_t_string[v]; + } + return ""; + } + /* If this is 0, then don't store distances. 
* Otherwise, for snarls with more children than snarl_size_limit, only store the distances diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 8264dc24..de7a57a0 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -513,7 +513,7 @@ net_handle_t SnarlDistanceIndex::get_parent(const net_handle_t& child) const { } else if (get_record_type(snarl_tree_records->at(get_record_offset(child))) == SIMPLE_SNARL || get_record_type(snarl_tree_records->at(get_record_offset(child))) == DISTANCED_SIMPLE_SNARL) { #ifdef debug_distances - std::cerr << "Child " << net_handle_as_string(child) << " has simple snarl record type " << record_t_as_string.at(get_record_type(snarl_tree_records->at(get_record_offset(child)))) << " and current handle type " << net_handle_record_t_string.at(get_handle_type(child)) << std::endl; + std::cerr << "Child " << net_handle_as_string(child) << " has simple snarl record type " << stringify(get_record_type(snarl_tree_records->at(get_record_offset(child)))) << " and current handle type " << stringify(get_handle_type(child)) << std::endl; #endif //If this is a simple snarl and a node or chain, then the parent offset doesn't change @@ -544,7 +544,7 @@ net_handle_t SnarlDistanceIndex::get_parent(const net_handle_t& child) const { net_handle_record_t parent_type = parent_record.get_record_handle_type(); #ifdef debug_distances - std::cerr << "Parent of " << net_handle_as_string(child) << " has record type " << record_t_as_string.at(parent_record.get_record_type()) << std::endl; + std::cerr << "Parent of " << net_handle_as_string(child) << " at " << parent_pointer << " has record type " << stringify(parent_record.get_record_type()) << std::endl; #endif //The connectivity of the parent defaults to start-end @@ -4280,7 +4280,7 @@ SnarlDistanceIndex::SnarlRecord::SnarlRecord (size_t pointer, const bdsg::yomo:: type == OVERSIZED_SNARL || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL)) { - 
throw std::runtime_error("SnarlRecord record type " + std::to_string(type) + (type < record_t_as_string.size() ? (" " + record_t_as_string[type]) : "") + " at offset " + std::to_string(record_offset) + " is not an acceptable type for a SnarlRecord; maybe SimpleSnarlRecord should be used instead?"); + throw std::runtime_error("SnarlRecord record type " + std::to_string(type) + " " + stringify(type) + " at offset " + std::to_string(record_offset) + " is not an acceptable type for a SnarlRecord; maybe SimpleSnarlRecord should be used instead?"); } #endif } @@ -5188,7 +5188,7 @@ SnarlDistanceIndex::ChainRecord::ChainRecord (size_t pointer, const bdsg::yomo:: // Chain records as stored are allowed. return; } - throw std::runtime_error("ChainRecord with handle type " + std::to_string(type) + (type < net_handle_record_t_string.size() ? (" " + net_handle_record_t_string[type]) : "") + " and record type " + std::to_string(record_type) + (record_type < record_t_as_string.size() ? (" " + record_t_as_string[record_type]) : "") + " at offset " + std::to_string(record_offset) + " is not a node or a chain or a simple snarl"); + throw std::runtime_error("ChainRecord with handle type " + std::to_string(type) + " " + stringify(type) + " and record type " + std::to_string(record_type) + " " + stringify(record_type) + " at offset " + std::to_string(record_offset) + " is not a node or a chain or a simple snarl"); #endif } @@ -5222,7 +5222,7 @@ SnarlDistanceIndex::ChainRecord::ChainRecord (net_handle_t net, const bdsg::yomo // Chain records as stored are allowed. return; } - throw std::runtime_error("ChainRecord with handle type " + std::to_string(type) + (type < net_handle_record_t_string.size() ? (" " + net_handle_record_t_string[type]) : "") + " and record type " + std::to_string(record_type) + (record_type < record_t_as_string.size() ? 
(" " + record_t_as_string[record_type]) : "") + " at offset " + std::to_string(record_offset) + " is not a node or a chain or a simple snarl"); + throw std::runtime_error("ChainRecord with handle type " + std::to_string(type) + " " + stringify(type) + " and record type " + std::to_string(record_type) + " " + stringify(record_type) + " at offset " + std::to_string(record_offset) + " is not a node or a chain or a simple snarl"); #endif } @@ -5906,7 +5906,7 @@ string SnarlDistanceIndex::net_handle_as_string(const net_handle_t& net) const { net_handle_record_t type = get_handle_type(net); SnarlTreeRecord record (net, &snarl_tree_records); net_handle_record_t record_type = record.get_record_handle_type(); - string result = net_handle_record_t_string.at(type) + "@" + std::to_string(get_record_offset(net)) + "="; + string result = stringify(type) + "@" + std::to_string(get_record_offset(net)) + "="; if (type == ROOT_HANDLE) { if (record.get_record_type() == ROOT_SNARL || record.get_record_type() == DISTANCED_ROOT_SNARL) { result += "root snarl"; From 178857dbdbaf05fcb12586bf02ac789e581cc341 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Mon, 2 Feb 2026 14:24:27 -0800 Subject: [PATCH 29/75] Turn off debugging prints and some of the checks --- bdsg/src/snarl_distance_index.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index de7a57a0..4cff2375 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -1,6 +1,6 @@ -#define debug_distance_indexing +//#define debug_distance_indexing //#define debug_snarl_traversal -#define debug_distances +//#define debug_distances //#define debug_distance_paths #include "bdsg/snarl_distance_index.hpp" From 8a85c23624317919589a16ba7c433d23b8519c0c Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Mon, 2 Feb 2026 14:34:17 -0800 Subject: [PATCH 30/75] Flag off more debug prints --- bdsg/src/ch.cpp | 23 
++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index 2fb4e5c5..7f1a5976 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -3,7 +3,8 @@ file for quickly playing around with stuff */ #include "bdsg/ch.hpp" -#define debug_boost_graph +//#define debug_boost_graph +//#define debug_create namespace bdsg { bdsg::HashGraph make_test() { @@ -489,7 +490,9 @@ int get_hop_limit(CHOverlay& ov) { } void make_contraction_hierarchy(CHOverlay& ov) { +#ifdef debug_create cerr << "starting degree: " << (double)num_edges(ov)/num_vertices(ov) << endl; +#endif //thanks https://stackoverflow.com/questions/53490593/boostget-with-boostfiltered-graph-on-adjacency-list-with-netsed-properties for filtered_graph code auto contracted_filter = [&](CHOverlay::edge_descriptor eid) { return !(ov[eid].contracted); }; @@ -568,12 +571,16 @@ void make_contraction_hierarchy(CHOverlay& ov) { } } +#ifdef debug_create cerr << "num contr: " << num_con << endl; - cerr << "after round " << rnd+1 << " degree: " << (double)num_edges(ov)/num_vertices(ov) << endl; + cerr << "after round " << rnd+1 << " degree: " << (double)num_edges(ov)/num_vertices(ov) << endl; +#endif } +#ifdef debug_create cerr << "left over: " << num_vertices(ov) - num_con << endl; +#endif //std::fill(skip.begin(), skip.end(), false); //for (auto n: arti_pts) { skip[n] = true; } @@ -618,11 +625,13 @@ void make_contraction_hierarchy(CHOverlay& ov) { continue; } ov[node].level += 1; - + +#ifdef debug_create //if (queue_objs.size() % 100 == 1) { cerr << "remaining: " << queue_objs.size() << ", deg: " << (double)num_edges(ov)/num_vertices(ov) << endl; cerr << "lv: " << ov[node].level << endl; - //} + //} +#endif ov[node].new_id = num_vertices(ov)-1-num_con; @@ -911,7 +920,9 @@ void test_dijk_back(int node, CHOverlay& ov, vector& node_dists, vect } void create_labels(vector>& labels, vector>& labels_back, CHOverlay& ov) { +#ifdef debug_create cerr << "start create labels" 
<< endl; +#endif vector node_dists(num_vertices(ov), INF_INT); vector v; v.resize(num_vertices(ov)); for (auto i = 0u; i < num_vertices(ov); i++) { @@ -920,8 +931,10 @@ void create_labels(vector>& labels, vector>& for (auto j = 0u; j < num_vertices(ov); j++) { +#ifdef debug_create //if (j % 100 == 1) { - cerr << j << "th node, " << v[j] << endl; + cerr << j << "th node, " << v[j] << endl; +#endif //cerr << "starting dijkstra: " << endl; down_dijk_back(v[j], ov, node_dists, labels, labels_back); From d429581e179d11857c7dd5bfab62409e5e7f6fdf Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 10 Feb 2026 19:53:18 -0500 Subject: [PATCH 31/75] Adjust debugging for better diffing and add a distance promotion function --- bdsg/deps/libhandlegraph | 2 +- bdsg/include/bdsg/ch.hpp | 3 +- bdsg/include/bdsg/landmark.hpp | 9 +++ bdsg/include/bdsg/snarl_distance_index.hpp | 12 +++- bdsg/src/snarl_distance_index.cpp | 77 +++++++++++++++++----- 5 files changed, 81 insertions(+), 22 deletions(-) diff --git a/bdsg/deps/libhandlegraph b/bdsg/deps/libhandlegraph index 0e70dadb..ec2da41d 160000 --- a/bdsg/deps/libhandlegraph +++ b/bdsg/deps/libhandlegraph @@ -1 +1 @@ -Subproject commit 0e70dadb5054568d8071e280b3b7b11b5658937f +Subproject commit ec2da41d955e30366b6366b8760fd8646e2c0000 diff --git a/bdsg/include/bdsg/ch.hpp b/bdsg/include/bdsg/ch.hpp index 0b3fda5c..1dd8ab7b 100644 --- a/bdsg/include/bdsg/ch.hpp +++ b/bdsg/include/bdsg/ch.hpp @@ -38,7 +38,8 @@ NODE_UINT bgid(const handle_t& h, const bdsg::HashGraph& hg); * For net graphs, we need to distinguish between source and destination status * to allow turning around within a child chain without traversing the full * length of the chain. Each child chain needs to be represented by a subgraph - * with different in and out "port" nodes in each orientation. + * with different in and out "port" nodes in each orientation. The source port + * is the one you would leave the node from in that orientation. 
*/ NODE_UINT bgid(size_t net_rank, bool is_reverse, bool is_source); diff --git a/bdsg/include/bdsg/landmark.hpp b/bdsg/include/bdsg/landmark.hpp index b2c0052f..23b0a8a5 100644 --- a/bdsg/include/bdsg/landmark.hpp +++ b/bdsg/include/bdsg/landmark.hpp @@ -18,6 +18,15 @@ namespace bdsg { #define DIST_UINT uint32_t #define ARR2D_OFFSET 1 +/// Allow promoting a DIST_UINT to a different type, translating infinities to the type's max limit. +template +OtherInt promote_distance(DIST_UINT val) { + if (val == INF_INT) { + return std::numeric_limits::max(); + } + return (OtherInt) val; +} + typedef int NodeId; typedef int NodesideId; typedef enum EnterDir {OTHER_NODESIDE=0,OTHER_NODE=1} EnterDir; diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 18fac0a4..2ec09467 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -621,13 +621,15 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///SIMPLE_SNARL is a snarl with all children connecting only to the boundary nodes in one direction (ie, a bubble). ///TRIVIAL_SNARL represents consecutive nodes in a chain. ///NODE represents a node that is a trivial chain. A node can only be the child of a snarl. - ///OVERSIZED_SNARL only stores distances to the boundaries. + ///OVERSIZED_SNARL stores hub labeling data to compute distances. ///ROOT_SNARL represents a connected component of the root. It has no start or end node so /// its children technically belong to the root. ///MULTICOMPONENT_CHAIN can represent a chain with snarls that are not start-end connected. /// The chain is split up into components between these snarls, each node is tagged with /// which component it belongs to. /// + /// TODO: What is a CHILDREN record? Is it ever used? 
+ /// enum record_t {ROOT=1, NODE, DISTANCED_NODE, TRIVIAL_SNARL, DISTANCED_TRIVIAL_SNARL, @@ -1700,12 +1702,18 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab handlegraph::nid_t end_node_id; size_t end_node_length=0; size_t node_count=0; - size_t min_length = std::numeric_limits::max(); //Not including boundary nodes + /// Minimum distance across the snarl from start to end, not + /// including boundary nodes. + size_t min_length = std::numeric_limits::max(); size_t max_length = 0; size_t max_distance = 0; size_t tree_depth = 0; //TODO: This isn't used but I left it because I couldn't get the python bindings to build when I changed it + /// Minimum distance from the start back to itself within the + /// snarl, not including boundary nodes. size_t distance_start_start = std::numeric_limits::max(); + /// Minimum distance from the end back to itself within the snarl, + /// not including boundary nodes. size_t distance_end_end = std::numeric_limits::max(); size_t rank_in_parent=0; diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 4cff2375..ae4d377b 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -1,6 +1,7 @@ //#define debug_distance_indexing //#define debug_snarl_traversal //#define debug_distances +//#define debug_parent //#define debug_distance_paths #include "bdsg/snarl_distance_index.hpp" @@ -512,7 +513,7 @@ net_handle_t SnarlDistanceIndex::get_parent(const net_handle_t& child) const { throw runtime_error("error: trying to find the parent of the root"); } else if (get_record_type(snarl_tree_records->at(get_record_offset(child))) == SIMPLE_SNARL || get_record_type(snarl_tree_records->at(get_record_offset(child))) == DISTANCED_SIMPLE_SNARL) { -#ifdef debug_distances +#ifdef debug_parent std::cerr << "Child " << net_handle_as_string(child) << " has simple snarl record type " << 
stringify(get_record_type(snarl_tree_records->at(get_record_offset(child)))) << " and current handle type " << stringify(get_handle_type(child)) << std::endl; #endif @@ -521,14 +522,14 @@ net_handle_t SnarlDistanceIndex::get_parent(const net_handle_t& child) const { // If this is a node, then return it as a chain // TODO: Why can a simple snarl need to look like a node itself? // TODO: Why can a simple snarl need to look like a chain? Because the node needs to look like a chain? -#ifdef debug_distances +#ifdef debug_parent std::cerr << "We were looking at a simple snarl as a node; project it as a chain." << std::endl; #endif return get_net_handle_from_values(get_record_offset(child), child_connectivity, CHAIN_HANDLE, get_node_record_offset(child)); } else if (get_handle_type(child) == CHAIN_HANDLE) { //If this is a chain, then return the same thing as a snarl -#ifdef debug_distances +#ifdef debug_parent std::cerr << "We were looking at a simple snarl as a chain; project it as a snarl." << std::endl; #endif return get_net_handle_from_values(get_record_offset(child), START_END, SNARL_HANDLE, 1); @@ -543,7 +544,7 @@ net_handle_t SnarlDistanceIndex::get_parent(const net_handle_t& child) const { SnarlTreeRecord parent_record(parent_pointer, &snarl_tree_records); net_handle_record_t parent_type = parent_record.get_record_handle_type(); -#ifdef debug_distances +#ifdef debug_parent std::cerr << "Parent of " << net_handle_as_string(child) << " at " << parent_pointer << " has record type " << stringify(parent_record.get_record_type()) << std::endl; #endif @@ -566,7 +567,7 @@ net_handle_t SnarlDistanceIndex::get_parent(const net_handle_t& child) const { //If this is a node and it's parent is not a chain, we want to pretend that its //parent is a chain version of the child net_handle_t projected = get_net_handle_from_values(get_record_offset(child), child_connectivity, CHAIN_HANDLE, get_node_record_offset(child)); -#ifdef debug_distances +#ifdef debug_parent std::cerr << 
"Parent of " << net_handle_as_string(child) << " projected as " << net_handle_as_string(projected) << std::endl; #endif @@ -1248,6 +1249,10 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, } else if (is_chain(parent)) { if (get_record_handle_type(get_record_type(snarl_tree_records->at(get_record_offset(parent)))) == NODE_HANDLE || get_record_handle_type(get_record_type(snarl_tree_records->at(get_record_offset(parent)))) == SNARL_HANDLE) { + // TODO: Why would this happen? +#ifdef debug_distances + std::cerr << "=>They are not reachable because this chain is really a node or snarl(???)" << std::endl; +#endif return std::numeric_limits::max(); } ChainRecord chain_record(parent, &snarl_tree_records); @@ -1344,6 +1349,10 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, //If the snarls are adjacent (and not the same snarl) return node_length2;//return the node length } + +#ifdef debug_distances + std::cerr << "=>Measure chain distance between chain ranks " << rank_in_chain1 << " and " << rank_in_chain2 << std::endl; +#endif return sum(chain_record.get_distance(rank_in_chain1, go_left1, node_length1, prefix_sum1, @@ -1428,15 +1437,11 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, // But that's weird and makes no sense that we would need to do // that! // - // Because this function takes the destination as facing back - // towards the start, but hhl_query takes it as facing along the - // connecting path, we need to flip the second orientation one more - // time. - size_t distance = hhl_query(length_data_it + 1, bgid(rank1, rev1 ^ !is_sentinel(child1), true), bgid(rank2, !(rev2 ^ !is_sentinel(child2)), false)); - if (distance == bdsg::INF_INT) { - // Promote unreachable sentinel to wider type. 
- distance = std::numeric_limits::max(); - } + // Because the function we're in takes the destination as facing + // back towards the start, but hhl_query takes it as facing along + // the connecting path, we need to flip the second orientation one + // more time. + size_t distance = promote_distance(hhl_query(length_data_it + 1, bgid(rank1, rev1 ^ !is_sentinel(child1), true), bgid(rank2, !(rev2 ^ !is_sentinel(child2)), false))); #ifdef debug_distances cerr << " Resulting distance: " << distance << endl; #endif @@ -5281,10 +5286,18 @@ size_t SnarlDistanceIndex::ChainRecord::get_distance(size_t rank1, bool left_sid throw runtime_error("error: Trying to get chain distances from a node"); } #endif - - if (get_record_type() == MULTICOMPONENT_CHAIN) { + + record_t record_type = get_record_type(); + if (record_type == MULTICOMPONENT_CHAIN) { if (component1 != component2) { +#ifdef debug_distances + std::cerr << "Ranks " << rank1 << " and " << rank2 << " are in different multicomponent chain components." << std::endl; +#endif if (is_looping_chain) { +#ifdef debug_distances + std::cerr << "Chain is looping." << std::endl; +#endif + //If this is a looping chain, then the first/last node could be in two //components return get_distance_taking_chain_loop(rank1, left_side1, node_length1, @@ -5292,9 +5305,14 @@ size_t SnarlDistanceIndex::ChainRecord::get_distance(size_t rank1, bool left_sid rank2, left_side2, node_length2, prefix_sum2, forward_loop2, reverse_loop2, end_component2); } else { +#ifdef debug_distances + std::cerr << "Chain is not looping." 
<< std::endl; +#endif return std::numeric_limits::max(); } } + } else if (record_type != CHAIN && record_type != DISTANCED_CHAIN) { + std::cerr << "Warning: weird record type for chain: " << stringify(record_type) << std::endl; } @@ -5305,30 +5323,50 @@ size_t SnarlDistanceIndex::ChainRecord::get_distance(size_t rank1, bool left_sid if (rank1 == rank2) { //If these are the same node, then the path would need to go around the node distance = sum(sum(forward_loop1,reverse_loop2),node_length1); +#ifdef debug_distances + std::cerr << "Distance around shared node is " << distance << std::endl; +#endif } else { distance = minus(prefix_sum2 - prefix_sum1, node_length1); +#ifdef debug_distances + std::cerr << "Distance forward along chain is " << distance << std::endl; +#endif } } else if (!left_side1 && !left_side2) { //Right side of 1 and right side of 2 if (rank1 == rank2) { distance = forward_loop2; - +#ifdef debug_distances + std::cerr << "Distance on right self loop is " << distance << std::endl; +#endif } else { distance = minus( sum(sum(prefix_sum2 - prefix_sum1, node_length2), forward_loop2), node_length1); +#ifdef debug_distances + std::cerr << "Distance from right to other right is " << distance << std::endl; +#endif } } else if (left_side1 && left_side2) { //Left side of 1 and left side of 2 if (rank1 == rank2) { distance = reverse_loop1; +#ifdef debug_distances + std::cerr << "Distance on left slef loop is " << distance << std::endl; +#endif } else { distance = sum(prefix_sum2 - prefix_sum1, reverse_loop1); +#ifdef debug_distances + std::cerr << "Distance from left to other left " << distance << std::endl; +#endif } } else { //Left side of 1 and right side of 2 distance = sum(sum(sum(prefix_sum2 - prefix_sum1, reverse_loop1), forward_loop2), node_length2); +#ifdef debug_distances + std::cerr << "Distance back along chain is " << distance << std::endl; +#endif } if (is_looping_chain) { @@ -5336,6 +5374,9 @@ size_t 
SnarlDistanceIndex::ChainRecord::get_distance(size_t rank1, bool left_sid prefix_sum1, forward_loop1, reverse_loop1, end_component1, rank2, left_side2, node_length2, prefix_sum2, forward_loop2, reverse_loop2, end_component2)); +#ifdef debug_distances + std::cerr << "After handling looping, distance is " << distance << std::endl; +#endif } return distance; } @@ -5906,7 +5947,7 @@ string SnarlDistanceIndex::net_handle_as_string(const net_handle_t& net) const { net_handle_record_t type = get_handle_type(net); SnarlTreeRecord record (net, &snarl_tree_records); net_handle_record_t record_type = record.get_record_handle_type(); - string result = stringify(type) + "@" + std::to_string(get_record_offset(net)) + "="; + string result = stringify(type) + " "; if (type == ROOT_HANDLE) { if (record.get_record_type() == ROOT_SNARL || record.get_record_type() == DISTANCED_ROOT_SNARL) { result += "root snarl"; From 43a6bc7a0f45d588f4c0dc75b8231b1797561f5f Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 11 Feb 2026 14:25:46 -0500 Subject: [PATCH 32/75] Turn on debugging and add more query prints --- bdsg/src/ch.cpp | 4 +-- bdsg/src/snarl_distance_index.cpp | 60 +++++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index 7f1a5976..c82f03fb 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -3,8 +3,8 @@ file for quickly playing around with stuff */ #include "bdsg/ch.hpp" -//#define debug_boost_graph -//#define debug_create +#define debug_boost_graph +#define debug_create namespace bdsg { bdsg::HashGraph make_test() { diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index ae4d377b..4accf191 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -1,6 +1,6 @@ -//#define debug_distance_indexing +#define debug_distance_indexing //#define debug_snarl_traversal -//#define debug_distances +#define debug_distances //#define debug_parent 
//#define debug_distance_paths @@ -1395,7 +1395,11 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, #endif if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == DISTANCED_SIMPLE_SNARL) { - return SimpleSnarlRecord(parent, &snarl_tree_records).get_distance(rank1, rev1, rank2, rev2); + auto result = SimpleSnarlRecord(parent, &snarl_tree_records).get_distance(rank1, rev1, rank2, rev2); +#ifdef debug_distances + std::cerr << " Retrieving simple snarl value: " << result << endl; +#endif + return result; } else if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == OVERSIZED_SNARL) { #ifdef debug_distances cerr << " Performing HHL query" << endl; @@ -1449,13 +1453,25 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, } else if (rank1 == 0 && rank2 == 0 && !snarl_is_root) { //Start to start is stored in the snarl - return SnarlRecord(parent, &snarl_tree_records).get_distance_start_start(); + auto result = SnarlRecord(parent, &snarl_tree_records).get_distance_start_start(); +#ifdef debug_distances + std::cerr << " Retrieving snarl start-start value: " << result << endl; +#endif + return result; } else if ((rank1 == 0 && rank2 == 1) || (rank1 == 1 && rank2 == 0) && !snarl_is_root) { //start to end / end to start is stored in the snarl - return SnarlRecord(parent, &snarl_tree_records).get_min_length(); + auto result = SnarlRecord(parent, &snarl_tree_records).get_min_length(); +#ifdef debug_distances + std::cerr << " Retrieving snarl min-length value: " << result << endl; +#endif + return result; } else if (rank1 == 1 && rank2 == 1 && !snarl_is_root) { //end to end is stored in the snarl - return SnarlRecord(parent, &snarl_tree_records).get_distance_end_end(); + auto result = SnarlRecord(parent, &snarl_tree_records).get_distance_end_end(); +#ifdef debug_distances + std::cerr << " Retrieving snarl end-end value: " << result << endl; +#endif + return result; } else if ((rank1 == 0 || 
rank1 == 1 || rank2 == 0 || rank2 == 1) && !snarl_is_root) { //If one node is a boundary and the other is a child size_t boundary_rank = (rank1 == 0 || rank1 == 1) ? rank1 : rank2; @@ -1463,37 +1479,51 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, bool internal_is_reversed = (rank1 == 0 || rank1 == 1) ? rev2 : rev1; if (is_trivial_chain( internal_child) ) { //Child is just a node pretending to be a chain + size_t result; if (boundary_rank == 0 && !internal_is_reversed) { //Start to left of child - return NodeRecord(internal_child, &snarl_tree_records).get_distance_left_start(); + result = NodeRecord(internal_child, &snarl_tree_records).get_distance_left_start(); } else if (boundary_rank == 0 && internal_is_reversed) { //Start to right of child - return NodeRecord(internal_child, &snarl_tree_records).get_distance_right_start(); + result = NodeRecord(internal_child, &snarl_tree_records).get_distance_right_start(); } else if (boundary_rank == 1 && !internal_is_reversed) { //End to left of child - return NodeRecord(internal_child, &snarl_tree_records).get_distance_left_end(); + result = NodeRecord(internal_child, &snarl_tree_records).get_distance_left_end(); } else { //End to right of child - return NodeRecord(internal_child, &snarl_tree_records).get_distance_right_end(); + result = NodeRecord(internal_child, &snarl_tree_records).get_distance_right_end(); } +#ifdef debug_distances + std::cerr << " Retrieving node record value: " << result << endl; +#endif + return result; } else { //Child is an actual chain + size_t result; if (boundary_rank == 0 && !internal_is_reversed) { //Start to left of child - return ChainRecord(internal_child, &snarl_tree_records).get_distance_left_start(); + result = ChainRecord(internal_child, &snarl_tree_records).get_distance_left_start(); } else if (boundary_rank == 0 && internal_is_reversed) { //Start to right of child - return ChainRecord(internal_child, &snarl_tree_records).get_distance_right_start(); + 
result = ChainRecord(internal_child, &snarl_tree_records).get_distance_right_start(); } else if (boundary_rank == 1 && !internal_is_reversed) { //End to left of child - return ChainRecord(internal_child, &snarl_tree_records).get_distance_left_end(); + result = ChainRecord(internal_child, &snarl_tree_records).get_distance_left_end(); } else { //End to right of child - return ChainRecord(internal_child, &snarl_tree_records).get_distance_right_end(); + result = ChainRecord(internal_child, &snarl_tree_records).get_distance_right_end(); } +#ifdef debug_distances + std::cerr << " Retrieving chain record value: " << result << endl; +#endif + return result; } } else { - return SnarlRecord(parent, &snarl_tree_records).get_distance(rank1, rev1, rank2, rev2); + auto result = SnarlRecord(parent, &snarl_tree_records).get_distance(rank1, rev1, rank2, rev2); +#ifdef debug_distances + std::cerr << " Retrieving snarl record value: " << result << endl; +#endif + return result; } } else { throw runtime_error("error: Trying to find distance in the wrong type of handle"); From baa9b4910952cd3208793df4f39594c3b4ac0cd1 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 11 Feb 2026 17:36:45 -0500 Subject: [PATCH 33/75] Rename orientation flags and start collecting constraints on them to divine the correct way to use them --- bdsg/src/snarl_distance_index.cpp | 48 +++++++++++++++++-------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 4accf191..984fd741 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -1251,7 +1251,7 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, get_record_handle_type(get_record_type(snarl_tree_records->at(get_record_offset(parent)))) == SNARL_HANDLE) { // TODO: Why would this happen? 
#ifdef debug_distances - std::cerr << "=>They are not reachable because this chain is really a node or snarl(???)" << std::endl; + std::cerr << "=>They are not reachable because this chain is really a node or snarl(?!)" << std::endl; #endif return std::numeric_limits::max(); } @@ -1366,20 +1366,20 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, } else if (is_snarl(parent)) { bool snarl_is_root = is_root_snarl(parent); - size_t rank1, rank2; bool rev1, rev2; + size_t rank1, rank2; bool dir1, dir2; if (is_sentinel(child1)) { rank1 = starts_at(child1) == START ? 0 : 1; - rev1 = false; + dir1 = false; } else { rank1 = get_rank_in_parent(child1); - rev1 = !child_ends_at_start1; + dir1 = !child_ends_at_start1; } if (is_sentinel(child2)) { rank2 = starts_at(child2) == START ? 0 : 1; - rev2 = false; + dir2 = false; } else { rank2 = get_rank_in_parent(child2); - rev2 = !child_ends_at_start2; + dir2 = !child_ends_at_start2; } if ((is_sentinel(child1) && starts_at(child1) == ends_at(child1)) || (is_sentinel(child2) && starts_at(child2) == ends_at(child2)) ) { @@ -1391,11 +1391,11 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, } #ifdef debug_distances - cerr << " between ranks " << rank1 << " " << rev1 << " " << rank2 << " " << rev2 << endl; + cerr << " between ranks " << rank1 << " " << dir1 << " " << rank2 << " " << dir2 << endl; #endif if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == DISTANCED_SIMPLE_SNARL) { - auto result = SimpleSnarlRecord(parent, &snarl_tree_records).get_distance(rank1, rev1, rank2, rev2); + auto result = SimpleSnarlRecord(parent, &snarl_tree_records).get_distance(rank1, dir1, rank2, dir2); #ifdef debug_distances std::cerr << " Retrieving simple snarl value: " << result << endl; #endif @@ -1433,19 +1433,23 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, // TODO: Probably need to also flip for is_reversed_in_parent() to // account for this. 
// - // TODO: rev1 and rev2 seem to actually be backwards for - // non-sentinel children (rev1 is true if child1's traversal - // *doesn't* end at its start, and thus ends at its end). Right now - // we address this by flipping them. We flip them and query - // going out the end of child 1 if its traversal ends at its end. - // But that's weird and makes no sense that we would need to do - // that! + // TODO: dir1 and dir2 aren't just normal is_reverse flags. // - // Because the function we're in takes the destination as facing - // back towards the start, but hhl_query takes it as facing along - // the connecting path, we need to flip the second orientation one - // more time. - size_t distance = promote_distance(hhl_query(length_data_it + 1, bgid(rank1, rev1 ^ !is_sentinel(child1), true), bgid(rank2, !(rev2 ^ !is_sentinel(child2)), false))); + // For a sentinel rank 1 (end node) as rank1, dir1 false needs to mean into the snarl (so start of end node, reverse strand). + // For a sentinel rank 0 (start node) as rank1, dir1 false needs to mean into the snarl (so end of start node, forward strand). + // + // For a node as rank2, with its end connected to rank1, dir2 true needs to mean towards the thing attached to its end. If we're not a source, that means it must be reverse strand. + + size_t from_port = bgid(rank1, dir1 ^ (rank1 == 1), true); +#ifdef debug_distances + std::cerr << " Query from vertex " << from_port << " = rank " << rank1 << " " << (dir1 ? "rev" : "fd") << " " << (is_sentinel(child1)? "sentinel" : "non-sentinel") << ", source" << std::endl; +#endif + size_t to_port = bgid(rank2, dir2, false); +#ifdef debug_distances + std::cerr << " Query to vertex " << to_port << " = rank " << rank2 << " " << (dir2 ? "rev" : "fd") << " " << (is_sentinel(child2)? 
"sentinel" : "non-sentinel") << ", non-source" << std::endl; +#endif + + size_t distance = promote_distance(hhl_query(length_data_it + 1, from_port, to_port)); #ifdef debug_distances cerr << " Resulting distance: " << distance << endl; #endif @@ -1476,7 +1480,7 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, //If one node is a boundary and the other is a child size_t boundary_rank = (rank1 == 0 || rank1 == 1) ? rank1 : rank2; const net_handle_t& internal_child = (rank1 == 0 || rank1 == 1) ? child2 : child1; - bool internal_is_reversed = (rank1 == 0 || rank1 == 1) ? rev2 : rev1; + bool internal_is_reversed = (rank1 == 0 || rank1 == 1) ? dir2 : dir1; if (is_trivial_chain( internal_child) ) { //Child is just a node pretending to be a chain size_t result; @@ -1519,7 +1523,7 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, return result; } } else { - auto result = SnarlRecord(parent, &snarl_tree_records).get_distance(rank1, rev1, rank2, rev2); + auto result = SnarlRecord(parent, &snarl_tree_records).get_distance(rank1, dir1, rank2, dir2); #ifdef debug_distances std::cerr << " Retrieving snarl record value: " << result << endl; #endif From ee08df26a4828936db0fbfc53ba75243a803a36f Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 13 Feb 2026 12:55:48 -0500 Subject: [PATCH 34/75] Turn on debugging again and fix adding to infinite unreachable loop distances --- bdsg/include/bdsg/landmark.hpp | 20 ++++++++++++++++++-- bdsg/include/bdsg/snarl_distance_index.hpp | 12 +++++++----- bdsg/src/ch.cpp | 22 +++++++++++++--------- bdsg/src/landmark.cpp | 10 +++++----- bdsg/src/snarl_distance_index.cpp | 4 +++- 5 files changed, 46 insertions(+), 22 deletions(-) diff --git a/bdsg/include/bdsg/landmark.hpp b/bdsg/include/bdsg/landmark.hpp index 23b0a8a5..417ecb65 100644 --- a/bdsg/include/bdsg/landmark.hpp +++ b/bdsg/include/bdsg/landmark.hpp @@ -27,6 +27,24 @@ OtherInt promote_distance(DIST_UINT val) { return (OtherInt) val; } 
+/// Allow demoting a DIST_UINT from a different type, translating infinities +/// from the type's max limit and erroring on unrepresentably large values. +template +DIST_UINT demote_distance(OtherInt val) { + if (val == std::numeric_limits::max()) { + return INF_INT; + } + if (val > (OtherInt) INF_INT) { + throw std::overflow_error("Cannot store excessively wide value " + std::to_string(val) + " in " + std::to_string(DIST_NBITS) + " bits for hub labeling"); + } + return (DIST_UINT) val; +} + +/// Sum two distances, propagating infinities. +/// Does not check for overflow. +/// TODO: We're not really sure if our distances are ints or uints and we freely mix them when we shouldn't. +int addInt(int a, int b); + typedef int NodeId; typedef int NodesideId; typedef enum EnterDir {OTHER_NODESIDE=0,OTHER_NODE=1} EnterDir; @@ -143,8 +161,6 @@ class SdslArray2D { } }; -int addInt(int a, int b); - /* following functions assume input graph node ids go from 1...N */ diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 2ec09467..8691e25f 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -1,7 +1,7 @@ #ifndef BDSG_SNARL_DISTANCE_HPP_INCLUDED #define BDSG_SNARL_DISTANCE_HPP_INCLUDED -//#define debug_distance_indexing +#define debug_distance_indexing //#define count_allocations #include @@ -582,9 +582,11 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///and end. net_handle_t get_parent_traversal(const net_handle_t& traversal_start, const net_handle_t& traversal_end) const; - private: - + + // TODO: This is apparently private because it does not actually work, and right now just prints a message to that effect. + // handlegraph::algorithms::for_each_handle_in_shortest_path() is available instead, but it doesn't use the index. + // ///Function to walk through the shortest path between the two nodes+orientations.
Orientation is the same as for minimum_distance - ///traverses from the first node going forward to the second node going forward. ///Calls iteratee on each node of the shortest path between the nodes and the distance to the start of that node @@ -1687,8 +1689,8 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab //TODO This would probably be more efficient as a vector of a struct of five ints vector prefix_sum; vector max_prefix_sum; - vector forward_loops; - vector backward_loops; + vector forward_loops; // If no loop is possible, an entry will be std::numeric_limits::max() + vector backward_loops; // If no loop is possible, an entry will be std::numeric_limits::max() vector chain_components;//Which component does each node belong to, usually all 0s diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index c82f03fb..0f27b92c 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -4,7 +4,7 @@ file for quickly playing around with stuff #include "bdsg/ch.hpp" #define debug_boost_graph -#define debug_create +//#define debug_create namespace bdsg { bdsg::HashGraph make_test() { @@ -124,24 +124,28 @@ CHOverlay make_boost_graph(const SnarlDistanceIndex::TemporaryDistanceIndex& tem end_handle = hgraph->get_handle(record.end_node_id, record.end_node_rev); // Fetch straight-through distance. - // TODO: What value does this have if straight-through is unreachable? Then we want INF_INT. - start_end_distance = record.min_length; + // Will be std::numeric_limits::max() if unconnected. + start_end_distance = demote_distance(record.min_length); // Fetch looping distances (thanks Xian!) - // TODO: What's the representation for "not connected"? Is it not having a value or is it having a sentinel value we need to translate to INF_INT here? 
- if (!record.forward_loops.empty()) { + // If no loop is actually possible, the loop value will be std::numeric_limits::max() + if (!record.forward_loops.empty() && record.forward_loops[0] != std::numeric_limits::max()) { // We know a chain always has a first child that's a node, so we can // get the start node length. auto& first_child = record.children.front(); assert(first_child.first == bdsg::SnarlDistanceIndex::TEMP_NODE); - DIST_UINT start_node_length = temp_index.get_node(first_child).node_length; - start_start_distance = record.forward_loops[0] + (2 * start_node_length); + DIST_UINT start_node_length = demote_distance(temp_index.get_node(first_child).node_length); + // We know nothing can be infinite-distance here. + // TODO: Check for overflow? + start_start_distance = demote_distance(record.forward_loops[0]) + 2 * start_node_length; } else { start_start_distance = INF_INT; } - if (!record.backward_loops.empty()) { + if (!record.backward_loops.empty() && record.backward_loops.back() != std::numeric_limits::max()) { // The end node length is already helpfully stored for us. - end_end_distance = record.backward_loops.back() + (2 * record.end_node_length); + // // We know nothing can be infinite-distance here. + // TODO: Check for overflow? 
+ end_end_distance = demote_distance(record.backward_loops.back()) + 2 * demote_distance(record.end_node_length); } else { end_end_distance = INF_INT; } diff --git a/bdsg/src/landmark.cpp b/bdsg/src/landmark.cpp index ed1be9eb..e457c012 100644 --- a/bdsg/src/landmark.cpp +++ b/bdsg/src/landmark.cpp @@ -122,7 +122,7 @@ void fill_other_nodeside_dists(SdslArray2D& dist_table, HashGraph& g) { if (dist_table.get(i,ns) != INF_INT) { NodesideId other_ns = other_nodeside(ns); NodeId node = nodeside_to_node(ns); - dist_table.set(i, other_ns, min(addInt(dist_table.get(i,ns), g.get_length(g.get_handle(node))), static_cast(dist_table.get(i,other_ns)))); + dist_table.set(i, other_ns, min(addInt(dist_table.get(i,ns), demote_distance(g.get_length(g.get_handle(node)))), static_cast(dist_table.get(i,other_ns)))); } } } @@ -259,8 +259,8 @@ SdslArray2D dijkstra(bdsg::HashGraph& g, NodesideId start, NodesideId stop_ns, i } else { //came here from a nodeside of a node that =/= cur_nodeside's node - handle_t handle = g.get_handle(node_id, !nodeside_left(cur_nodeside)); - int handle_len = g.get_length(handle); + handle_t handle = g.get_handle(node_id, !nodeside_left(cur_nodeside)); + int handle_len = demote_distance(g.get_length(handle)); NodesideId other_ns = other_nodeside(cur_nodeside); //handle_t cur_handle = self_loop[cur_nodeside] ? g.flip(handle):handle; @@ -269,7 +269,7 @@ SdslArray2D dijkstra(bdsg::HashGraph& g, NodesideId start, NodesideId stop_ns, i } discover_nodeside( other_ns, OTHER_NODESIDE, - cur_dist == INF_INT ?
INF_INT : cur_dist+handle_len, + addInt(cur_dist, handle_len), path_lengths, nodeside_queue ); } @@ -449,7 +449,7 @@ int oracle_query(NodesideId source, NodesideId target, OracleInfo& oracle, HashG NodeId lm = lm_node_vec[closest_lm_ind[ns]]; //auto lm_nodesides = get_node_nodesides(lm); - int lm_length = g.get_length(g.get_handle(lm)); + int lm_length = demote_distance(g.get_length(g.get_handle(lm))); int closest_lm_ns_ind = closest_lm_ind[ns]*2; //Array2D::index_gen ind_gen; diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 984fd741..461b6c74 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -1438,9 +1438,11 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, // For a sentinel rank 1 (end node) as rank1, dir1 false needs to mean into the snarl (so start of end node, reverse strand). // For a sentinel rank 0 (start node) as rank1, dir1 false needs to mean into the snarl (so end of start node, forward strand). // + // For a node as rank1, with its end connected to rank2, dir1 true needs to mean towards the thing attached to its end. If we're a source, that means it must be forward strand. + // // For a node as rank2, with its end connected to rank1, dir2 true needs to mean towards the thing attached to its end. If we're not a source, that means it must be reverse strand. - size_t from_port = bgid(rank1, dir1 ^ (rank1 == 1), true); + size_t from_port = bgid(rank1, !dir1 ^ (rank1 == 0), true); #ifdef debug_distances std::cerr << " Query from vertex " << from_port << " = rank " << rank1 << " " << (dir1 ? "rev" : "fd") << " " << (is_sentinel(child1)? 
"sentinel" : "non-sentinel") << ", source" << std::endl; #endif From f522ff9a20654627b9a803ba4b0fd33cc4f54dbc Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 13 Feb 2026 13:08:55 -0500 Subject: [PATCH 35/75] Turn off debugging --- bdsg/include/bdsg/snarl_distance_index.hpp | 2 +- bdsg/src/ch.cpp | 2 +- bdsg/src/snarl_distance_index.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 8691e25f..f87fe166 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -1,7 +1,7 @@ #ifndef BDSG_SNARL_DISTANCE_HPP_INCLUDED #define BDSG_SNARL_DISTANCE_HPP_INCLUDED -#define debug_distance_indexing +//#define debug_distance_indexing //#define count_allocations #include diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index 0f27b92c..ac2cfd2b 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -3,7 +3,7 @@ file for quickly playing around with stuff */ #include "bdsg/ch.hpp" -#define debug_boost_graph +//#define debug_boost_graph //#define debug_create namespace bdsg { diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 461b6c74..ba28410f 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -1,6 +1,6 @@ -#define debug_distance_indexing +//#define debug_distance_indexing //#define debug_snarl_traversal -#define debug_distances +//#define debug_distances //#define debug_parent //#define debug_distance_paths From badfab6170e54efd798f2b9c667fc88a6a1d126f Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 20 Mar 2026 12:29:09 -0700 Subject: [PATCH 36/75] Implement storing regularity in index via the type --- bdsg/include/bdsg/snarl_distance_index.hpp | 167 ++++-- bdsg/src/snarl_distance_index.cpp | 622 ++++++++------------- 2 files changed, 352 insertions(+), 437 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp 
b/bdsg/include/bdsg/snarl_distance_index.hpp index f87fe166..1fec46e4 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -424,18 +424,11 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///edges are allowed bool is_simple_snarl(const net_handle_t& net) const; - ///Returns true if the given net handle refers to (a traversal of) a regular snarl - ///A regular snarl is the same as a simple snarl, except that the children may be - ///nested chains, rather than being restricted to nodes - // If the distance index doesn't store distances then this needs a graph to check edges - // If allow_internal_loops is true, then a snarl can still be considered a regular snarl if one of - // its children allows a path to change direction inside of it. For example, if a path leaving - // the start node can reach the start node again, but not by taking any edges contained within - // the netgraph of the snarl, then this snarl would be considered regular with allow_internal_loops=true, - // but irregular with allow_internal_loops=false - // If there are not distances in the distance index, then it is computationally very expensive to check for internal loops, - // so this will throw an error if allow_internal_loops is false and there are no distances - bool is_regular_snarl(const net_handle_t& net, bool allow_internal_loops=false, const handlegraph::HandleGraph* graph = nullptr) const; + /// Returns true if the given net handle refers to (a traversal of) a regular snarl + /// A regular snarl is the same as a simple snarl, except that the children may be + /// nested chains, rather than being restricted to nodes, as long as the + /// nested chains don't allow reversals. + bool is_regular_snarl(const net_handle_t& net) const; ///Returns true if the given net handle refers to (a traversal of) a chain. 
bool is_chain(const net_handle_t& net) const; @@ -632,21 +625,113 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab /// /// TODO: What is a CHILDREN record? Is it ever used? /// + /// These MUST match the order in record_t_as_string! enum record_t {ROOT=1, NODE, DISTANCED_NODE, TRIVIAL_SNARL, DISTANCED_TRIVIAL_SNARL, SIMPLE_SNARL, DISTANCED_SIMPLE_SNARL, - SNARL, DISTANCED_SNARL, OVERSIZED_SNARL, + REGULAR_SNARL, DISTANCED_REGULAR_SNARL, OVERSIZED_REGULAR_SNARL, + SNARL, DISTANCED_SNARL, OVERSIZED_SNARL, ROOT_SNARL, DISTANCED_ROOT_SNARL, CHAIN, DISTANCED_CHAIN, MULTICOMPONENT_CHAIN, CHILDREN}; - // TODO: Doesn't this need to be inline? And isn't const not allowed on a - // static method? Is this just making the bool const? - const static bool has_distances(record_t type) { + + // Because the record_t encodes a complex taxonomy of snarls not *quite* + // decomposable to flags, we use these accessors to look at facets of it. + + constexpr static bool has_distances(record_t type) { return type == DISTANCED_NODE || type == DISTANCED_TRIVIAL_SNARL || type == DISTANCED_SIMPLE_SNARL + || type == DISTANCED_REGULAR_SNARL || type == OVERSIZED_REGULAR_SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL || type == DISTANCED_ROOT_SNARL || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN; } + constexpr static bool is_root_snarl(record_t type) { + return type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL; + } + constexpr static bool is_any_root(record_t type) { + return is_root_snarl(type) || type == ROOT; + } + constexpr static bool is_node(record_t type) { + return type == NODE || type == DISTANCED_NODE; + } + constexpr static bool is_chain(record_t type) { + return type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN; + } + constexpr static bool is_trivial_snarl(record_t type) { + return type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL; + } + constexpr static bool is_simple_snarl(record_t type) { + 
return type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL; + } + constexpr static bool is_oversized_snarl(record_t type) { + return type == OVERSIZED_SNARL || type == OVERSIZED_REGULAR_SNARL; + } + /// Determine if a record type is a regular (but not simple or trivial) snarl. + constexpr static bool is_regular_snarl(record_t type) { + return type == REGULAR_SNARL || type == DISTANCED_REGULAR_SNARL || type == OVERSIZED_REGULAR_SNARL; + } + /// Determine if a record type is a snarl that isn't also a root or a + /// simple (or trivial) snarl. A "nonsimple" snarl is implicitly + /// nontrivial. + constexpr static bool is_nonroot_nonsimple_snarl(record_t type) { + return is_regular_snarl(type) + || type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL; + } + constexpr static bool is_nonsimple_snarl(record_t type) { + return is_nonroot_nonsimple_snarl(type) || is_root_snarl(type); + } + constexpr static bool is_nonroot_nontrivial_snarl(record_t type) { + return is_nonroot_nonsimple_snarl(type) || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL; + } + constexpr static bool is_nontrivial_snarl(record_t type) { + return is_nonroot_nontrivial_snarl(type) || is_root_snarl(type); + } + /// Make sure a record_t is a known type other than CHILDREN + constexpr static bool is_any_nonchildren(record_t type) { + return is_any_root(type) || is_node(type) || is_chain(type) || is_nonroot_nontrivial_snarl(type) || is_trivial_snarl(type); + } + + constexpr static record_t encode_root_snarl(bool has_distances) { + return has_distances ? DISTANCED_ROOT_SNARL : ROOT_SNARL; + } + + constexpr static record_t encode_simple_snarl(bool has_distances) { + return has_distances ? 
DISTANCED_SIMPLE_SNARL : SIMPLE_SNARL; + } + + constexpr static record_t encode_nonroot_nonsimple_snarl(bool has_distances, bool is_regular, bool is_oversized) { + if (is_oversized) { + if (!has_distances) { + throw runtime_error("error: oversized snarl must have distances"); + } + if (is_regular) { + return OVERSIZED_REGULAR_SNARL; + } else { + return OVERSIZED_SNARL; + } + } else if (is_regular) { + return has_distances ? DISTANCED_REGULAR_SNARL : REGULAR_SNARL; + } else { + return has_distances ? DISTANCED_SNARL : SNARL; + } + } + + constexpr static record_t encode_node(bool has_distances) { + return has_distances ? DISTANCED_NODE : NODE; + } + + constexpr static record_t encode_chain(bool has_distances, bool is_multicomponent) { + if (is_multicomponent) { + if (!has_distances) { + throw runtime_error("error: multicomponent chain must have distances"); + } + return MULTICOMPONENT_CHAIN; + } else { + return has_distances ? DISTANCED_CHAIN : CHAIN; + } + } + + @@ -654,14 +739,12 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///for example a simple snarl record is used to represent a snarl, and the nodes/trivial chains in it. ///This will return whatever is higher on the snarl tree. 
A simple snarl will be considered a snarl, ///a root snarl will be considered a root, etc - const static net_handle_record_t get_record_handle_type(record_t type) { + constexpr static net_handle_record_t get_record_handle_type(record_t type) { if (type == ROOT || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { return ROOT_HANDLE; } else if (type == NODE || type == DISTANCED_NODE || type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { return NODE_HANDLE; - } else if (type == SNARL || type == DISTANCED_SNARL || - type == OVERSIZED_SNARL || - type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nonroot_nontrivial_snarl(type)) { return SNARL_HANDLE; } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { return CHAIN_HANDLE; @@ -683,25 +766,25 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///The offset into records that this handle points to - const static size_t get_record_offset (const handlegraph::net_handle_t& net_handle) { + inline static size_t get_record_offset (const handlegraph::net_handle_t& net_handle) { return handlegraph::as_integer(net_handle) >> 15; } ///The offset of a node in a trivial snarl (0 if it isn't a node in a trivial snarl) - const static size_t get_node_record_offset (const handlegraph::net_handle_t& net_handle) { + inline static size_t get_node_record_offset (const handlegraph::net_handle_t& net_handle) { return (handlegraph::as_integer(net_handle) >> 7 ) & MAX_TRIVIAL_SNARL_NODE_COUNT; //Get 8 bits after last 7 } - const static connectivity_t get_connectivity (const handlegraph::net_handle_t& net_handle){ + inline static connectivity_t get_connectivity (const handlegraph::net_handle_t& net_handle){ size_t connectivity_as_int = (handlegraph::as_integer(net_handle)>>3) & 15; //Get 4 bits after last 3 assert (connectivity_as_int <= 9); return static_cast(connectivity_as_int); } - const static net_handle_record_t get_handle_type (const 
handlegraph::net_handle_t& net_handle) { + inline static net_handle_record_t get_handle_type (const handlegraph::net_handle_t& net_handle) { size_t record_type = handlegraph::as_integer(net_handle) & 7; //Get last 3 bits assert (record_type <= 4); return static_cast(record_type); } - const static handlegraph::net_handle_t get_net_handle_from_values(size_t pointer, connectivity_t connectivity, + inline static handlegraph::net_handle_t get_net_handle_from_values(size_t pointer, connectivity_t connectivity, net_handle_record_t type, size_t node_offset=0) { if (pointer > ((size_t)1 << (64-BITS_FOR_TRIVIAL_NODE_OFFSET-3-4))-1) { throw runtime_error("error: don't have space in net handle for record offset"); @@ -726,7 +809,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///Get the offset into snarl_tree_records for the pointer to a node record. - const static size_t get_node_pointer_offset (const handlegraph::nid_t& id, const handlegraph::nid_t& min_node_id, size_t component_count) { + inline static size_t get_node_pointer_offset (const handlegraph::nid_t& id, const handlegraph::nid_t& min_node_id, size_t component_count) { size_t node_records_offset = component_count + ROOT_RECORD_SIZE; size_t offset = (id-min_node_id)*2; return node_records_offset + offset; @@ -737,7 +820,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab // return ((offset-node_records_offset) / NODE_RECORD_SIZE) + min_node_id; //} - const static connectivity_t endpoints_to_connectivity(endpoint_t start, endpoint_t end) { + inline static connectivity_t endpoints_to_connectivity(endpoint_t start, endpoint_t end) { if (start == START && end == START) { return START_START; } else if (start == START && end == END) { @@ -760,7 +843,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab throw runtime_error("error: invalid endpoints"); } } - const static endpoint_t get_start_endpoint(connectivity_t 
connectivity) { + inline static endpoint_t get_start_endpoint(connectivity_t connectivity) { endpoint_t start_endpoint; if (connectivity == START_START || connectivity == START_END || connectivity == START_TIP){ start_endpoint = START; @@ -773,10 +856,10 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab } return start_endpoint; } - const static endpoint_t get_start_endpoint(net_handle_t net) { + inline static endpoint_t get_start_endpoint(net_handle_t net) { return get_start_endpoint(get_connectivity(net)); } - const static endpoint_t get_end_endpoint(connectivity_t connectivity) { + inline static endpoint_t get_end_endpoint(connectivity_t connectivity) { endpoint_t end_endpoint; if (connectivity == START_START || connectivity == END_START || connectivity == TIP_START){ end_endpoint = START; @@ -789,10 +872,10 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab } return end_endpoint; } - const static endpoint_t get_end_endpoint(const net_handle_t& net) { + inline static endpoint_t get_end_endpoint(const net_handle_t& net) { return get_end_endpoint(get_connectivity(net)); } - const static pair connectivity_to_endpoints(const connectivity_t& connectivity) { + inline static pair connectivity_to_endpoints(const connectivity_t& connectivity) { return make_pair(get_start_endpoint(connectivity), get_end_endpoint(connectivity)); } @@ -829,10 +912,10 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab const static size_t MIN_NODE_ID_OFFSET = 4; const static size_t MAX_TREE_DEPTH_OFFSET = 5; - // While the version number is 3, store it in a bit masked way + // We store the version number in a bit masked way // to avoid getting confused with old indexes without version numbers - // that start with component count - const static size_t CURRENT_VERSION_NUMBER = 3; + // that start with component count (also likely to be a small number). 
+ const static size_t CURRENT_VERSION_NUMBER = 4; /// Arbitrary large number which doens't overflow the number of bits we give const static size_t VERSION_NUMBER_SENTINEL = (1 << 10) - 1; @@ -1689,8 +1772,16 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab //TODO This would probably be more efficient as a vector of a struct of five ints vector prefix_sum; vector max_prefix_sum; - vector forward_loops; // If no loop is possible, an entry will be std::numeric_limits::max() - vector backward_loops; // If no loop is possible, an entry will be std::numeric_limits::max() + /// Forward looping distances. If no loop is possible, an entry + /// will be std::numeric_limits::max(). If any loop is + /// possible anywhere along the chain, the first entry will contain + /// a possible loop distance. + vector forward_loops; + /// Backward looping distances. If no loop is possible, an entry + /// will be std::numeric_limits::max(). If any loop is + /// possible anywhere along the chain, the last entry will contain + /// a possible loop distance. + vector backward_loops; vector chain_components;//Which component does each node belong to, usually all 0s @@ -1727,9 +1818,11 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab bool reversed_in_parent; bool start_node_rev; + // The end node is reverse if it points back into the snarl, not along it. 
bool end_node_rev; bool is_trivial; bool is_simple; + bool is_regular; bool is_tip = false; bool is_root_snarl = false; bool include_distances = true; diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index ba28410f..f8579c56 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -16,7 +16,9 @@ namespace bdsg { const vector SnarlDistanceIndex::record_t_as_string = { "ROOT", "NODE", "DISTANCED_NODE", "TRIVIAL_SNARL", "DISTANCED_TRIVIAL_SNARL", - "SNARL", "DISTANCED_SNARL", "SIMPLE_SNARL", "OVERSIZED_SNARL", + "SIMPLE_SNARL", "DISTANCED_SIMPLE_SNARL", + "REGULAR_SNARL", "DISTANCED_REGULAR_SNARL", "OVERSIZED_REGULAR_SNARL", + "SNARL", "DISTANCED_SNARL", "OVERSIZED_SNARL", "ROOT_SNARL", "DISTANCED_ROOT_SNARL", "CHAIN", "DISTANCED_CHAIN", "MULTICOMPONENT_CHAIN", "CHILDREN" @@ -125,12 +127,12 @@ size_t SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::get_max } else { if (parent.first == TEMP_ROOT) { //TODO: Why is node_count being added? - return SnarlRecord::record_size(include_distances ? DISTANCED_ROOT_SNARL : ROOT_SNARL, node_count, 0) + node_count; + return SnarlRecord::record_size(encode_root_snarl(include_distances), node_count, 0) + node_count; } else if (!(hub_labels.empty())) { - return SnarlRecord::record_size(OVERSIZED_SNARL, node_count, hub_labels.size()); + return SnarlRecord::record_size(encode_nonroot_nonsimple_snarl(true, is_regular, true), node_count, hub_labels.size()); } else { //TODO: Why is node_count being added? - return SnarlRecord::record_size(include_distances ? 
DISTANCED_SNARL : SNARL, node_count, 0) + node_count; + return SnarlRecord::record_size(encode_nonroot_nonsimple_snarl(include_distances, is_regular, false), node_count, 0) + node_count; } } } @@ -156,8 +158,7 @@ bool SnarlDistanceIndex::is_root(const net_handle_t& net) const { bool SnarlDistanceIndex::is_root_snarl(const net_handle_t& net) const { #ifdef debug_distances if (get_handle_type(net) == ROOT_HANDLE && get_record_offset(net) != 0) { - assert(SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == ROOT_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_ROOT_SNARL); + assert(is_root_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())); } #endif @@ -169,8 +170,7 @@ bool SnarlDistanceIndex::is_snarl(const net_handle_t& net) const { #ifdef debug_distances if(get_handle_type(net) == SNARL_HANDLE){ assert(SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == SNARL_HANDLE || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == ROOT_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_ROOT_SNARL); + is_root_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())); assert(get_node_record_offset(net) == 0 || get_node_record_offset(net) == 1); } #endif @@ -178,19 +178,20 @@ if(get_handle_type(net) == SNARL_HANDLE){ } bool SnarlDistanceIndex::is_oversized_snarl(const net_handle_t& net) const { - return SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == OVERSIZED_SNARL; + return is_oversized_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type()); } bool SnarlDistanceIndex::is_dag(const net_handle_t& snarl) const { record_t record_type = SnarlTreeRecord(snarl, &snarl_tree_records).get_record_type(); - if ( record_type == SNARL || record_type == ROOT_SNARL ) { + if ( is_nontrivial_snarl(record_type) && !has_distances(record_type) ) { //If this is a snarl but didn't store distances cerr << "warning: checking if a snarl is a dag in an index 
without distances. Returning true" << endl; return true; - } else if (record_type == DISTANCED_SNARL || record_type == OVERSIZED_SNARL || record_type == DISTANCED_ROOT_SNARL) { + } else if (is_nonsimple_snarl(record_type)) { //If this is any kind of non-simple snarl + //(We already ruled out not having distances) - if (record_type != DISTANCED_ROOT_SNARL) { + if (!is_root_snarl(record_type)) { //If there were boundary nodes, check for loops on the bounds //The bounds of the snarl facing in @@ -231,149 +232,21 @@ bool SnarlDistanceIndex::is_simple_snarl(const net_handle_t& net) const { #ifdef debug_distances if(get_handle_type(net) == SNARL_HANDLE){ assert(SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == SNARL_HANDLE || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == ROOT_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_ROOT_SNARL); + is_root_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())); } #endif return get_handle_type(net) == SNARL_HANDLE && get_node_record_offset(net) == 1; } -bool SnarlDistanceIndex::is_regular_snarl(const net_handle_t& net, bool allow_internal_loops, const handlegraph::HandleGraph* graph) const { +bool SnarlDistanceIndex::is_regular_snarl(const net_handle_t& net) const { #ifdef debug_distances if(get_handle_type(net) == SNARL_HANDLE){ assert(SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == SNARL_HANDLE || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == ROOT_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_ROOT_SNARL); + is_root_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())); } #endif - - // Helper function to check if an edge exists based on the distance in the distance index - // Depends on if we allow internal distances or not - auto has_edge = [&] (const net_handle_t& n1, const net_handle_t& n2) { - size_t dist = distance_in_parent(net, n1, n2); - if 
(allow_internal_loops) { - // If we allow internal loops, then we only check if the edge in the snarl netgraph exists - return dist == 0; - } else { - // If we don't allow internal loops, then check the distance itself - return dist != std::numeric_limits::max(); - } - }; - - record_t record_type = SnarlTreeRecord(net, &snarl_tree_records).get_record_type(); - if (record_type == ROOT_SNARL || record_type == DISTANCED_ROOT_SNARL) { - // Root snarls are not regular - return false; - } else if (record_type == SIMPLE_SNARL || record_type == DISTANCED_SIMPLE_SNARL) { - // All simple snarls are regular - return true; - } - if ((record_type == SNARL || record_type == OVERSIZED_SNARL) && graph == nullptr) { - throw runtime_error("error: is_regular_snarl requires a graph if the distance index doesn't contain distances"); - } - if (record_type == SNARL && !allow_internal_loops) { - throw runtime_error("error: is_regular_snarl requires distances in the distance index to verify that there are no internal loops"); - } - - //If there is any edge from the boundary nodes to themselves, then it cannot be regular - // How we check this depends on if we have distances or not - net_handle_t start_in = get_bound(net, false, true); - net_handle_t end_in = get_bound(net, true, true); - if (record_type == DISTANCED_SNARL || record_type == OVERSIZED_SNARL) { - if (has_edge(start_in, start_in) || - has_edge(end_in, end_in)) { - return false; - } - } else if (record_type != DISTANCED_SNARL && record_type != OVERSIZED_SNARL) { - if (graph->has_edge( get_handle(flip(start_in), graph), get_handle(flip(start_in), graph)) || - graph->has_edge( get_handle(flip(end_in), graph), get_handle(flip(end_in), graph))) { - return false; - } - } - bool is_regular = true; - - - for_each_child(net, [&](const net_handle_t& child) { - //If there isn't a path through the snarl that passes through the child - //or there's an extra path through the child then it is irregular - - // Graph handles for the left/right 
sides of the child, filled in if necessary - handlegraph::handle_t child_start_in; - handlegraph::handle_t child_end_in; - - // First check that each child is connected to the two bounds by one possible traversal - bool start_right; - bool start_left; - bool end_right; - bool end_left; - - if (record_type == DISTANCED_SNARL || record_type == OVERSIZED_SNARL) { - // If the distance index has distances, then check the distances - start_right = has_edge(start_in, child); - start_left = has_edge(start_in, flip(child)); - end_right = has_edge(end_in, child); - end_left = has_edge(end_in, flip(child)); - } else { - // If the snarl doesn't store distances then check the edges in the graph - child_start_in = is_node(child) ? get_handle(child, graph) : get_handle(get_bound(child, false, true), graph); - child_end_in = is_node(child) ? get_handle(flip(child), graph) : get_handle(get_bound(child, true, true), graph); - start_left = graph->has_edge(get_handle(start_in, graph), child_start_in); - start_right = graph->has_edge(get_handle(start_in, graph), child_end_in); - end_left = graph->has_edge(get_handle(end_in, graph), child_start_in); - end_right = graph->has_edge(get_handle(end_in, graph), child_end_in); - } - - if (start_right && end_left) { - if (start_left || end_right) { - is_regular = false; - return false; - } - } else if (start_left && end_right) { - if (start_right || end_left) { - is_regular = false; - return false; - } - } else { - //There wasn't a path through this node so it is irregular - is_regular = false; - return false; - } - - - //Next, if there is an edge to any other child, then it is irregular - for_each_child(net, [&](const net_handle_t& child2) { - if (record_type == DISTANCED_SNARL) { - if (has_edge(child, child2) || - has_edge(child, flip(child2)) || - has_edge(flip(child), child2) || - has_edge(flip(child), flip(child2))) { - is_regular = false; - return false; - } - //Return true to continue traversing - return true; - } else { - - //This may 
not have been filled in for an oversized snarl - child_start_in = is_node(child) ? get_handle(child, graph) : get_handle(get_bound(child, false, true), graph); - child_end_in = is_node(child) ? get_handle(flip(child), graph) : get_handle(get_bound(child, true, true), graph); - handlegraph::handle_t child2_start_in = is_node(child2) ? get_handle(child2, graph) : get_handle(get_bound(child2, false, true), graph); - handlegraph::handle_t child2_end_in = is_node(child2) ? get_handle(flip(child2), graph) : get_handle(get_bound(child2, true, true), graph); - if (graph->has_edge(child_start_in, child2_start_in) || - graph->has_edge(child_start_in, child2_end_in) || - graph->has_edge(child_end_in, child2_start_in) || - graph->has_edge(child_end_in, child2_end_in)) { - is_regular = false; - } - return false; - } - }); - - //Return true to continue traversing - return true; - }); - return is_regular; + return is_regular_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type()); } bool SnarlDistanceIndex::is_chain(const net_handle_t& net) const { @@ -381,8 +254,7 @@ bool SnarlDistanceIndex::is_chain(const net_handle_t& net) const { if (get_handle_type(net) ==CHAIN_HANDLE) { assert(SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == CHAIN_HANDLE || SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == NODE_HANDLE || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == SIMPLE_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_SIMPLE_SNARL); + is_simple_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())); } #endif return get_handle_type(net) == CHAIN_HANDLE; @@ -392,8 +264,7 @@ bool SnarlDistanceIndex::is_multicomponent_chain(const net_handle_t& net) const if (get_handle_type(net) ==CHAIN_HANDLE) { assert(SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == CHAIN_HANDLE || SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == NODE_HANDLE || - SnarlTreeRecord(net, 
&snarl_tree_records).get_record_type() == SIMPLE_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_SIMPLE_SNARL); + is_simple_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())); } #endif return get_handle_type(net) == CHAIN_HANDLE @@ -430,8 +301,7 @@ bool SnarlDistanceIndex::is_ordered_in_chain(const net_handle_t& child1, const n bool SnarlDistanceIndex::is_trivial_chain(const net_handle_t& net) const { bool handle_is_chain =get_handle_type(net) == CHAIN_HANDLE; bool record_is_node = SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == NODE_HANDLE; - bool record_is_simple_snarl = SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == SIMPLE_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_SIMPLE_SNARL ; + bool record_is_simple_snarl = is_simple_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type()); bool handle_has_node_offset = get_node_record_offset(net) >= 2; return handle_is_chain && (record_is_node @@ -442,8 +312,7 @@ bool SnarlDistanceIndex::is_node(const net_handle_t& net) const { #ifdef debug_distances if(get_handle_type(net) == NODE_HANDLE){ assert( SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == NODE_HANDLE - || SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == SIMPLE_SNARL - || SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_SIMPLE_SNARL ); + || is_simple_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())); } #endif return get_handle_type(net) == NODE_HANDLE; @@ -452,8 +321,7 @@ bool SnarlDistanceIndex::is_sentinel(const net_handle_t& net) const { #ifdef debug_distances if(get_handle_type(net) == SENTINEL_HANDLE){ assert(SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == SNARL_HANDLE - || SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == SIMPLE_SNARL - || SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == 
DISTANCED_SIMPLE_SNARL); + || is_simple_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())); } #endif return get_handle_type(net) == SENTINEL_HANDLE; @@ -511,8 +379,7 @@ net_handle_t SnarlDistanceIndex::get_parent(const net_handle_t& child) const { SNARL_HANDLE, get_node_record_offset(child)); } else if (get_handle_type(child) == ROOT_HANDLE) { throw runtime_error("error: trying to find the parent of the root"); - } else if (get_record_type(snarl_tree_records->at(get_record_offset(child))) == SIMPLE_SNARL || - get_record_type(snarl_tree_records->at(get_record_offset(child))) == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(get_record_type(snarl_tree_records->at(get_record_offset(child))))) { #ifdef debug_parent std::cerr << "Child " << net_handle_as_string(child) << " has simple snarl record type " << stringify(get_record_type(snarl_tree_records->at(get_record_offset(child)))) << " and current handle type " << stringify(get_handle_type(child)) << std::endl; #endif @@ -668,7 +535,7 @@ net_handle_t SnarlDistanceIndex::flip(const net_handle_t& net) const { net_handle_t SnarlDistanceIndex::canonical(const net_handle_t& net) const { SnarlTreeRecord record(net, &snarl_tree_records); record_t type = record.get_record_type(); - if (type == ROOT || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + if (is_any_root(type)) { return get_root(); } @@ -706,11 +573,9 @@ SnarlDecomposition::endpoint_t SnarlDistanceIndex::ends_at(const net_handle_t& t size_t SnarlDistanceIndex::get_rank_in_parent(const net_handle_t& net) const { size_t tag = snarl_tree_records->at(get_record_offset(net)); - if (get_record_type(tag) == TRIVIAL_SNARL || - get_record_type(tag) == DISTANCED_TRIVIAL_SNARL) { + if (is_trivial_snarl(get_record_type(tag))) { return TrivialSnarlRecord(get_record_offset(net), &snarl_tree_records).get_rank_in_parent(get_node_record_offset(net)); - } else if (get_record_type(tag) == SIMPLE_SNARL || - get_record_type(tag) == DISTANCED_SIMPLE_SNARL) 
{ + } else if (is_simple_snarl(get_record_type(tag))) { if (is_snarl(net)) { return get_record_offset(net); } else { @@ -784,8 +649,7 @@ bool SnarlDistanceIndex::for_each_child_impl(const net_handle_t& traversal, cons if (record_type == ROOT_HANDLE) { RootRecord root_record(get_root(), &snarl_tree_records); return root_record.for_each_child(iteratee); - } else if (SnarlTreeRecord(traversal, &snarl_tree_records).get_record_type() == SIMPLE_SNARL || - SnarlTreeRecord(traversal, &snarl_tree_records).get_record_type() == DISTANCED_SIMPLE_SNARL ) { + } else if (is_simple_snarl(SnarlTreeRecord(traversal, &snarl_tree_records).get_record_type())) { //If this is a simple snarl then it is a bit different if (handle_type == CHAIN_HANDLE) { //If the handle thinks it's a chain, then it is a trivial chain in the snarl so we do @@ -800,7 +664,7 @@ bool SnarlDistanceIndex::for_each_child_impl(const net_handle_t& traversal, cons } else if (record_type == SNARL_HANDLE) { // This could be a simple or non-simple snarl record_t specific_type = SnarlTreeRecord(traversal, &snarl_tree_records).get_record_type(); - if (specific_type == SIMPLE_SNARL || specific_type == DISTANCED_SIMPLE_SNARL) { + if (is_simple_snarl(specific_type)) { SimpleSnarlRecord snarl_record(traversal, &snarl_tree_records); return snarl_record.for_each_child(iteratee); } else { @@ -854,8 +718,8 @@ bool SnarlDistanceIndex::follow_net_edges_impl(const net_handle_t& here, const h SnarlTreeRecord parent_record (get_parent(here), &snarl_tree_records); if (parent_record.get_record_handle_type() == ROOT_HANDLE && - parent_record.get_record_type() != ROOT_SNARL && - parent_record.get_record_type() != DISTANCED_ROOT_SNARL) { + !is_root_snarl(parent_record.get_record_type())) { + // TODO: should we check for ROOT record type here? 
#ifdef debug_snarl_traversal cerr << "The parent is a root so just check self connectivity" << endl; #endif @@ -902,8 +766,7 @@ bool SnarlDistanceIndex::follow_net_edges_impl(const net_handle_t& here, const h //If this is a chain (or a node pretending to be a chain) and it is the child of a snarl //Or if it is the sentinel of a snarl, then we walk through edges in the snarl //It can either run into another chain (or node) or the boundary node - bool is_root_snarl = parent_record.get_record_type() == ROOT_SNARL - || parent_record.get_record_type() == DISTANCED_ROOT_SNARL; + bool is_root_snarl = SnarlDistanceIndex::is_root_snarl(parent_record.get_record_type()); //Get the graph handle for the end node of whatever this is, pointing in the right direction @@ -959,8 +822,7 @@ bool SnarlDistanceIndex::follow_net_edges_impl(const net_handle_t& here, const h node_net_handle = flip(node_net_handle); } - if (get_record_type(snarl_tree_records->at(get_record_offset(node_net_handle))) == NODE || - get_record_type(snarl_tree_records->at(get_record_offset(node_net_handle))) == DISTANCED_NODE ) { + if (is_node(get_record_type(snarl_tree_records->at(get_record_offset(node_net_handle))))) { //If this is a node make a net_handle_t of a node pretending to be a chain net_handle_t next_net = get_net_handle_from_values(get_record_offset(node_net_handle), graph->get_is_reverse(h) ? 
END_START : START_END, @@ -969,8 +831,7 @@ bool SnarlDistanceIndex::follow_net_edges_impl(const net_handle_t& here, const h cerr << " -> actual child node " << net_handle_as_string(next_net) << endl; #endif return iteratee(next_net); - } else if (get_record_type(snarl_tree_records->at(get_record_offset(node_net_handle))) == SIMPLE_SNARL || - get_record_type(snarl_tree_records->at(get_record_offset(node_net_handle))) == DISTANCED_SIMPLE_SNARL ) { + } else if (is_simple_snarl(get_record_type(snarl_tree_records->at(get_record_offset(node_net_handle))))) { //If the node is a node in a simple snarl net_handle_t next_net = get_net_handle_from_values(get_record_offset(node_net_handle), graph->get_is_reverse(h) ? END_START : START_END, @@ -1400,7 +1261,7 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, std::cerr << " Retrieving simple snarl value: " << result << endl; #endif return result; - } else if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == OVERSIZED_SNARL) { + } else if (is_oversized_snarl(get_record_type(snarl_tree_records->at(get_record_offset(parent))))) { #ifdef debug_distances cerr << " Performing HHL query" << endl; #endif @@ -1544,7 +1405,7 @@ size_t SnarlDistanceIndex::distance_in_snarl(const net_handle_t& parent, if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == DISTANCED_SIMPLE_SNARL) { return SimpleSnarlRecord(parent, &snarl_tree_records).get_distance(rank1, right_side1, rank2, right_side2); - } else if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == OVERSIZED_SNARL + } else if (is_oversized_snarl(get_record_type(snarl_tree_records->at(get_record_offset(parent)))) && !(rank1 == 0 || rank1 == 1 || rank2 == 0 || rank2 == 1) ) { //If this is an oversized snarl and we're looking for internal distances, then we didn't store the //distance and we have to find it using dijkstra's algorithm @@ -2867,7 +2728,7 @@ void 
SnarlDistanceIndex::for_each_handle_in_shortest_path_in_snarl(const net_han size_t target_distance = distance_to_traverse; size_t starting_distance = distance_traversed; cerr << "Find shortest path in " << net_handle_as_string(snarl_handle) << " from " << net_handle_as_string(start) << " to " << net_handle_as_string(end) << " with distance " << distance_to_traverse << endl; - if (SnarlTreeRecord(snarl_handle, &snarl_tree_records).get_record_type() != OVERSIZED_SNARL) { + if (!is_oversized_snarl(SnarlTreeRecord(snarl_handle, &snarl_tree_records).get_record_type())) { cerr << "\tactual distance is " << distance_in_parent(snarl_handle, start, flip(end)) << endl; assert(distance_in_parent(snarl_handle, start, flip(end)) == distance_to_traverse); } @@ -2879,7 +2740,7 @@ void SnarlDistanceIndex::for_each_handle_in_shortest_path_in_snarl(const net_han * there will always be only one that is on the minimum distance path. */ - if (SnarlTreeRecord(snarl_handle, &snarl_tree_records).get_record_type() == OVERSIZED_SNARL) { + if (is_oversized_snarl(SnarlTreeRecord(snarl_handle, &snarl_tree_records).get_record_type())) { //IF this is an oversized snarl, then we don't have any distance information so use the handlgraph algorithm //for traversing the shortest path @@ -2934,8 +2795,7 @@ void SnarlDistanceIndex::for_each_handle_in_shortest_path_in_snarl(const net_han cerr << "Checking next net " << net_handle_as_string(next_net) << " find distance to " << net_handle_as_string(flip(end)) << endl; cerr << "Traversed " << distance_traversed << " so far, looking for " << distance_to_traverse << endl; bool snarl_is_root = is_root(snarl_handle) || is_root_snarl(snarl_handle) || - SnarlTreeRecord(snarl_handle, &snarl_tree_records).get_record_type() == ROOT_SNARL || - SnarlTreeRecord(snarl_handle, &snarl_tree_records).get_record_type() == DISTANCED_ROOT_SNARL; + is_root_snarl(SnarlTreeRecord(snarl_handle, &snarl_tree_records).get_record_type()); if (!is_root(snarl_handle)) { if( (end != 
get_bound(snarl_handle, true, false) && next == get_bound(snarl_handle, true, false)) || (end != get_bound(snarl_handle, false, false) && next == get_bound(snarl_handle, false, false))) { @@ -3380,12 +3240,13 @@ void SnarlDistanceIndex::for_each_handle_in_shortest_path_in_chain(const net_han size_t SnarlDistanceIndex::node_length(const net_handle_t& net) const { if (is_node(net)) { - if (get_record_type(snarl_tree_records->at(get_record_offset(net))) == DISTANCED_NODE) { + auto record_type = get_record_type(snarl_tree_records->at(get_record_offset(net))); + if (record_type == DISTANCED_NODE) { return NodeRecord(net, &snarl_tree_records).get_node_length(); - } else if (get_record_type(snarl_tree_records->at(get_record_offset(net))) == DISTANCED_SIMPLE_SNARL) { + } else if (record_type == DISTANCED_SIMPLE_SNARL) { return SimpleSnarlRecord(net, &snarl_tree_records).get_node_length(); } else { - assert(get_record_type(snarl_tree_records->at(get_record_offset(net))) == DISTANCED_TRIVIAL_SNARL); + assert(record_type == DISTANCED_TRIVIAL_SNARL); return TrivialSnarlRecord(get_record_offset(net), &snarl_tree_records).get_node_length(get_node_record_offset(net)); } } else if (is_sentinel(net)) { @@ -3445,11 +3306,9 @@ size_t SnarlDistanceIndex::maximum_length(const net_handle_t& net) const { } nid_t SnarlDistanceIndex::node_id(const net_handle_t& net) const { if (is_node(net) || is_trivial_chain(net)) { - if (get_record_type(snarl_tree_records->at(get_record_offset(net))) == NODE - || get_record_type(snarl_tree_records->at(get_record_offset(net))) == DISTANCED_NODE) { + if (is_node(get_record_type(snarl_tree_records->at(get_record_offset(net))))) { return NodeRecord(net, &snarl_tree_records).get_node_id(); - } else if (get_record_type(snarl_tree_records->at(get_record_offset(net))) == SIMPLE_SNARL - || get_record_type(snarl_tree_records->at(get_record_offset(net))) == DISTANCED_SIMPLE_SNARL) { + } else if 
(is_simple_snarl(get_record_type(snarl_tree_records->at(get_record_offset(net))))) { return SimpleSnarlRecord(net, &snarl_tree_records).get_node_id(); } else { return TrivialSnarlRecord(get_record_offset(net), &snarl_tree_records).get_node_id(get_node_record_offset(net)); @@ -3477,9 +3336,9 @@ bool SnarlDistanceIndex::has_node(const nid_t id) const { bool SnarlDistanceIndex::is_reversed_in_parent(const net_handle_t& net) const { SnarlTreeRecord record(net, &snarl_tree_records); - if (record.get_record_type() == TRIVIAL_SNARL || record.get_record_type() == DISTANCED_TRIVIAL_SNARL) { + if (is_trivial_snarl(record.get_record_type())) { return TrivialSnarlRecord(get_record_offset(net), &snarl_tree_records).get_is_reversed_in_parent(get_node_record_offset(net)); - } else if ((record.get_record_type() == SIMPLE_SNARL || record.get_record_type() == DISTANCED_SIMPLE_SNARL) && is_chain(net)) { + } else if (is_simple_snarl(record.get_record_type()) && is_chain(net)) { return SimpleSnarlRecord(net, &snarl_tree_records).get_node_is_reversed(); } else { return record.get_is_reversed_in_parent(); @@ -3500,8 +3359,7 @@ size_t SnarlDistanceIndex::get_max_tree_depth() const { size_t SnarlDistanceIndex::get_depth(const net_handle_t& net) const { if (is_root(net)) { return 0; - } else if (SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == SIMPLE_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_SIMPLE_SNARL ){ + } else if (is_simple_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())){ //If this is a simple snarl, then it can be a node, snarl, or chain //The depth of the snarl's parent chain @@ -3671,22 +3529,22 @@ size_t SnarlDistanceIndex::get_chain_component(const net_handle_t& net, bool get size_t SnarlDistanceIndex::SnarlTreeRecord::get_min_length() const { record_t type = get_record_type(); size_t val; - if (type == DISTANCED_NODE ) { + if (is_any_root(type)) { + throw runtime_error("error: trying to find the length 
of the root"); + } else if (!has_distances(type)) { + throw runtime_error("error: trying to access get distance in a distanceless index"); + } else if (is_node(type)) { return (*records)->at(record_offset + NODE_LENGTH_OFFSET); - } else if (type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { size_t last_node_offset = TrivialSnarlRecord(record_offset, records).get_node_count()-1; return (*records)->at(record_offset + DISTANCED_TRIVIAL_SNARL_RECORD_SIZE + (last_node_offset*2) + 1); - } else if (type == DISTANCED_SNARL || type == OVERSIZED_SNARL) { - val = (*records)->at(record_offset + SNARL_MIN_LENGTH_OFFSET); - } else if (type == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(type)) { size_t raw_val = (*records)->at(record_offset + SIMPLE_SNARL_NODE_COUNT_AND_LENGTHS_OFFSET); return (raw_val >> 11) & ((1 << 11) - 1); - } else if (type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_nonroot_nonsimple_snarl(type)) { + val = (*records)->at(record_offset + SNARL_MIN_LENGTH_OFFSET); + } else if (is_chain(type)) { val = (*records)->at(record_offset + CHAIN_MIN_LENGTH_OFFSET); - } else if (type == NODE || type == SNARL || type == CHAIN) { - throw runtime_error("error: trying to access get distance in a distanceless index"); - } else if (type == ROOT || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { - throw runtime_error("error: trying to find the length of the root"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); } @@ -3696,22 +3554,22 @@ size_t SnarlDistanceIndex::SnarlTreeRecord::get_min_length() const { size_t SnarlDistanceIndex::SnarlTreeRecord::get_max_length() const { record_t type = get_record_type(); size_t val; - if (type == DISTANCED_NODE ) { + if (is_any_root(type)) { + throw runtime_error("error: trying to find the length of the root"); + } else if (!has_distances(type)) { + throw runtime_error("error: trying to access get distance in a distanceless 
index"); + } else if (is_node(type)) { return (*records)->at(record_offset + NODE_LENGTH_OFFSET); - } else if (type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { size_t last_node_offset = TrivialSnarlRecord(record_offset, records).get_node_count()-1; return (*records)->at(record_offset + DISTANCED_TRIVIAL_SNARL_RECORD_SIZE + (last_node_offset*2) + 1); - } else if (type == DISTANCED_SNARL || type == OVERSIZED_SNARL) { - val = (*records)->at(record_offset + SNARL_MAX_LENGTH_OFFSET); - } else if (type == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(type)) { size_t raw_val = (*records)->at(record_offset + SIMPLE_SNARL_NODE_COUNT_AND_LENGTHS_OFFSET); return raw_val & ((1 << 11) - 1); - } else if (type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_nonroot_nonsimple_snarl(type)) { + val = (*records)->at(record_offset + SNARL_MAX_LENGTH_OFFSET); + } else if (is_chain(type)) { val = (*records)->at(record_offset + CHAIN_MAX_LENGTH_OFFSET); - } else if (type == NODE || type == SNARL || type == CHAIN) { - throw runtime_error("error: trying to access get distance in a distanceless index"); - } else if (type == ROOT || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { - throw runtime_error("error: trying to find the length of the root"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); } @@ -3721,18 +3579,16 @@ size_t SnarlDistanceIndex::SnarlTreeRecord::get_max_length() const { size_t SnarlDistanceIndex::SnarlTreeRecord::get_rank_in_parent() const { record_t type = get_record_type(); - if (type == NODE || type == DISTANCED_NODE) { + if (is_node(type)) { return (*records)->at(record_offset + NODE_RANK_OFFSET); - } else if (type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { throw runtime_error("error: node ranks need the node offsets"); - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if 
(is_root_snarl(type)) { //For root snarls, the rank gets stored in the length slot return (*records)->at( record_offset + SNARL_MIN_LENGTH_OFFSET); - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == SIMPLE_SNARL - || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nontrivial_nonroot_snarl(type)) { return record_offset; - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { return (*records)->at(record_offset + CHAIN_RANK_OFFSET) >> 1; } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -3740,16 +3596,14 @@ size_t SnarlDistanceIndex::SnarlTreeRecord::get_rank_in_parent() const { }; bool SnarlDistanceIndex::SnarlTreeRecord::get_is_reversed_in_parent() const { record_t type = get_record_type(); - if (type == NODE || type == DISTANCED_NODE) { + if (is_node(type)) { return false; - } else if (type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { cerr << "warning: Getting orientation of a trivial snarl" << endl; return false; - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL - || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nontrivial_snarl(type)) { return false; - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { return (*records)->at(record_offset + CHAIN_RANK_OFFSET) & 1; } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -3760,22 +3614,21 @@ handlegraph::nid_t SnarlDistanceIndex::SnarlTreeRecord::get_start_id() const { record_t type = get_record_type(); if (type == ROOT) { throw runtime_error("error: trying to get the start node of the root"); - } else if (type == NODE || type == DISTANCED_NODE) { + } else if (is_node(type)) { //cerr << "warning: 
Looking for the start of a node" << endl; return (*records)->at(record_offset + NODE_ID_OFFSET); - } else if (type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { TrivialSnarlRecord trivial_snarl_record(record_offset, records); return trivial_snarl_record.get_node_id(0); - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nontrivial_nonroot_snarl(type)) { //To get the start node of a snarl, get the thing to the left of it in the chain ChainRecord parent_record (get_parent_record_offset(), records); - size_t node_offset = (type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) ? 1 : 0; + size_t node_offset = is_simple_snarl(type) ? 1 : 0; net_handle_t next_node_in_chain = parent_record.get_next_child(get_net_handle_from_values(record_offset, START_END, SNARL_HANDLE, node_offset), true); return TrivialSnarlRecord(get_record_offset(next_node_in_chain), records).get_node_id(get_node_record_offset(next_node_in_chain)); - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { return ((*records)->at(record_offset + CHAIN_START_NODE_OFFSET)) >> 1; - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_root_snarl(type)) { throw runtime_error("error: trying to find the start node of a root snarl"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -3785,20 +3638,19 @@ bool SnarlDistanceIndex::SnarlTreeRecord::get_start_orientation() const { record_t type = get_record_type(); if (type == ROOT) { throw runtime_error("error: trying to get the start node of the root"); - } else if (type == NODE || type == DISTANCED_NODE || type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_node(type) || is_trivial_snarl(type)) { //cerr << "warning: Looking for the start of a node" << 
endl; return false; - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nontrivial_nonroot_snarl(type)) { ChainRecord parent_record (get_parent_record_offset(), records); - size_t node_offset = (type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) ? 1 : 0; + size_t node_offset = is_simple_snarl(type) ? 1 : 0; //Get the next node in the chain (going left) //The handle will be pointing in the direction we just moved, so if it is going START_END, then it is reversed net_handle_t next_node_in_chain = parent_record.get_next_child(get_net_handle_from_values(record_offset, START_END, SNARL_HANDLE, node_offset), true); return get_end_endpoint( get_connectivity(next_node_in_chain)) == END; - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { return ((*records)->at(record_offset + CHAIN_START_NODE_OFFSET)) & 1; - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_root_snarl(type)) { throw runtime_error("error: trying to find the start node of a root snarl"); }else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -3808,23 +3660,22 @@ handlegraph::nid_t SnarlDistanceIndex::SnarlTreeRecord::get_end_id() const { record_t type = get_record_type(); if (type == ROOT) { throw runtime_error("error: trying to get the end node of the root"); - } else if (type == NODE || type == DISTANCED_NODE ) { + } else if (is_node(type)) { //cerr << "warning: Looking for the end of a node" << endl; //Offset of the start of the node vector return (*records)->at(record_offset + NODE_ID_OFFSET); - } else if (type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { TrivialSnarlRecord trivial_snarl_record(record_offset, records); return trivial_snarl_record.get_node_id(trivial_snarl_record.get_node_count()-1); - } else 
if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nontrivial_nonroot_snarl(type)) { //For a snarl, walk right in the chain ChainRecord parent_record (get_parent_record_offset(), records); - size_t node_offset = (type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) ? 1 : 0; + size_t node_offset = is_simple_snarl(type) ? 1 : 0; net_handle_t next_node_in_chain = parent_record.get_next_child(get_net_handle_from_values(record_offset, START_END, SNARL_HANDLE, node_offset), false); return TrivialSnarlRecord(get_record_offset(next_node_in_chain), records).get_node_id(get_node_record_offset(next_node_in_chain)); - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { return ((*records)->at(record_offset + CHAIN_END_NODE_OFFSET)) >> 1; - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_root_snarl(type)) { throw runtime_error("error: trying to find the end node of a root snarl"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -3835,21 +3686,20 @@ bool SnarlDistanceIndex::SnarlTreeRecord::get_end_orientation() const { record_t type = get_record_type(); if (type == ROOT) { throw runtime_error("error: trying to get the end node of the root"); - } else if (type == NODE || type == DISTANCED_NODE || type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_node(type) || is_trivial_snarl(type)) { //cerr << "warning: Looking for the end of a node" << endl; //Offset of the start of the node vector return false; - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nonroot_nontrivial_snarl(type)) { ChainRecord parent_record (get_parent_record_offset(), records); - size_t node_offset = (type == SIMPLE_SNARL || type == 
DISTANCED_SIMPLE_SNARL) ? 1 : 0; + size_t node_offset = is_simple_snarl(type) ? 1 : 0; //Get the next node in the chain (going right) net_handle_t next_node_in_chain = parent_record.get_next_child(get_net_handle_from_values(record_offset, START_END, SNARL_HANDLE, node_offset), false); //The handle will be pointing in the direction we just moved, so if it is going END_START, then it is reversed return get_end_endpoint( get_connectivity(next_node_in_chain)) == START; - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { return ((*records)->at(record_offset + CHAIN_END_NODE_OFFSET)) & 1; - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_root_snarl(type)) { throw runtime_error("error: trying to find the end node of a root snarl"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -3879,16 +3729,15 @@ size_t SnarlDistanceIndex::SnarlTreeRecord::get_parent_record_offset() const { record_t type = get_record_type(); if (type == ROOT) { return 0; - } else if (type == NODE || type == DISTANCED_NODE) { + } else if (is_node(type)) { return ((*records)->at(record_offset + NODE_PARENT_OFFSET)); - } else if (type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { return (*records)->at(record_offset + TRIVIAL_SNARL_PARENT_OFFSET); - } else if (type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(type)) { return (*records)->at(record_offset + SIMPLE_SNARL_PARENT_OFFSET); - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_nonsimple_snarl(type)) { return ((*records)->at(record_offset + SNARL_PARENT_OFFSET)); - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { return ((*records)->at(record_offset 
+ CHAIN_PARENT_OFFSET)); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -3901,12 +3750,7 @@ SnarlDistanceIndex::SnarlTreeRecordWriter::SnarlTreeRecordWriter (size_t pointer #ifdef debug_distance_indexing record_t type = get_record_type(); - assert(type == ROOT || type == NODE || type == DISTANCED_NODE || - type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL || - type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL || - type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL || - type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL || type == CHAIN || - type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN); + assert(is_any_nonchildren(type)); #endif } @@ -3916,12 +3760,7 @@ SnarlDistanceIndex::SnarlTreeRecordWriter::SnarlTreeRecordWriter (const net_hand records = tree_records; #ifdef debug_distance_indexing record_t type = get_record_type(); - assert(type == ROOT || type == NODE || type == DISTANCED_NODE || - type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL || - type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL || - type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL || - type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL || type == CHAIN || - type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN); + assert(is_any_nonchildren(type)); #endif } @@ -4001,13 +3840,17 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_record_type(record_t type) { void SnarlDistanceIndex::SnarlTreeRecordWriter::set_min_length(size_t length) { record_t type = get_record_type(); size_t offset; - if (type == DISTANCED_NODE) { + if (!has_distances(type)) { + throw runtime_error("error: trying to access get distance in a distanceless index"); + } else if (is_any_root(type)) { + throw runtime_error("error: set the length of a root snarl"); + } else if (is_node(type)) { offset = record_offset + NODE_LENGTH_OFFSET; - } else if (type == DISTANCED_SNARL || type == OVERSIZED_SNARL) { + } else if 
(is_nonroot_nonsimple_snarl(type)) { offset = record_offset + SNARL_MIN_LENGTH_OFFSET; - } else if (type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { offset = record_offset + CHAIN_MIN_LENGTH_OFFSET; - } else if (type == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(type)) { if (length > (1 << 11)-1) { throw runtime_error("error: node length is too large"); } @@ -4015,10 +3858,6 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_min_length(size_t length) { size_t new_val = old_val | (length << 11); (*records)->at(record_offset + SIMPLE_SNARL_NODE_COUNT_AND_LENGTHS_OFFSET)= new_val; return; - } else if (type == NODE || type == SNARL || type == CHAIN ) { - throw runtime_error("error: trying to access get distance in a distanceless index"); - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { - throw runtime_error("error: set the length of a root snarl"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); } @@ -4032,11 +3871,15 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_min_length(size_t length) { void SnarlDistanceIndex::SnarlTreeRecordWriter::set_max_length(size_t length) { record_t type = get_record_type(); size_t offset; - if (type == DISTANCED_NODE) { + if (!has_distances(type)) { + throw runtime_error("error: trying to access get distance in a distanceless index"); + } else if (is_any_root(type)) { + throw runtime_error("error: set the length of a root snarl"); + } else if (is_node(type)) { throw runtime_error("error: set the max length of a node"); - } else if (type == DISTANCED_SNARL || type == OVERSIZED_SNARL) { + } else if (is_nonroot_nonsimple_snarl(type)) { offset = record_offset + SNARL_MAX_LENGTH_OFFSET; - } else if (type == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(type)) { if (length > (1 << 11)-1) { throw runtime_error("error: node length is too large"); } @@ -4044,12 +3887,8 @@ void 
SnarlDistanceIndex::SnarlTreeRecordWriter::set_max_length(size_t length) { size_t new_val = old_val | length; (*records)->at(record_offset + SIMPLE_SNARL_NODE_COUNT_AND_LENGTHS_OFFSET) = new_val; return; - } else if (type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { offset = record_offset + CHAIN_MAX_LENGTH_OFFSET; - } else if (type == DISTANCED_NODE || type == SNARL || type == CHAIN) { - throw runtime_error("error: trying to access get distance in a distanceless index"); - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { - throw runtime_error("error: set the length of a root snarl"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); } @@ -4064,17 +3903,15 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_max_length(size_t length) { void SnarlDistanceIndex::SnarlTreeRecordWriter::set_rank_in_parent(size_t rank) { record_t type = get_record_type(); size_t offset; - if (type == NODE || type == DISTANCED_NODE) { + if (is_node(type)) { offset = record_offset + NODE_RANK_OFFSET; - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_root_snarl(type)) { (*records)->at(record_offset + SNARL_MIN_LENGTH_OFFSET) = rank; return; - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == SIMPLE_SNARL - || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nontrivial_nonroot_snarl(type)) { cerr << "SETTING THE RANK OF A SNARL WHICH I'M PRETTY SURE DOESN'T MEAN ANYTHING" << endl; return; - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { offset = record_offset + CHAIN_RANK_OFFSET; } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -4090,11 +3927,9 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_rank_in_parent(size_t rank) void 
SnarlDistanceIndex::SnarlTreeRecordWriter::set_is_reversed_in_parent(bool rev) { record_t type = get_record_type(); size_t offset; - if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL - || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + if (is_nontrivial_snarl(type)) { return; - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { offset = record_offset + CHAIN_RANK_OFFSET; } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -4108,22 +3943,21 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_is_reversed_in_parent(bool r void SnarlDistanceIndex::SnarlTreeRecordWriter::set_parent_record_offset(size_t pointer){ record_t type = get_record_type(); size_t offset; - if (type == NODE || type == DISTANCED_NODE) { + if (is_node(type)) { offset = record_offset + NODE_PARENT_OFFSET; - } else if (type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { offset = record_offset + TRIVIAL_SNARL_PARENT_OFFSET; - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_nonsimple_snarl(type)) { #ifdef debug_distance_indexing - if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + if (is_root_snarl(type)) { assert(pointer == 0); } #endif offset = record_offset + SNARL_PARENT_OFFSET; - } else if (type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(type)) { offset = record_offset + SIMPLE_SNARL_PARENT_OFFSET; - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { offset = record_offset + CHAIN_PARENT_OFFSET; } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -4139,13 +3973,13 @@ void 
SnarlDistanceIndex::SnarlTreeRecordWriter::set_parent_record_offset(size_t void SnarlDistanceIndex::SnarlTreeRecordWriter::set_start_node(handlegraph::nid_t id, bool rev) { record_t type = get_record_type(); size_t offset; - if (type == ROOT || type == NODE || type == DISTANCED_NODE || type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + if (type == ROOT || is_node(type) || is_trivial_snarl(type)) { throw runtime_error("error: trying to set the start node id of a node or root"); - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nonroot_nontrivial_snarl(type)) { throw runtime_error("error: trying to set the start node id of a snarl"); - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { offset = record_offset + CHAIN_START_NODE_OFFSET; - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_root_snarl(type)) { throw runtime_error("error: set the start node of a root snarl"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -4160,13 +3994,13 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_start_node(handlegraph::nid_ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_end_node(handlegraph::nid_t id, bool rev) const { record_t type = get_record_type(); size_t offset; - if (type == ROOT || type == NODE || type == DISTANCED_NODE || type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + if (type == ROOT || is_node(type) || is_trivial_snarl(type)) { throw runtime_error("error: trying to set the node id of a node or root"); - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nonroot_nontrivial_snarl(type)) { throw runtime_error("error: trying to set the end node id of a snarl"); - } else if (type == 
CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { offset = record_offset + CHAIN_END_NODE_OFFSET; - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_root_snarl(type)) { throw runtime_error("error: set the end node of a root snarl"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -4208,7 +4042,7 @@ bool SnarlDistanceIndex::RootRecord::for_each_child(const std::functionsize() << " -> " << (*records)->size() + extra_size << endl; @@ -4409,7 +4246,7 @@ SnarlDistanceIndex::SnarlRecordWriter::SnarlRecordWriter (size_t node_count, bds set_node_count(node_count); set_record_type(type); - if (type == OVERSIZED_SNARL) { + if (is_oversized_snarl(type)) { set_vec_size(vec_size); } @@ -4440,10 +4277,20 @@ void SnarlDistanceIndex::SnarlRecordWriter::set_distance_end_end(size_t value) { size_t SnarlDistanceIndex::SnarlRecord::get_distance_vector_offset(size_t rank1, bool right_side1, size_t rank2, bool right_side2, size_t node_count, record_t type) { + if (!has_distances(type)) { + throw runtime_error("error: trying to access distance in a distanceless snarl tree"); + } + if (is_oversized_snarl(type)) { + throw runtime_error("error: trying to access distance matrix in an oversized snarl"); + } + if (!is_nonsimple_snarl(type)) { + throw runtime_error("error: trying to access distance matrix in something other than a snarl that would have one"); + } + //how many node sides in this snarl size_t node_side_count = node_count * 2; - if (type == DISTANCED_SNARL) { + if (!is_root_snarl(type)) { //For distances snarls, the ranks 0 and 1 are for the start and end nodes. 
The distance //matrix in the snarl record is only for distances between internal nodes, so 0 and 1 are //never stored and we decrement the ranks by 2 @@ -4469,15 +4316,9 @@ size_t SnarlDistanceIndex::SnarlRecord::get_distance_vector_offset(size_t rank1, rank2 = tmp; } - if (type == SNARL || type == ROOT_SNARL || type == OVERSIZED_SNARL) { - throw runtime_error("error: trying to access distance in a distanceless snarl tree"); - } else if (type == DISTANCED_SNARL || type == DISTANCED_ROOT_SNARL) { - //normal distance index - size_t k = node_side_count-rank1; - return (((node_side_count+1) * node_side_count)/2) - (((k+1)*k) / 2) + rank2 - rank1; - } else { - throw runtime_error("error: trying to distance from something that isn't a snarl"); - } + //normal distance index + size_t k = node_side_count-rank1; + return (((node_side_count+1) * node_side_count)/2) - (((k+1)*k) / 2) + rank2 - rank1; } size_t SnarlDistanceIndex::SnarlRecord::get_distance_vector_offset(size_t rank1, bool right_side1, @@ -4513,7 +4354,7 @@ void SnarlDistanceIndex::SnarlRecordWriter::set_distance(size_t rank1, bool righ get_distance(rank1, right_side1, rank2, right_side2) == distance)); #endif //Don't save internal distances for oversized snarls - if (get_record_type() == OVERSIZED_SNARL && !(rank1 == 0 || rank1 == 1 || rank2 == 0 || rank2 == 1)){ + if (is_oversized_snarl(get_record_type()) && !(rank1 == 0 || rank1 == 1 || rank2 == 0 || rank2 == 1)){ return; } @@ -4531,7 +4372,7 @@ size_t SnarlDistanceIndex::SnarlRecord::get_distance(size_t rank1, bool right_si if (!has_distances(get_record_type())) { throw runtime_error("error: trying to access get distance in a distanceless index"); } - if (get_record_type() == OVERSIZED_SNARL) { + if (is_oversized_snarl(get_record_type())) { throw runtime_error("error: trying to distance from an oversized snarl"); } @@ -4614,7 +4455,7 @@ SnarlDistanceIndex::SimpleSnarlRecord::SimpleSnarlRecord (size_t pointer, const node_rank = node; #ifdef 
debug_distance_indexing assert (node_rank >=2); - assert(get_record_type() == SIMPLE_SNARL || get_record_type() == DISTANCED_SIMPLE_SNARL); + assert(is_simple_snarl(get_record_type())); #endif } @@ -4625,7 +4466,7 @@ SnarlDistanceIndex::SimpleSnarlRecord::SimpleSnarlRecord (net_handle_t net, cons #ifdef debug_distance_indexing assert (node_rank >=2); - assert(get_record_type() == SIMPLE_SNARL || get_record_type() == DISTANCED_SIMPLE_SNARL); + assert(is_simple_snarl(get_record_type())); #endif } @@ -4787,7 +4628,7 @@ SnarlDistanceIndex::NodeRecord::NodeRecord (size_t pointer, size_t node_offset, records = tree_records; #ifdef debug_distance_indexing - assert(get_record_type() == NODE || get_record_type() == DISTANCED_NODE); + assert(is_node(get_record_type())); #endif } @@ -4797,7 +4638,7 @@ SnarlDistanceIndex::NodeRecord::NodeRecord (net_handle_t net, const bdsg::yomo:: #ifdef debug_distance_indexing assert(get_handle_type(net) == NODE_HANDLE || get_handle_type(net) == CHAIN_HANDLE); - assert(get_record_type() == NODE || get_record_type() == DISTANCED_NODE); + assert(is_node(get_record_type())); assert(get_connectivity(net) == START_END || get_connectivity(net) == END_START || get_connectivity(net) == START_START || get_connectivity(net) == END_END); #endif @@ -4857,7 +4698,7 @@ SnarlDistanceIndex::TrivialSnarlRecord::TrivialSnarlRecord (size_t offset, const record_offset = offset; #ifdef debug_distance_indexing - assert(get_record_type() == TRIVIAL_SNARL || get_record_type() == DISTANCED_TRIVIAL_SNARL); + assert(is_trivial_snarl(get_record_type())); //assert(get_connectivity(net) == START_END || get_connectivity(net) == END_START // || get_connectivity(net) == START_START || get_connectivity(net) == END_END); #endif @@ -4873,7 +4714,7 @@ tuple SnarlDistanceIndex::TrivialSnarlRecord::ge throw runtime_error("error: trying to access get distance in a distanceless index"); } #ifdef debug_distances - assert(get_record_type() == TRIVIAL_SNARL || get_record_type() == 
DISTANCED_TRIVIAL_SNARL); + assert(is_trivial_snarl(get_record_type())); #endif size_t prefix_sum = (*records)->at(record_offset+TRIVIAL_SNARL_PREFIX_SUM_OFFSET); size_t forward_loop = (*records)->at(record_offset+TRIVIAL_SNARL_FORWARD_LOOP_OFFSET); @@ -4923,7 +4764,7 @@ size_t SnarlDistanceIndex::TrivialSnarlRecord::get_max_prefix_sum(size_t node_ra throw runtime_error("error: trying to access get distance in a distanceless index"); } #ifdef debug_distances - assert(get_record_type() == TRIVIAL_SNARL || get_record_type() == DISTANCED_TRIVIAL_SNARL); + assert(is_trivial_snarl(get_record_type())); #endif size_t prefix_sum = (*records)->at(record_offset+TRIVIAL_SNARL_MAX_PREFIX_SUM_OFFSET); @@ -4942,7 +4783,7 @@ size_t SnarlDistanceIndex::TrivialSnarlRecord::get_prefix_sum(size_t node_rank) throw runtime_error("error: trying to access get distance in a distanceless index"); } #ifdef debug_distances - assert(get_record_type() == TRIVIAL_SNARL || get_record_type() == DISTANCED_TRIVIAL_SNARL); + assert(is_trivial_snarl(get_record_type())); #endif size_t prefix_sum = (*records)->at(record_offset+TRIVIAL_SNARL_PREFIX_SUM_OFFSET); prefix_sum = prefix_sum == 0 ? std::numeric_limits::max() : prefix_sum - 1; @@ -4958,7 +4799,7 @@ size_t SnarlDistanceIndex::TrivialSnarlRecord::get_forward_loop(size_t node_rank throw runtime_error("error: trying to access get distance in a distanceless index"); } #ifdef debug_distances - assert(get_record_type()== TRIVIAL_SNARL || get_record_type() == DISTANCED_TRIVIAL_SNARL); + assert(is_trivial_snarl(get_record_type())); #endif size_t forward_loop = (*records)->at(record_offset+TRIVIAL_SNARL_FORWARD_LOOP_OFFSET); forward_loop = forward_loop == 0 ? 
std::numeric_limits::max() : forward_loop - 1; @@ -4979,7 +4820,7 @@ size_t SnarlDistanceIndex::TrivialSnarlRecord::get_reverse_loop(size_t node_rank throw runtime_error("error: trying to access get distance in a distanceless index"); } #ifdef debug_distances - assert(get_record_type() == TRIVIAL_SNARL || get_record_type() == DISTANCED_TRIVIAL_SNARL); + assert(is_trivial_snarl(get_record_type())); #endif size_t reverse_loop = (*records)->at(record_offset+TRIVIAL_SNARL_REVERSE_LOOP_OFFSET); reverse_loop = reverse_loop == 0 ? std::numeric_limits::max() : reverse_loop - 1; @@ -5044,7 +4885,7 @@ SnarlDistanceIndex::NodeRecordWriter::NodeRecordWriter (size_t pointer, size_t n //Set the pointer for the node to this record #ifdef debug_distance_indexinging - assert (type == NODE || type == DISTANCED_NODE); + assert (is_node(type)); cerr << get_node_pointer_offset(node_id, (*records)->at(MIN_NODE_ID_OFFSET), @@ -5119,7 +4960,7 @@ SnarlDistanceIndex::TrivialSnarlRecordWriter::TrivialSnarlRecordWriter (size_t p TrivialSnarlRecord::record_offset = pointer; TrivialSnarlRecord::records = records; - assert (type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL); + assert (is_trivial_snarl(type)); #ifdef debug_distance_indexing cerr << " Resizing array to add trivial snarl: length " << (*records)->size() << " -> " << @@ -5221,7 +5062,7 @@ SnarlDistanceIndex::ChainRecord::ChainRecord (size_t pointer, const bdsg::yomo:: record_t record_type = get_record_type(); if (type == SNARL_HANDLE) { // Simple snarls are also able to be looked at as chains, and ChainRecord knows how to parse them. 
- if (record_type == SIMPLE_SNARL || record_type == DISTANCED_SIMPLE_SNARL) { + if (is_simple_snarl(record_type)) { // This is allowed return; } @@ -5255,7 +5096,7 @@ SnarlDistanceIndex::ChainRecord::ChainRecord (net_handle_t net, const bdsg::yomo record_t record_type = get_record_type(); if (type == SNARL_HANDLE) { // Simple snarls are also able to be looked at as chains, and ChainRecord knows how to parse them. - if (record_type == SIMPLE_SNARL || record_type == DISTANCED_SIMPLE_SNARL) { + if (is_simple_snarl(record_type)) { // This is allowed return; } @@ -5347,7 +5188,7 @@ size_t SnarlDistanceIndex::ChainRecord::get_distance(size_t rank1, bool left_sid return std::numeric_limits::max(); } } - } else if (record_type != CHAIN && record_type != DISTANCED_CHAIN) { + } else if (!is_chain(record_type)) { std::cerr << "Warning: weird record type for chain: " << stringify(record_type) << std::endl; } @@ -5643,13 +5484,10 @@ net_handle_t SnarlDistanceIndex::ChainRecord::get_next_child(const net_handle_t& size_t next_pointer = get_record_offset(net_handle) + (go_left ? -(*records)->at(get_record_offset(net_handle)-2)-2 : (*records)->at(get_record_offset(net_handle)-1)+2); - if (SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)) == SNARL || - SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)) == DISTANCED_SNARL|| - SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)) == OVERSIZED_SNARL) { + if (SnarlDistanceIndex::is_nonsimple_nonroot_snarl(SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)))) { //If the next thing is a snarl, then just return the snarl going in the direction we just moved in return get_net_handle_from_values(next_pointer, (go_left ? 
END_START : START_END), SNARL_HANDLE); - } else if (SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)) == SIMPLE_SNARL || - SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)) == DISTANCED_SIMPLE_SNARL) { + } else if (SnarlDistanceIndex::is_simple_snarl(SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)))) { //If the next thing is a snarl, then just return the snarl going in the direction we just moved in return get_net_handle_from_values(next_pointer, (go_left ? END_START : START_END), SNARL_HANDLE, 1); } else{ @@ -5709,9 +5547,7 @@ bool SnarlDistanceIndex::ChainRecord::for_each_child(const std::function* records){ #ifdef debug_distance_indexing - assert(type == CHAIN || - type == DISTANCED_CHAIN || - type == MULTICOMPONENT_CHAIN); + assert(is_chain(type)); #endif record_offset = pointer; records = records; @@ -5793,7 +5629,7 @@ SnarlDistanceIndex::SnarlRecordWriter SnarlDistanceIndex::ChainRecordWriter::add #ifdef debug_distance_indexing cerr << (*records)->size() << " Adding child snarl length to the end of the array " << endl; cerr << "Previous child was at " << previous_child_offset << endl; - assert(SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset))== DISTANCED_TRIVIAL_SNARL || SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset))== TRIVIAL_SNARL); + assert(SnarlDistanceIndex::is_trivial_snarl(SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)))); #endif @@ -5826,7 +5662,7 @@ SnarlDistanceIndex::SimpleSnarlRecordWriter SnarlDistanceIndex::ChainRecordWrite size_t snarl_record_size = SIMPLE_SNARL_RECORD_SIZE + 2*snarl_size; #ifdef debug_distance_indexing cerr << (*records)->size() << " Adding simple snarl to the end of the array " << endl; - assert(SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset))== DISTANCED_TRIVIAL_SNARL || SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset))== TRIVIAL_SNARL); + 
assert(SnarlDistanceIndex::is_trivial_snarl(SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)))); #endif @@ -5862,8 +5698,7 @@ size_t SnarlDistanceIndex::ChainRecordWriter::add_node(nid_t node_id, size_t nod #ifdef debug_distance_indexing cerr << "Adding new node to chain, with previous child at offset " << previous_child_offset << endl; #endif - if ((SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) == DISTANCED_TRIVIAL_SNARL || - SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) == TRIVIAL_SNARL) + if ((SnarlDistanceIndex::is_trivial_snarl(SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)))) && (TrivialSnarlRecord(previous_child_offset, records).get_node_count() == MAX_TRIVIAL_SNARL_NODE_COUNT || new_record || reverse_loop == 0)) { //If the last thing was a trivial snarl and it is full, then finish it off @@ -5874,11 +5709,7 @@ size_t SnarlDistanceIndex::ChainRecordWriter::add_node(nid_t node_id, size_t nod } if (previous_child_offset == 0 - || SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) == SNARL - || SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) == DISTANCED_SNARL - || SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) == OVERSIZED_SNARL - || SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) == SIMPLE_SNARL - || SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) == DISTANCED_SIMPLE_SNARL + || SnarlDistanceIndex::is_nonroot_nontrivial_snarl(SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) || TrivialSnarlRecord(previous_child_offset, records).get_node_count() == MAX_TRIVIAL_SNARL_NODE_COUNT || reverse_loop == 0 || new_record) { //If the last thing was a snarl or nothing (previous_child_offset == 0, meaning that this is the @@ -5985,7 +5816,7 @@ string SnarlDistanceIndex::net_handle_as_string(const net_handle_t& 
net) const { net_handle_record_t record_type = record.get_record_handle_type(); string result = stringify(type) + " "; if (type == ROOT_HANDLE) { - if (record.get_record_type() == ROOT_SNARL || record.get_record_type() == DISTANCED_ROOT_SNARL) { + if (is_root_snarl(record.get_record_type())) { result += "root snarl"; return result; } else { @@ -6104,16 +5935,12 @@ void SnarlDistanceIndex::print_descendants_of(const net_handle_t net) const { parent = net_handle_as_string(get_parent(net)); if (record_type == CHAIN_HANDLE) { child_count = ChainRecord(net, &snarl_tree_records).get_node_count(); - } else if (record.get_record_type() == SNARL || - record.get_record_type() == DISTANCED_SNARL|| - record.get_record_type() == OVERSIZED_SNARL) { + } else if (is_nonroot_nonsimple_snarl(record.get_record_type())) { child_count = SnarlRecord(net, &snarl_tree_records).get_node_count(); - } else if (record.get_record_type() == TRIVIAL_SNARL || - record.get_record_type() == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(record.get_record_type())) { child_count = TrivialSnarlRecord(get_record_offset(net), &snarl_tree_records).get_node_count(); - } else if (record.get_record_type() == SIMPLE_SNARL || - record.get_record_type() == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(record.get_record_type())) { child_count = SimpleSnarlRecord(net, &snarl_tree_records).get_node_count(); } else { throw runtime_error("error: printing the wrong kind of record"); @@ -6138,13 +5965,10 @@ void SnarlDistanceIndex::print_snarl_stats() const { //Get the number of children depending on the type of record size_t child_count; - if (record.get_record_type() == SNARL || - record.get_record_type() == DISTANCED_SNARL || - record.get_record_type() == OVERSIZED_SNARL) { + if (is_nonroot_nonsimple_snarl(record.get_record_type())) { child_count = SnarlRecord(snarl_child, &snarl_tree_records).get_node_count(); - } else if (record.get_record_type() == SIMPLE_SNARL || - record.get_record_type() 
== DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(record.get_record_type())) { child_count = SimpleSnarlRecord(snarl_child, &snarl_tree_records).get_node_count(); } else { throw runtime_error("error: getting the snarl child count of the wrong type of record"); @@ -6207,14 +6031,11 @@ void SnarlDistanceIndex::write_snarls_to_json() const { json_object_set_new(out_json, "parent", parent_json); //Get the number of children depending on the type of record - if (record.get_record_type() == SNARL || - record.get_record_type() == DISTANCED_SNARL|| - record.get_record_type() == OVERSIZED_SNARL) { + if (is_nonroot_nonsimple_snarl(record.get_record_type())) { size_t child_count = SnarlRecord(snarl_child, &snarl_tree_records).get_node_count(); json_object_set_new(out_json, "child_count", json_integer(child_count)); - } else if (record.get_record_type() == SIMPLE_SNARL || - record.get_record_type() == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(record.get_record_type())) { size_t child_count = SimpleSnarlRecord(snarl_child, &snarl_tree_records).get_node_count(); json_object_set_new(out_json, "child_count", json_integer(child_count)); } else { @@ -6222,8 +6043,8 @@ void SnarlDistanceIndex::write_snarls_to_json() const { } //Set the min and max length, if applicable - if (record.get_record_type() == DISTANCED_SNARL || - record.get_record_type() == DISTANCED_SIMPLE_SNARL){ + if (has_distances(record.get_record_type()) && is_nonroot_nontrivial_snarl(record.get_record_type()) && !is_oversized_snarl(record.get_record_type())){ + // TODO: May need a new accessor to match just snarls and simple snarls that have distances. 
json_object_set_new(out_json, "minimum_length", json_integer(minimum_length(snarl_child))); json_object_set_new(out_json, "maximum_length", json_integer(maximum_length(snarl_child))); } @@ -6506,12 +6327,12 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectorsize(), record_type, temp_chain_record.prefix_sum.size(), &snarl_tree_records); chain_record_constructor.set_start_end_connected(); } else { - chain_record_constructor = ChainRecordWriter(snarl_tree_records->size(), MULTICOMPONENT_CHAIN, + chain_record_constructor = ChainRecordWriter(snarl_tree_records->size(), encode_chain(!ignore_distances, true), temp_chain_record.prefix_sum.size(), &snarl_tree_records); } chain_record_constructor.set_parent_record_offset( @@ -6609,8 +6430,8 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector snarl_size_limit); SnarlRecordWriter snarl_record_constructor = chain_record_constructor.add_snarl(temp_snarl_record.node_count, record_type, temp_snarl_record.hub_labels.size(), last_child_offset.first); @@ -6627,7 +6448,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectorsize(), 0, record_type, &snarl_tree_records, temp_node_record.node_id); node_record.set_node_id(temp_node_record.node_id); node_record.set_rank_in_parent(temp_chain_record.rank_in_parent); @@ -6810,7 +6632,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectorget_snarl(current_record_index); record_to_offset.emplace(make_pair(temp_index_i,current_record_index), snarl_tree_records->size()); @@ -6863,7 +6685,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectorget_node(current_record_index); bool ignore_distances = (snarl_size_limit == 0) || only_top_level_chain_distances; - record_t record_type = ignore_distances ? 
NODE : DISTANCED_NODE; + record_t record_type = encode_node(!ignore_distances); NodeRecordWriter node_record(snarl_tree_records->size(), 0, record_type, &snarl_tree_records, temp_node_record.node_id); node_record.set_node_id(temp_node_record.node_id); node_record.set_rank_in_parent(temp_node_record.rank_in_parent); @@ -6900,7 +6722,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectorget_handle(record.get_start_id(), !record.get_start_orientation()); handle_t end_out = graph->get_handle(record.get_end_id(), record.get_end_orientation()); From 1522fa93e502fdd5f301fd7b4b048e49c52d53fd Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 20 Mar 2026 12:34:39 -0700 Subject: [PATCH 37/75] Fix non-Python build --- bdsg/include/bdsg/snarl_distance_index.hpp | 4 ++-- bdsg/src/snarl_distance_index.cpp | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 1fec46e4..60240c47 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -688,7 +688,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab } /// Make sure a record_t is a known type other than CHILDREN constexpr static bool is_any_nonchildren(record_t type) { - return is_any_root(type) || is_node(type) || is_chain(type) || is_nonroot_nontrivial_snarl(type) || is_trivival_snarl(type); + return is_any_root(type) || is_node(type) || is_chain(type) || is_nonroot_nontrivial_snarl(type) || is_trivial_snarl(type); } constexpr static record_t encode_root_snarl(bool has_distances) { @@ -705,7 +705,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab throw runtime_error("error: oversized snarl must have distances"); } if (is_regular) { - return REGULAR_OVERSIZED_SNARL; + return OVERSIZED_REGULAR_SNARL; } else { return OVERSIZED_SNARL; } diff --git 
a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index f8579c56..41377785 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -183,11 +183,11 @@ bool SnarlDistanceIndex::is_oversized_snarl(const net_handle_t& net) const { bool SnarlDistanceIndex::is_dag(const net_handle_t& snarl) const { record_t record_type = SnarlTreeRecord(snarl, &snarl_tree_records).get_record_type(); - if ( is_nontrivial_snarl(type) && !has_distances(type) ) { + if ( is_nontrivial_snarl(record_type) && !has_distances(record_type) ) { //If this is a snarl but didn't store distances cerr << "warning: checking if a snarl is a dag in an index without distances. Returning true" << endl; return true; - } else if (is_nonsimple_snarl(type)) { + } else if (is_nonsimple_snarl(record_type)) { //If this is any kind of non-simple snarl //(We already ruled out not having distances) @@ -573,7 +573,7 @@ SnarlDecomposition::endpoint_t SnarlDistanceIndex::ends_at(const net_handle_t& t size_t SnarlDistanceIndex::get_rank_in_parent(const net_handle_t& net) const { size_t tag = snarl_tree_records->at(get_record_offset(net)); - if (is_trivial_snarl(get_record_type(tag)) { + if (is_trivial_snarl(get_record_type(tag))) { return TrivialSnarlRecord(get_record_offset(net), &snarl_tree_records).get_rank_in_parent(get_node_record_offset(net)); } else if (is_simple_snarl(get_record_type(tag))) { if (is_snarl(net)) { @@ -3586,7 +3586,7 @@ size_t SnarlDistanceIndex::SnarlTreeRecord::get_rank_in_parent() const { } else if (is_root_snarl(type)) { //For root snarls, the rank gets stored in the length slot return (*records)->at( record_offset + SNARL_MIN_LENGTH_OFFSET); - } else if (is_nontrivial_nonroot_snarl(type)) { + } else if (is_nonroot_nontrivial_snarl(type)) { return record_offset; } else if (is_chain(type)) { return (*records)->at(record_offset + CHAIN_RANK_OFFSET) >> 1; @@ -3620,7 +3620,7 @@ handlegraph::nid_t 
SnarlDistanceIndex::SnarlTreeRecord::get_start_id() const { } else if (is_trivial_snarl(type)) { TrivialSnarlRecord trivial_snarl_record(record_offset, records); return trivial_snarl_record.get_node_id(0); - } else if (is_nontrivial_nonroot_snarl(type)) { + } else if (is_nonroot_nontrivial_snarl(type)) { //To get the start node of a snarl, get the thing to the left of it in the chain ChainRecord parent_record (get_parent_record_offset(), records); size_t node_offset = is_simple_snarl(type) ? 1 : 0; @@ -3641,7 +3641,7 @@ bool SnarlDistanceIndex::SnarlTreeRecord::get_start_orientation() const { } else if (is_node(type) || is_trivial_snarl(type)) { //cerr << "warning: Looking for the start of a node" << endl; return false; - } else if (is_nontrivial_nonroot_snarl(type)) { + } else if (is_nonroot_nontrivial_snarl(type)) { ChainRecord parent_record (get_parent_record_offset(), records); size_t node_offset = is_simple_snarl(type) ? 1 : 0; //Get the next node in the chain (going left) @@ -3667,7 +3667,7 @@ handlegraph::nid_t SnarlDistanceIndex::SnarlTreeRecord::get_end_id() const { } else if (is_trivial_snarl(type)) { TrivialSnarlRecord trivial_snarl_record(record_offset, records); return trivial_snarl_record.get_node_id(trivial_snarl_record.get_node_count()-1); - } else if (is_nontrivial_nonroot_snarl(type)) { + } else if (is_nonroot_nontrivial_snarl(type)) { //For a snarl, walk right in the chain ChainRecord parent_record (get_parent_record_offset(), records); size_t node_offset = is_simple_snarl(type) ? 
1 : 0; @@ -3908,7 +3908,7 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_rank_in_parent(size_t rank) } else if (is_root_snarl(type)) { (*records)->at(record_offset + SNARL_MIN_LENGTH_OFFSET) = rank; return; - } else if (is_nontrivial_nonroot_snarl(type)) { + } else if (is_nonroot_nontrivial_snarl(type)) { cerr << "SETTING THE RANK OF A SNARL WHICH I'M PRETTY SURE DOESN'T MEAN ANYTHING" << endl; return; } else if (is_chain(type)) { @@ -5484,7 +5484,7 @@ net_handle_t SnarlDistanceIndex::ChainRecord::get_next_child(const net_handle_t& size_t next_pointer = get_record_offset(net_handle) + (go_left ? -(*records)->at(get_record_offset(net_handle)-2)-2 : (*records)->at(get_record_offset(net_handle)-1)+2); - if (SnarlDistanceIndex::is_nonsimple_nonroot_snarl(SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)))) { + if (SnarlDistanceIndex::is_nonroot_nonsimple_snarl(SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)))) { //If the next thing is a snarl, then just return the snarl going in the direction we just moved in return get_net_handle_from_values(next_pointer, (go_left ? 
END_START : START_END), SNARL_HANDLE); } else if (SnarlDistanceIndex::is_simple_snarl(SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)))) { @@ -5709,7 +5709,7 @@ size_t SnarlDistanceIndex::ChainRecordWriter::add_node(nid_t node_id, size_t nod } if (previous_child_offset == 0 - || SnarlDistanceIndex::is_nonroot_nontrivial_snarl(SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) + || SnarlDistanceIndex::is_nonroot_nontrivial_snarl(SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset))) || TrivialSnarlRecord(previous_child_offset, records).get_node_count() == MAX_TRIVIAL_SNARL_NODE_COUNT || reverse_loop == 0 || new_record) { //If the last thing was a snarl or nothing (previous_child_offset == 0, meaning that this is the From 46aa0941b73b809879c7c0a6a7f239cbbe6d45b8 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 20 Mar 2026 13:58:39 -0700 Subject: [PATCH 38/75] Enable debugging and stop OR-ing in enum values --- bdsg/include/bdsg/snarl_distance_index.hpp | 52 +++++++++++++++------- bdsg/src/snarl_distance_index.cpp | 10 ++--- 2 files changed, 42 insertions(+), 20 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 60240c47..f82f9b6e 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -640,55 +640,77 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab // decomposable to flags, we use these accessors to look at facets of it. 
constexpr static bool has_distances(record_t type) { - return type == DISTANCED_NODE || type == DISTANCED_TRIVIAL_SNARL || type == DISTANCED_SIMPLE_SNARL + return type == DISTANCED_NODE + || type == DISTANCED_TRIVIAL_SNARL || type == DISTANCED_SIMPLE_SNARL || type == DISTANCED_REGULAR_SNARL || type == OVERSIZED_REGULAR_SNARL - || type == DISTANCED_SNARL || type == OVERSIZED_SNARL || type == DISTANCED_ROOT_SNARL + || type == DISTANCED_SNARL || type == OVERSIZED_SNARL + || type == DISTANCED_ROOT_SNARL || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN; } constexpr static bool is_root_snarl(record_t type) { - return type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL; + return type == ROOT_SNARL + || type == DISTANCED_ROOT_SNARL; } constexpr static bool is_any_root(record_t type) { - return is_root_snarl(type) || type == ROOT; + return is_root_snarl(type) + || type == ROOT; } constexpr static bool is_node(record_t type) { - return type == NODE || type == DISTANCED_NODE; + return type == NODE + || type == DISTANCED_NODE; } constexpr static bool is_chain(record_t type) { - return type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN; + return type == CHAIN + || type == DISTANCED_CHAIN + || type == MULTICOMPONENT_CHAIN; } constexpr static bool is_trivial_snarl(record_t type) { - return type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL; + return type == TRIVIAL_SNARL + || type == DISTANCED_TRIVIAL_SNARL; } constexpr static bool is_simple_snarl(record_t type) { - return type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL; + return type == SIMPLE_SNARL + || type == DISTANCED_SIMPLE_SNARL; } constexpr static bool is_oversized_snarl(record_t type) { - return type == OVERSIZED_SNARL || type == OVERSIZED_REGULAR_SNARL; + return type == OVERSIZED_SNARL + || type == OVERSIZED_REGULAR_SNARL; } /// Determine if a record type is a regular (but not simple or trivial) snarl. 
constexpr static bool is_regular_snarl(record_t type) { - return type == REGULAR_SNARL || type == DISTANCED_REGULAR_SNARL || OVERSIZED_REGULAR_SNARL; + return type == REGULAR_SNARL + || type == DISTANCED_REGULAR_SNARL + || type == OVERSIZED_REGULAR_SNARL; } /// Determine if a record type is a snarl that isn't also a root or a /// simple (or trivial) snarl. A "nonsimple" snarl is implicitly /// nontrivial. constexpr static bool is_nonroot_nonsimple_snarl(record_t type) { return is_regular_snarl(type) - || type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL; + || type == SNARL + || type == DISTANCED_SNARL + || type == OVERSIZED_SNARL; } constexpr static bool is_nonsimple_snarl(record_t type) { - return is_nonroot_nonsimple_snarl(type) || is_root_snarl(type); + return is_nonroot_nonsimple_snarl(type) + || is_root_snarl(type); } constexpr static bool is_nonroot_nontrivial_snarl(record_t type) { - return is_nonroot_nonsimple_snarl(type) || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL; + return is_nonroot_nonsimple_snarl(type) + || type == SIMPLE_SNARL + || type == DISTANCED_SIMPLE_SNARL; } constexpr static bool is_nontrivial_snarl(record_t type) { - return is_nonroot_nontrivial_snarl(type) || is_root_snarl(type); + return is_nonroot_nontrivial_snarl(type) + || is_root_snarl(type); } /// Make sure a record_t is a known type other than CHILDREN constexpr static bool is_any_nonchildren(record_t type) { - return is_any_root(type) || is_node(type) || is_chain(type) || is_nonroot_nontrivial_snarl(type) || is_trivial_snarl(type); + return is_any_root(type) + || is_node(type) + || is_chain(type) + || is_nonroot_nontrivial_snarl(type) + || is_trivial_snarl(type); } constexpr static record_t encode_root_snarl(bool has_distances) { diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 41377785..8473fc3a 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -1,4 +1,4 @@ -//#define 
debug_distance_indexing +#define debug_distance_indexing //#define debug_snarl_traversal //#define debug_distances //#define debug_parent @@ -3909,12 +3909,12 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_rank_in_parent(size_t rank) (*records)->at(record_offset + SNARL_MIN_LENGTH_OFFSET) = rank; return; } else if (is_nonroot_nontrivial_snarl(type)) { - cerr << "SETTING THE RANK OF A SNARL WHICH I'M PRETTY SURE DOESN'T MEAN ANYTHING" << endl; + cerr << "SETTING THE RANK OF A " << stringify(type) << " SNARL WHICH I'M PRETTY SURE DOESN'T MEAN ANYTHING" << endl; return; } else if (is_chain(type)) { offset = record_offset + CHAIN_RANK_OFFSET; } else { - throw runtime_error("error: trying to access a snarl tree node of the wrong type"); + throw runtime_error("error: trying to set rank of a snarl tree node of the wrong type: " + stringify(type)); } #ifdef debug_distance_indexing cerr << offset << " set rank in parent to be " << rank << endl; @@ -3982,7 +3982,7 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_start_node(handlegraph::nid_ } else if (is_root_snarl(type)) { throw runtime_error("error: set the start node of a root snarl"); } else { - throw runtime_error("error: trying to access a snarl tree node of the wrong type"); + throw runtime_error("error: trying to set start on a snarl tree node of the wrong type: " + stringify(type)); } #ifdef debug_distance_indexing cerr << offset << " set start node to be " << id << " facing " << (rev ? 
"rev" : "fd") << endl; @@ -4003,7 +4003,7 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_end_node(handlegraph::nid_t } else if (is_root_snarl(type)) { throw runtime_error("error: set the end node of a root snarl"); } else { - throw runtime_error("error: trying to access a snarl tree node of the wrong type"); + throw runtime_error("error: trying to set end on a snarl tree node of the wrong type: " + stringify(type)); } #ifdef debug_distance_indexing cerr << offset << " set end node to be " << id << " facing " << (rev ? "rev" : "fd") << endl; From efa47f2bceb68cbc7159fb0ddced9c478f6962a6 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 20 Mar 2026 14:20:50 -0700 Subject: [PATCH 39/75] Turn off debugging --- bdsg/src/snarl_distance_index.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 8473fc3a..445d3077 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -1,4 +1,4 @@ -#define debug_distance_indexing +//#define debug_distance_indexing //#define debug_snarl_traversal //#define debug_distances //#define debug_parent From bb713f43f6e38fa57fc54948ff89c71ba65b05b0 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 20 Mar 2026 15:37:16 -0700 Subject: [PATCH 40/75] Try not to run a jillion threads during tests --- bdsg/src/vectorizable_overlays.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bdsg/src/vectorizable_overlays.cpp b/bdsg/src/vectorizable_overlays.cpp index 71da1ff9..979cd2b4 100644 --- a/bdsg/src/vectorizable_overlays.cpp +++ b/bdsg/src/vectorizable_overlays.cpp @@ -173,16 +173,21 @@ void VectorizableOverlay::index_nodes_and_edges() { } } + // We limit threading on small inputs. + auto limited_threads = [&](size_t batch) { + return std::max(1, std::min(batch / 1024, get_thread_count())); + }; + // Make edge PMHF. Does its own threading. Do it first so we can drop the edge buffer. 
// note: we're mapping to 0-based rank, so need to add one after lookup edge_to_rank.reset(new boomphf::mphf, pair>, boomph_pair_pair_hash>( - edge_buffer.size(), edge_buffer, get_thread_count(), 2.0, false, false)); + edge_buffer.size(), edge_buffer, limited_threads(edge_buffer.size()), 2.0, false, false)); edge_buffer.clear(); // Make node PMHF. Does its own threading. // Note: we're mapping to 0-based rank, so need to add one after lookup node_to_rank.reset(new boomphf::mphf>(rank_to_node.size(), rank_to_node, - get_thread_count(), 2.0, false, false)); + limited_threads(rank_to_node.size()), 2.0, false, false)); // Add one slot to keep ranks in this table 1-based. From 00327bd7c92f61613265478c7bc44d2962f0cddf Mon Sep 17 00:00:00 2001 From: Zia Truong <194475824+electricEpilith@users.noreply.github.com> Date: Tue, 24 Mar 2026 09:23:50 -0700 Subject: [PATCH 41/75] Add get_snarl_child_count and use hub labels for oversized snarl distances Add O(1) get_snarl_child_count() that reads the stored count directly. Replace Dijkstra fallback for oversized snarl internal distances with hub label (HHL) queries. Co-Authored-By: Claude Sonnet 4.6 --- bdsg/include/bdsg/snarl_distance_index.hpp | 6 ++- bdsg/src/snarl_distance_index.cpp | 56 +++++++--------------- 2 files changed, 21 insertions(+), 41 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index f82f9b6e..ebdf5ed1 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -427,9 +427,13 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab /// Returns true if the given net handle refers to (a traversal of) a regular snarl /// A regular snarl is the same as a simple snarl, except that the children may be /// nested chains, rather than being restricted to nodes, as long as the - /// nested chains don't allow reversals. + /// nested chains don't allow reversals. 
bool is_regular_snarl(const net_handle_t& net) const; + ///Returns the number of direct children of a snarl (not counting boundary nodes). + ///O(1) — reads the stored count directly from the record without iterating. + size_t get_snarl_child_count(const net_handle_t& net) const; + ///Returns true if the given net handle refers to (a traversal of) a chain. bool is_chain(const net_handle_t& net) const; ///Returns true if the given net handle refers to (a traversal of) a chain that is not start-end connected diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 445d3077..786622f3 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -637,6 +637,15 @@ bool SnarlDistanceIndex::has_distances() const { return has_distances(get_node_net_handle(root_record.get_min_node_id())); } +size_t SnarlDistanceIndex::get_snarl_child_count(const net_handle_t& net) const { + record_t specific_type = SnarlTreeRecord(net, &snarl_tree_records).get_record_type(); + if (is_simple_snarl(specific_type)) { + return SimpleSnarlRecord(net, &snarl_tree_records).get_node_count(); + } else { + return SnarlRecord(net, &snarl_tree_records).get_node_count(); + } +} + bool SnarlDistanceIndex::for_each_child_impl(const net_handle_t& traversal, const std::function& iteratee) const { #ifdef debug_snarl_traversal cerr << "Go through children of " << net_handle_as_string(traversal) << endl; @@ -1405,48 +1414,15 @@ size_t SnarlDistanceIndex::distance_in_snarl(const net_handle_t& parent, if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == DISTANCED_SIMPLE_SNARL) { return SimpleSnarlRecord(parent, &snarl_tree_records).get_distance(rank1, right_side1, rank2, right_side2); - } else if (is_oversized_snarl(get_record_type(snarl_tree_records->at(get_record_offset(parent)))) + } else if (is_oversized_snarl(get_record_type(snarl_tree_records->at(get_record_offset(parent)))) && !(rank1 == 0 || rank1 == 1 || rank2 == 0 || rank2 
== 1) ) { - //If this is an oversized snarl and we're looking for internal distances, then we didn't store the - //distance and we have to find it using dijkstra's algorithm - if (graph == nullptr) { - if (size_limit_warnings.load() < max_num_size_limit_warnings) { - int warning_num = const_cast(this)->size_limit_warnings++; - if (warning_num < max_num_size_limit_warnings) { - std::string msg = "warning: Trying to find the distance in an oversized snarl with zip codes. Returning inf\n"; - if (warning_num + 1 == max_num_size_limit_warnings) { - msg += "suppressing further warnings\n"; - } - std::cerr << msg; - } - } - return std::numeric_limits::max(); - } else { - net_handle_t net1 = get_snarl_child_from_rank(parent, rank1); - if (!right_side1) { - net1 = flip(net1); - } - net_handle_t net2 = get_snarl_child_from_rank(parent, rank2); - if (right_side2) { - net2 = flip(net2); - } - handle_t handle1 = get_handle(net1, graph); - handle_t handle2 = get_handle(net2, graph); + //If this is an oversized snarl and we're looking for internal distances, use the hub labels. 
+ auto record_it = snarl_tree_records->begin() + get_record_offset(parent); + auto length_data_it = record_it + SNARL_RECORD_SIZE; + size_t from_port = bgid(rank1, !right_side1 ^ (rank1 == 0), true); + size_t to_port = bgid(rank2, right_side2, false); + return promote_distance(hhl_query(length_data_it + 1, from_port, to_port)); - size_t distance = std::numeric_limits::max(); - handlegraph::algorithms::dijkstra(graph, handle1, [&](const handle_t& reached, size_t dist) { - if (reached == handle2) { - distance = dist; - return false; - } else if (dist > distance_limit) { - distance = std::numeric_limits::max(); - return false; - } - return true; - }, false); - return distance; - } - } else if (rank1 == 0 && rank2 == 0 && !snarl_is_root) { //Start to start is stored in the snarl return SnarlRecord(parent, &snarl_tree_records).get_distance_start_start(); From 72eefa7dab8791c180bcb6238f67f497507c59a3 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 24 Mar 2026 18:16:15 -0400 Subject: [PATCH 42/75] Count simple and trivial snarls as regular because zip codes do --- bdsg/include/bdsg/snarl_distance_index.hpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index ebdf5ed1..b76447b2 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -428,6 +428,8 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab /// A regular snarl is the same as a simple snarl, except that the children may be /// nested chains, rather than being restricted to nodes, as long as the /// nested chains don't allow reversals. + /// + /// Simple and trivial snarls also count as regular snarls. bool is_regular_snarl(const net_handle_t& net) const; ///Returns the number of direct children of a snarl (not counting boundary nodes). 
@@ -680,17 +682,25 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab return type == OVERSIZED_SNARL || type == OVERSIZED_REGULAR_SNARL; } - /// Determine if a record type is a regular (but not simple or trivial) snarl. - constexpr static bool is_regular_snarl(record_t type) { + /// Determine if a record type is a regular, but not a simple (or + /// trivial), snarl. Root snarls cannot be regular. + constexpr static bool is_regular_nonsimple_snarl(record_t type) { return type == REGULAR_SNARL || type == DISTANCED_REGULAR_SNARL || type == OVERSIZED_REGULAR_SNARL; } + /// Determine if a record type is a regular snarl. Root snarls cannot be + /// regular. Counts simple and trivial snarls as regular. + constexpr static bool is_regular_snarl(record_t type) { + return is_regular_nonsimple_snarl(type) + || is_simple_snarl(type) + || is_trivial_snarl(type); + } /// Determine if a record type is a snarl that isn't also a root or a /// simple (or trivial) snarl. A "nonsimple" snarl is implicitly /// nontrivial. constexpr static bool is_nonroot_nonsimple_snarl(record_t type) { - return is_regular_snarl(type) + return is_regular_nonsimple_snarl(type) || type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL; @@ -1848,6 +1858,8 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab bool end_node_rev; bool is_trivial; bool is_simple; + /// Set to true if the snarl is regular (see SnarlDistanceIndex::is_regular_snarl()). + /// If is_simple is true, this must also be set to true when filling in the TemporarySnarlRecord.
bool is_regular; bool is_tip = false; bool is_root_snarl = false; From 28d2d2bc28346b3efc5312359c70401f72541d89 Mon Sep 17 00:00:00 2001 From: Zia Truong <194475824+electricEpilith@users.noreply.github.com> Date: Mon, 30 Mar 2026 00:25:34 -0700 Subject: [PATCH 43/75] add back get_snarl_child_count Co-Authored-By: Claude Sonnet 4.6 --- bdsg/include/bdsg/snarl_distance_index.hpp | 4 ++++ bdsg/src/snarl_distance_index.cpp | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 93ad7ec4..65e2edba 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -547,6 +547,9 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///For 0 or 1, returns the sentinel facing in. Otherwise return the child as a chain going START_END net_handle_t get_snarl_child_from_rank(const net_handle_t& snarl, const size_t& rank) const; + ///Get the number of children of a snarl (not counting boundary nodes) + size_t get_snarl_child_count(const net_handle_t& net) const; + /// Does this net handle store distances? bool has_distances(const net_handle_t& net) const; /// Does the distance index in general store distances? 
@@ -1734,6 +1737,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab bool end_node_rev; bool is_trivial; bool is_simple; + bool is_regular = false; bool is_tip = false; bool is_root_snarl = false; bool include_distances = true; diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 40e12064..21b0706b 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -763,6 +763,14 @@ net_handle_t SnarlDistanceIndex::get_snarl_child_from_rank(const net_handle_t& s } } +size_t SnarlDistanceIndex::get_snarl_child_count(const net_handle_t& net) const { + if (is_simple_snarl(net)) { + return SimpleSnarlRecord(net, &snarl_tree_records).get_node_count(); + } else { + return SnarlRecord(net, &snarl_tree_records).get_node_count(); + } +} + bool SnarlDistanceIndex::has_distances(const net_handle_t& net) const { return has_distances(SnarlTreeRecord(net, &snarl_tree_records).get_record_type()); } From 5acf1f4d68aad0c97b7e22414bb441c9fe24e9d4 Mon Sep 17 00:00:00 2001 From: Zia Truong <194475824+electricEpilith@users.noreply.github.com> Date: Mon, 30 Mar 2026 00:40:41 -0700 Subject: [PATCH 44/75] Fix duplicate get_snarl_child_count declaration/definition from merge The merge of origin/hublabel-debug introduced duplicate declarations in the header (lines 439 and 550) and duplicate definitions in the .cpp (lines 631 and 648). Removed the older versions, keeping the ones from hublabel-debug which have better comments and slightly cleaner impl. 
Co-Authored-By: Claude Sonnet 4.6 --- bdsg/include/bdsg/snarl_distance_index.hpp | 3 --- bdsg/src/snarl_distance_index.cpp | 8 -------- 2 files changed, 11 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 1c23ee8b..8bab77d5 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -546,9 +546,6 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///For 0 or 1, returns the sentinel facing in. Otherwise return the child as a chain going START_END net_handle_t get_snarl_child_from_rank(const net_handle_t& snarl, const size_t& rank) const; - ///Get the number of children of a snarl (not counting boundary nodes) - size_t get_snarl_child_count(const net_handle_t& net) const; - /// Does this net handle store distances? bool has_distances(const net_handle_t& net) const; /// Does the distance index in general store distances? diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 66048e2e..cea2c5bd 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -628,14 +628,6 @@ net_handle_t SnarlDistanceIndex::get_snarl_child_from_rank(const net_handle_t& s } } -size_t SnarlDistanceIndex::get_snarl_child_count(const net_handle_t& net) const { - if (is_simple_snarl(net)) { - return SimpleSnarlRecord(net, &snarl_tree_records).get_node_count(); - } else { - return SnarlRecord(net, &snarl_tree_records).get_node_count(); - } -} - bool SnarlDistanceIndex::has_distances(const net_handle_t& net) const { return has_distances(SnarlTreeRecord(net, &snarl_tree_records).get_record_type()); } From fdb9a74a18ddb053a456aabaead5f5dd180625a2 Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Wed, 1 Apr 2026 22:56:39 -0700 Subject: [PATCH 45/75] upgrade version number to 5 Co-Authored-By: Claude Sonnet 4.6 --- 
bdsg/include/bdsg/snarl_distance_index.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 75c1db7d..14c864f2 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -825,9 +825,9 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab // While the version number is 4, store it in a bit masked way // to avoid getting confused with old indexes without version numbers // that start with component count - const static size_t CURRENT_VERSION_NUMBER = 4; + const static size_t CURRENT_VERSION_NUMBER = 5; // A verion to allow though but warn about - const static size_t WARN_VERSION_NUMBER = 3; + const static size_t WARN_VERSION_NUMBER = 4; /// Arbitrary large number which doens't overflow the number of bits we give const static size_t VERSION_NUMBER_SENTINEL = (1 << 10) - 1; From 09f9d6ed933aa7cf4ff54efbc4f04268443f9dba Mon Sep 17 00:00:00 2001 From: Zia Truong <194475824+electricEpilith@users.noreply.github.com> Date: Tue, 7 Apr 2026 12:31:57 -0700 Subject: [PATCH 46/75] fix conflict from accidentally doing the same version upgrade twice --- bdsg/include/bdsg/snarl_distance_index.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 5d6f47fb..a8d3f85d 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -954,7 +954,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab // to avoid getting confused with old indexes without version numbers // that start with component count const static size_t CURRENT_VERSION_NUMBER = 5; - // A verion to allow though but warn about + // A version to allow though but warn about const static size_t WARN_VERSION_NUMBER = 4; /// Arbitrary large number which 
doens't overflow the number of bits we give const static size_t VERSION_NUMBER_SENTINEL = (1 << 10) - 1; From 2c3c0db592380193abbf47504952e75a937e17d3 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 22 Apr 2026 17:11:19 -0400 Subject: [PATCH 47/75] Add CHOverlay output --- bdsg/include/bdsg/ch.hpp | 6 ++++++ bdsg/src/ch.cpp | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/bdsg/include/bdsg/ch.hpp b/bdsg/include/bdsg/ch.hpp index 1dd8ab7b..2b31d9e4 100644 --- a/bdsg/include/bdsg/ch.hpp +++ b/bdsg/include/bdsg/ch.hpp @@ -10,6 +10,8 @@ file for quickly playing around with stuff #include #include +#include + //#define debug_binary_intersection //#define debug_hhl_query @@ -77,6 +79,10 @@ typedef struct EdgeProp { typedef boost::adjacency_list CHOverlay; typedef boost::filtered_graph> ContractedGraph; +/// Allow outputting CHOverlay objects. Output text does not end with a +/// newline. +std::ostream& operator<<(std::ostream& out, const CHOverlay& ov); + /** * Build the intermediate hub labeling computation data structure ("Boost * graph") from a HashGraph. diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index ac2cfd2b..0c6c3a9e 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -42,6 +42,33 @@ NODE_UINT rev_bgid(NODE_UINT n) { return n ^ 1; } +std::ostream& operator<<(std::ostream& out, const CHOverlay& ov) { + out << "Vertices: " << num_vertices(ov) << ", Edges: " << num_edges(ov) << std::endl; + out << "--- Nodes ---" << std::endl; + for (auto v : boost::make_iterator_range(vertices(ov))) { + const NodeProp& np = ov[v]; + out << "Node " << v << ": seqlen=" << np.seqlen + << " max_out=" << np.max_out + << " contracted_neighbors=" << np.contracted_neighbors + << " level=" << np.level + << " arc_cover=" << np.arc_cover + << " contracted=" << (np.contracted ? "true" : "false") + // Skip new_id since it is not always initialized; it's only + // initialized when make_contraction_hierarchy is run. 
+ << std::endl; + } + out << "--- Edges ---"; + for (auto e : boost::make_iterator_range(edges(ov))) { + const EdgeProp& ep = ov[e]; + out << std::endl << "Edge " << source(e, ov) << " -> " << target(e, ov) + << ": contracted=" << (ep.contracted ? "true" : "false") + << " weight=" << ep.weight + << " arc_cover=" << ep.arc_cover + << " ori=" << (ep.ori ? "true" : "false"); + // Make sure not to end with a newline. + } +} + CHOverlay make_boost_graph(const bdsg::HashGraph& hg) { NODE_UINT node_count = hg.get_node_count(); CHOverlay g(node_count*2); From a7602fd4a462ca617502640022c6f1dd9109b13f Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 22 Apr 2026 18:05:03 -0400 Subject: [PATCH 48/75] Address some of my own review comments --- .../bdsg/overlays/vectorizable_overlays.hpp | 4 + bdsg/include/bdsg/snarl_distance_index.hpp | 89 +++++++++++-------- bdsg/src/ch.cpp | 8 +- bdsg/src/snarl_distance_index.cpp | 27 +++--- bdsg/src/vectorizable_overlays.cpp | 4 +- 5 files changed, 71 insertions(+), 61 deletions(-) diff --git a/bdsg/include/bdsg/overlays/vectorizable_overlays.hpp b/bdsg/include/bdsg/overlays/vectorizable_overlays.hpp index cf53cedc..1f86a6d7 100644 --- a/bdsg/include/bdsg/overlays/vectorizable_overlays.hpp +++ b/bdsg/include/bdsg/overlays/vectorizable_overlays.hpp @@ -191,6 +191,10 @@ class VectorizableOverlay : virtual public VectorizableHandleGraph, virtual publ sdsl::bit_vector s_bv; sdsl::rank_support_v<1> s_bv_rank; sdsl::bit_vector::select_1_type s_bv_select; + + /// When doing multithreaded overlay construction, what's the minimum + /// number of items per thread? This limits thread count on small graphs.
+ static const size_t MIN_ITEMS_PER_THREAD; }; diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index a8d3f85d..c17ad549 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -647,6 +647,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab // Because the record_t encodes a complex taxonomy of snarls not *quite* // decomposable to flags, we use these accessors to look at facets of it. + /// Return true if records of the given type have stored distances. constexpr static bool has_distances(record_t type) { return type == DISTANCED_NODE || type == DISTANCED_TRIVIAL_SNARL || type == DISTANCED_SIMPLE_SNARL @@ -655,31 +656,39 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab || type == DISTANCED_ROOT_SNARL || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN; } + /// Return true if the given record type represents a root snarl. constexpr static bool is_root_snarl(record_t type) { return type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL; } + /// Return true if the given record type represents a root or a root snarl. constexpr static bool is_any_root(record_t type) { return is_root_snarl(type) || type == ROOT; } + /// Return true if the given record type represents a node. constexpr static bool is_node(record_t type) { return type == NODE || type == DISTANCED_NODE; } + /// Return true if the given record type represents a chain. constexpr static bool is_chain(record_t type) { return type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN; } + /// Return true if the given record type represents a trivial snarl. constexpr static bool is_trivial_snarl(record_t type) { return type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL; } + /// Return true if the given record type represents a simple (but not a + /// trivial) snarl. 
constexpr static bool is_simple_snarl(record_t type) { return type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL; } + /// Return true if the given record type represents an oversized snarl. constexpr static bool is_oversized_snarl(record_t type) { return type == OVERSIZED_SNARL || type == OVERSIZED_REGULAR_SNARL; @@ -707,15 +716,21 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab || type == DISTANCED_SNARL || type == OVERSIZED_SNARL; } + /// Return true if the given record type represents a snarl that is not + /// simple or trivial. constexpr static bool is_nonsimple_snarl(record_t type) { return is_nonroot_nonsimple_snarl(type) || is_root_snarl(type); } + /// Return true if the given record type represents a snarl that is not + /// trivial, and also isn't a root snarl. constexpr static bool is_nonroot_nontrivial_snarl(record_t type) { return is_nonroot_nonsimple_snarl(type) || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL; } + /// Return true if the given record type represents a snarl that is not + /// trivial. constexpr static bool is_nontrivial_snarl(record_t type) { return is_nonroot_nontrivial_snarl(type) || is_root_snarl(type); @@ -728,15 +743,19 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab || is_nonroot_nontrivial_snarl(type) || is_trivial_snarl(type); } - + + /// Encode the type of a root snarl that may or may not have distances. constexpr static record_t encode_root_snarl(bool has_distances) { return has_distances ? DISTANCED_ROOT_SNARL : ROOT_SNARL; } - + + /// Encode the type of a simple snarl that may or may not have distances. constexpr static record_t encode_simple_snarl(bool has_distances) { return has_distances ? DISTANCED_SIMPLE_SNARL : SIMPLE_SNARL; } + /// Encode the type of a snarl that isn't a root snarl or a simple (or trivial) snarl. + /// It may have distances, it may be regular, and it may be oversized.
constexpr static record_t encode_nonroot_nonsimple_snarl(bool has_distances, bool is_regular, bool is_oversized) { if (is_oversized) { if (!has_distances) { @@ -753,11 +772,14 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab return has_distances ? DISTANCED_SNARL : SNARL; } } - + + /// Encode the type of a node that may or may not have distances. constexpr static record_t encode_node(bool has_distances) { return has_distances ? DISTANCED_NODE : NODE; } - + + /// Encode the type of a chain. + /// It may have distances, and it may be a multicomponent chain. constexpr static record_t encode_chain(bool has_distances, bool is_multicomponent) { if (is_multicomponent) { if (!has_distances) { @@ -769,10 +791,6 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab } } - - - - ///Given the type of the record, return the handle type. Some record types can represent multiple things, ///for example a simple snarl record is used to represent a snarl, and the nodes/trivial chains in it. ///This will return whatever is higher on the snarl tree. A simple snarl will be considered a snarl, @@ -1917,17 +1935,18 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab /// that some positions in the vector are empty temporary indexes for /// nonexistent nodes. vector temp_node_records; - + + /// Look up a chain from a temporary record reference. + /// Throws an error if the reference is not to a chain or is out of bounds. 
inline TemporaryChainRecord& get_chain(const temp_record_ref_t& ref) { - if (ref.first != TEMP_CHAIN) { - throw std::invalid_argument("Trying to look up a non-chain as a chain"); - } - if (ref.second >= temp_chain_records.size()) { - throw std::out_of_range("Trying to look up chain " + std::to_string(ref.second) + " but temporary index only has " + std::to_string(temp_chain_records.size()) + " chains"); - } - return temp_chain_records[ref.second]; + // Delegate to the const version and un-const the result. See + // + return const_cast(std::as_const(*this).get_chain(ref)); } + /// Look up a chain from a temporary record reference. + /// Throws an error if the reference is not to a chain or is out of bounds. + /// This version can be used when the object is const. inline const TemporaryChainRecord& get_chain(const temp_record_ref_t& ref) const { if (ref.first != TEMP_CHAIN) { throw std::invalid_argument("Trying to look up a non-chain as a chain"); @@ -1937,17 +1956,16 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab } return temp_chain_records[ref.second]; } - + + /// Look up a snarl from a temporary record reference. + /// Throws an error if the reference is not to a snarl or is out of bounds. inline TemporarySnarlRecord& get_snarl(const temp_record_ref_t& ref) { - if (ref.first != TEMP_SNARL) { - throw std::invalid_argument("Trying to look up a non-snarl as a snarl"); - } - if (ref.second >= temp_snarl_records.size()) { - throw std::out_of_range("Trying to look up snarl " + std::to_string(ref.second) + " but temporary index only has " + std::to_string(temp_snarl_records.size()) + " snarls"); - } - return temp_snarl_records[ref.second]; + return const_cast(std::as_const(*this).get_snarl(ref)); } - + + /// Look up a snarl from a temporary record reference. + /// Throws an error if the reference is not to a snarl or is out of bounds. + /// This version can be used when the object is const. 
inline const TemporarySnarlRecord& get_snarl(const temp_record_ref_t& ref) const { if (ref.first != TEMP_SNARL) { throw std::invalid_argument("Trying to look up a non-snarl as a snarl"); @@ -1957,21 +1975,16 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab } return temp_snarl_records[ref.second]; } - + + /// Look up a node from a temporary record reference. + /// Throws an error if the reference is not to a node or is out of bounds. inline TemporaryNodeRecord& get_node(const temp_record_ref_t& ref) { - if (ref.first != TEMP_NODE) { - throw std::invalid_argument("Trying to look up a non-node as a node"); - } - if (ref.second < min_node_id) { - throw std::out_of_range("Trying to look up node " + std::to_string(ref.second) + " but temporary index starts at node " + std::to_string(min_node_id)); - } - if (ref.second >= temp_node_records.size() + min_node_id) { - throw std::out_of_range("Trying to look up node " + std::to_string(ref.second) + " but temporary index only goes up until node " + std::to_string(temp_node_records.size() + min_node_id)); - } - // Nodes use a node ID in the ref, not an index. - return temp_node_records[ref.second - min_node_id]; + return const_cast(std::as_const(*this).get_node(ref)); } - + + /// Look up a node from a temporary record reference. + /// Throws an error if the reference is not to a node or is out of bounds. + /// This version can be used when the object is const. inline const TemporaryNodeRecord& get_node(const temp_record_ref_t& ref) const { if (ref.first != TEMP_NODE) { throw std::invalid_argument("Trying to look up a non-node as a node"); diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index 0c6c3a9e..21276c15 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -67,6 +67,7 @@ std::ostream& operator<<(std::ostream& out, const CHOverlay& ov) { << " ori=" << (ep.ori ? "true" : "false"); // Make sure not to end with a newline. 
} + return out; } CHOverlay make_boost_graph(const bdsg::HashGraph& hg) { @@ -743,13 +744,6 @@ ItrType get_dist_itr(ItrType start_itr, ItrType hub_itr) { void down_dijk(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_back) { auto in_node = node; - // TODO: We used to add -ov[node].seqlen to labels_back[node] for the hub - // ov[node].new_id. But this involved doing unsigned overflow shenanigans, - // and gave us values in the labels that are maximally wide and can't later - // be packed into the reduced bit width in a SnarlDistanceIndex. - // - // The tests didn't seem to cover a case where these entries were needed, so - // we just don't do that anymore. std::priority_queue, vector>, greater>> q; auto [_, __] = out_edges(in_node, ov); diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index cea2c5bd..42df7fb4 100644 --- a/bdsg/src/snarl_distance_index.cpp +++ b/bdsg/src/snarl_distance_index.cpp @@ -387,8 +387,6 @@ net_handle_t SnarlDistanceIndex::get_parent(const net_handle_t& child) const { //If this is a simple snarl and a node or chain, then the parent offset doesn't change if (get_handle_type(child) == NODE_HANDLE) { // If this is a node, then return it as a chain - // TODO: Why can a simple snarl need to look like a node itself? - // TODO: Why can a simple snarl need to look like a chain? Because the node needs to look like a chain? #ifdef debug_parent std::cerr << "We were looking at a simple snarl as a node; project it as a chain." << std::endl; #endif @@ -450,6 +448,8 @@ net_handle_t SnarlDistanceIndex::get_bound(const net_handle_t& snarl, bool get_e // snarl record looking like a chain (maybe because the node it was // looking like needs to look like a chain now). ChainRecord promises // to know how to interpret all of them. + // TODO: the concepts involved in things looking like other things + // should be documented somewhere. 
ChainRecord chain_record(snarl, &snarl_tree_records); size_t offset; size_t node_offset; @@ -1062,11 +1062,12 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, cerr << "\t\tChild parents are " << net_handle_as_string(canonical(child1_parent)) << " and " << net_handle_as_string(canonical(child2_parent)) << endl; if (canonical(parent) != canonical(child1_parent) || canonical(parent) != canonical(child2_parent)) { - std::cerr << "Error: parent mismatch!" << std::endl; - std::cerr << as_integer(canonical(parent)) << " = " << net_handle_as_string(canonical(parent)) << std::endl; - std::cerr << as_integer(canonical(child1_parent)) << " = " << net_handle_as_string(canonical(child1_parent)) << std::endl; - std::cerr << as_integer(canonical(child2_parent)) << " = " << net_handle_as_string(canonical(child2_parent)) << std::endl; - assert(false); + std::stringstream ss; + ss << "Error: parent mismatch!" << std::endl; + ss << as_integer(canonical(parent)) << " = " << net_handle_as_string(canonical(parent)) << std::endl; + ss << as_integer(canonical(child1_parent)) << " = " << net_handle_as_string(canonical(child1_parent)) << std::endl; + ss << as_integer(canonical(child2_parent)) << " = " << net_handle_as_string(canonical(child2_parent)) << std::endl; + throw std::runtime_error(ss.str()); } #endif @@ -1127,9 +1128,8 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, } else if (is_chain(parent)) { if (get_record_handle_type(get_record_type(snarl_tree_records->at(get_record_offset(parent)))) == NODE_HANDLE || get_record_handle_type(get_record_type(snarl_tree_records->at(get_record_offset(parent)))) == SNARL_HANDLE) { - // TODO: Why would this happen? 
#ifdef debug_distances - std::cerr << "=>They are not reachable because this chain is really a node or snarl(?!)" << std::endl; + std::cerr << "=>They are not reachable because this \"chain\" is really a node or snarl" << std::endl; #endif return std::numeric_limits::max(); } @@ -1308,10 +1308,7 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, // appears in us. (So even not-reversed won't mean local forward // orientation if is_reversed_in_parent() is true for that child). // - // TODO: Probably need to also flip for is_reversed_in_parent() to - // account for this. - // - // TODO: dir1 and dir2 aren't just normal is_reverse flags. + // Note that dir1 and dir2 aren't just normal is_reverse flags. // // For a sentinel rank 1 (end node) as rank1, dir1 false needs to mean into the snarl (so start of end node, reverse strand). // For a sentinel rank 0 (start node) as rank1, dir1 false needs to mean into the snarl (so end of start node, forward strand). @@ -4150,7 +4147,7 @@ SnarlDistanceIndex::SnarlRecord::SnarlRecord (net_handle_t net, const bdsg::yomo size_t SnarlDistanceIndex::SnarlRecord::distance_vector_size(record_t type, size_t node_count) { if (!is_nonsimple_snarl(type)) { - throw runtime_error("error: this is not a snarl"); + throw runtime_error("error: trying to get size of distance matrix for something other than a snarl that would have one"); } if (has_distances(type)) { if (is_oversized_snarl(type)) { @@ -5212,7 +5209,7 @@ size_t SnarlDistanceIndex::ChainRecord::get_distance(size_t rank1, bool left_sid if (rank1 == rank2) { distance = reverse_loop1; #ifdef debug_distances - std::cerr << "Distance on left slef loop is " << distance << std::endl; + std::cerr << "Distance on left self loop is " << distance << std::endl; #endif } else { diff --git a/bdsg/src/vectorizable_overlays.cpp b/bdsg/src/vectorizable_overlays.cpp index 979cd2b4..d480f228 100644 --- a/bdsg/src/vectorizable_overlays.cpp +++ b/bdsg/src/vectorizable_overlays.cpp 
@@ -4,6 +4,8 @@ namespace bdsg { +const size_t VectorizableOverlay::MIN_ITEMS_PER_THREAD = 1024; + VectorizableOverlay::VectorizableOverlay(const HandleGraph* graph) : underlying_graph(graph) { assert(underlying_graph != nullptr); @@ -175,7 +177,7 @@ void VectorizableOverlay::index_nodes_and_edges() { // We limit threading on small inputs. auto limited_threads = [&](size_t batch) { - return std::max(1, std::min(batch / 1024, get_thread_count())); + return std::max(1, std::min(batch / MIN_ITEMS_PER_THREAD, get_thread_count())); }; // Make edge PMHF. Does its own threading. Do it first so we can drop the edge buffer. From a169ecaa26e899049d71c4856700c0f384b28d08 Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Sun, 26 Apr 2026 14:07:09 -0700 Subject: [PATCH 49/75] prevent sdsl conflicts planned by Claude Opus 4.7 --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8104de01..7d599be8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -160,7 +160,9 @@ include(ExternalProject) # sdsl-lite (gives an "sdsl" target) set(BUILD_SHARED_LIBS ON CACHE BOOL "Build sdsl-lite shared libraries") -add_subdirectory("${bdsg_DIR}/deps/sdsl-lite") +if (NOT TARGET sdsl) + add_subdirectory("${bdsg_DIR}/deps/sdsl-lite") +endif() if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") # It produces divsufsort and divsufsort64 targets that don't know they need OMP on Mac. 
set_target_properties(divsufsort PROPERTIES LINK_FLAGS "-lomp") From 531e77e030d7a1b53d7f558df2b89fafcfc887de Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Sun, 26 Apr 2026 14:09:47 -0700 Subject: [PATCH 50/75] prevent libhandlegraph conflicts planned by Claude Opus 4.7 --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7d599be8..8cceb015 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -189,7 +189,9 @@ elseif (TARGET handlegraph_objs) message("Using libhandlegraph built by another CMake") else () message("Using bundled libhandlegraph") - add_subdirectory("${bdsg_DIR}/deps/libhandlegraph") + if (NOT TARGET handlegraph_shared AND NOT TARGET handlegraph) + add_subdirectory("${bdsg_DIR}/deps/libhandlegraph") + endif() endif() From 5aff1a5f2562a6a0cf6332191c6645a5bfc746e8 Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Sun, 26 Apr 2026 14:11:00 -0700 Subject: [PATCH 51/75] prevent hopscotch conflicts planned by Claude Opus 4.7 --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8cceb015..478bbb1b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -170,7 +170,9 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") endif() # hopscotch_map (required by DYNAMIC, gives a "tsl::hopscotch_map" target) -add_subdirectory("${bdsg_DIR}/deps/hopscotch-map") +if (NOT TARGET tsl::hopscotch_map) + add_subdirectory("${bdsg_DIR}/deps/hopscotch-map") +endif() # DYNAMIC (header only) # Does not ship its own install step or define a target, so we make our own target From 789ad6daf74192b477fa49fb066a8cf0df76a16d Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Sun, 26 Apr 2026 14:12:06 -0700 Subject: [PATCH 52/75] prevent mio conflicts planned by Claude Opus 4.7 --- CMakeLists.txt | 4 +++- 1 file changed, 3 
insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 478bbb1b..c97ac6e7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -209,7 +209,9 @@ add_library(sparsepp INTERFACE) target_include_directories(sparsepp INTERFACE "${bdsg_DIR}/deps/sparsepp/") # mio (header only) -add_subdirectory("${bdsg_DIR}/deps/mio") +if (NOT TARGET mio::mio) + add_subdirectory("${bdsg_DIR}/deps/mio") +endif() if (BUILD_PYTHON_BINDINGS) From 78584c4483c96879212618a1b5d10bc18b2c81ab Mon Sep 17 00:00:00 2001 From: electricEpilith <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 8 May 2026 15:07:22 -0700 Subject: [PATCH 53/75] Update bdsg/include/bdsg/internal/indexing_iterator.hpp fix typos Co-authored-by: Adam Novak --- bdsg/include/bdsg/internal/indexing_iterator.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdsg/include/bdsg/internal/indexing_iterator.hpp b/bdsg/include/bdsg/internal/indexing_iterator.hpp index 447d09d7..71f7c3f7 100644 --- a/bdsg/include/bdsg/internal/indexing_iterator.hpp +++ b/bdsg/include/bdsg/internal/indexing_iterator.hpp @@ -86,7 +86,7 @@ class IndexingIterator { bool operator>(const IndexingIterator& other) const; /// Determine if this iterator is at or after another. - /// Result is undefined if itrators are to different collecitons. + /// Result is undefined if iterators are to different collections. 
bool operator>=(const IndexingIterator& other) const; private: From d6dffddccfde938817e8e472f6c4b99ec2fb5ef9 Mon Sep 17 00:00:00 2001 From: electricEpilith <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 8 May 2026 15:08:30 -0700 Subject: [PATCH 54/75] Fix collections misspelling Co-authored-by: Adam Novak --- bdsg/include/bdsg/internal/indexing_iterator.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdsg/include/bdsg/internal/indexing_iterator.hpp b/bdsg/include/bdsg/internal/indexing_iterator.hpp index 71f7c3f7..63e0ca44 100644 --- a/bdsg/include/bdsg/internal/indexing_iterator.hpp +++ b/bdsg/include/bdsg/internal/indexing_iterator.hpp @@ -82,7 +82,7 @@ class IndexingIterator { bool operator<=(const IndexingIterator& other) const; /// Determine if this iterator is strictly after another. - /// Result is undefined if iterators are to different collecitons. + /// Result is undefined if iterators are to different collections. bool operator>(const IndexingIterator& other) const; /// Determine if this iterator is at or after another. From 9882957847568c27f97e2719796057e385b5b699 Mon Sep 17 00:00:00 2001 From: electricEpilith <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 8 May 2026 15:08:57 -0700 Subject: [PATCH 55/75] Fix typos Co-authored-by: Adam Novak --- bdsg/include/bdsg/internal/indexing_iterator.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdsg/include/bdsg/internal/indexing_iterator.hpp b/bdsg/include/bdsg/internal/indexing_iterator.hpp index 63e0ca44..50d145fe 100644 --- a/bdsg/include/bdsg/internal/indexing_iterator.hpp +++ b/bdsg/include/bdsg/internal/indexing_iterator.hpp @@ -68,7 +68,7 @@ class IndexingIterator { /// Indexing into iterator. Even though we type this as reference, remember /// that we don't actually implement writing to our "references" and just /// use the value type. - /// Result is undefined if itrators are to different collecitons. 
+ /// Result is undefined if iterators are to different collections. reference operator[](difference_type offset) const; // Comaprable iterator methods (TODO: Is there an STL concept name for this?) From 90289e8254f2c3e920e624427ef3b93b0e128de0 Mon Sep 17 00:00:00 2001 From: electricEpilith <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 8 May 2026 15:09:17 -0700 Subject: [PATCH 56/75] Fix collections typo Co-authored-by: Adam Novak --- bdsg/include/bdsg/internal/indexing_iterator.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdsg/include/bdsg/internal/indexing_iterator.hpp b/bdsg/include/bdsg/internal/indexing_iterator.hpp index 50d145fe..02d97eea 100644 --- a/bdsg/include/bdsg/internal/indexing_iterator.hpp +++ b/bdsg/include/bdsg/internal/indexing_iterator.hpp @@ -74,7 +74,7 @@ class IndexingIterator { // Comaprable iterator methods (TODO: Is there an STL concept name for this?) /// Determine if this iterator is strictly before another. - /// Result is undefined if iterators are to different collecitons. + /// Result is undefined if iterators are to different collections. bool operator<(const IndexingIterator& other) const; /// Determine if this iterator is before or at another. 
From 772d34cbee935827c2a0ee3c294a982065ccd0b3 Mon Sep 17 00:00:00 2001 From: electricEpilith <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 8 May 2026 15:09:37 -0700 Subject: [PATCH 57/75] Update bdsg/include/bdsg/internal/indexing_iterator.hpp Co-authored-by: Adam Novak --- bdsg/include/bdsg/internal/indexing_iterator.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdsg/include/bdsg/internal/indexing_iterator.hpp b/bdsg/include/bdsg/internal/indexing_iterator.hpp index 02d97eea..7c56c72e 100644 --- a/bdsg/include/bdsg/internal/indexing_iterator.hpp +++ b/bdsg/include/bdsg/internal/indexing_iterator.hpp @@ -78,7 +78,7 @@ class IndexingIterator { bool operator<(const IndexingIterator& other) const; /// Determine if this iterator is before or at another. - /// Result is undefined if iterators are to different collecitons. + /// Result is undefined if iterators are to different collections. bool operator<=(const IndexingIterator& other) const; /// Determine if this iterator is strictly after another. From 2d120642db7059c0905ea980a7c03088848f5acb Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Tue, 12 May 2026 22:53:34 -0700 Subject: [PATCH 58/75] CI needs to have Boost to support contraction hierarchy code --- .github/workflows/testmac.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/testmac.yml b/.github/workflows/testmac.yml index dd4d5541..990ff287 100644 --- a/.github/workflows/testmac.yml +++ b/.github/workflows/testmac.yml @@ -26,7 +26,7 @@ jobs: - name: Run build and test run: | set -e - brew install libomp doxygen jansson + brew install libomp doxygen jansson boost mkdir -p build cd build cmake .. 
-DRUN_DOXYGEN=ON -DPYTHON_EXECUTABLE="$(which python3)" From 83cea1f8a178c4e10dcebe87b398a88da5c86d27 Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Wed, 13 May 2026 02:36:55 -0700 Subject: [PATCH 59/75] consolidate contraction-hierarchy/hub-labeling stuff planning help from Claude Opus 4.7 --- bdsg/include/bdsg/ch.hpp | 244 ++++++++++------ bdsg/include/bdsg/hublabel.hpp | 65 ----- bdsg/include/bdsg/landmark.hpp | 270 ------------------ bdsg/src/hublabel.cpp | 394 -------------------------- bdsg/src/landmark.cpp | 501 --------------------------------- 5 files changed, 164 insertions(+), 1310 deletions(-) delete mode 100644 bdsg/include/bdsg/hublabel.hpp delete mode 100644 bdsg/include/bdsg/landmark.hpp delete mode 100644 bdsg/src/hublabel.cpp delete mode 100644 bdsg/src/landmark.cpp diff --git a/bdsg/include/bdsg/ch.hpp b/bdsg/include/bdsg/ch.hpp index 2b31d9e4..4aedbe29 100644 --- a/bdsg/include/bdsg/ch.hpp +++ b/bdsg/include/bdsg/ch.hpp @@ -1,27 +1,72 @@ /* -file for quickly playing around with stuff +file for the contraction hierarchy method */ -#include "landmark.hpp" -#include "hublabel.hpp" +#include +#include #include -#include -#include #include -#include +#include +#include +#include +#include +#include +#include #include -//#define debug_binary_intersection -//#define debug_hhl_query +// #define debug_binary_intersection +// #define debug_hhl_query namespace bdsg { +// inf implementation is largest possible int +#define INF_INT numeric_limits::max() +#define DIST_NBITS 32 +#define DIST_UINT uint32_t +typedef uint32_t NODE_UINT; +typedef int NodeId; +typedef int NodesideId; + +typedef struct HubRecord { + NodeId hub{}; + DIST_UINT dist{}; + + HubRecord() : hub{0}, dist{INF_INT} {} + HubRecord(NodeId hid, DIST_UINT min_dist) : hub{hid}, dist{min_dist} {} + + auto operator<=>(const HubRecord &r2) const { return hub <=> r2.hub; } + + auto operator<=>(const NodeId &n) const { return hub <=> n; } +} HubRecord; + 
+/// Allow promoting a DIST_UINT to a different type, translating infinities to +/// the type's max limit. +template OtherInt promote_distance(DIST_UINT val) { + if (val == INF_INT) { + return std::numeric_limits::max(); + } + return (OtherInt)val; +} + +/// Allow demoting a DIST_UINT from a different type, translating infinities +/// from the type's max limit and erroring on unrepresentably large values. +template DIST_UINT demote_distance(OtherInt val) { + if (val == std::numeric_limits::max()) { + return INF_INT; + } + if (val > (OtherInt)INF_INT) { + throw std::overflow_error( + "Cannot store excessively wide value " + std::to_string(val) + " in " + + std::to_string(DIST_NBITS) + " bits for hub labeling"); + } + return (DIST_UINT)val; +} /** * For a handle graph indexed with HHL, get the HHL rank ("Boost graph ID") for * an orientation of a node, as a source or destination. */ -NODE_UINT bgid(const handle_t& h, const bdsg::HashGraph& hg); +NODE_UINT bgid(const handle_t &h, const bdsg::HashGraph &hg); /** * For a net graph indexed with HHL, get the HHL rank for an orientation of a @@ -46,8 +91,8 @@ NODE_UINT bgid(const handle_t& h, const bdsg::HashGraph& hg); NODE_UINT bgid(size_t net_rank, bool is_reverse, bool is_source); /** - * For a handle or net graph indexed with HHL, take the HHL rank of an orientation of - * a node and get that of the opposite orientation of a node. + * For a handle or net graph indexed with HHL, take the HHL rank of an + * orientation of a node and get that of the opposite orientation of a node. * * For handle graphs, ranks are the same for source and destination. 
* @@ -56,32 +101,35 @@ NODE_UINT bgid(size_t net_rank, bool is_reverse, bool is_source); */ NODE_UINT rev_bgid(NODE_UINT n); - typedef struct NodeProp { // This is initialized by make_boost_graph() DIST_UINT seqlen; DIST_UINT max_out = 0; NODE_UINT contracted_neighbors = 0; NODE_UINT level = 0; - NODE_UINT arc_cover = 1; + NODE_UINT arc_cover = 1; bool contracted = false; // This is left uninitialized until make_contraction_hierarchy() is run. NODE_UINT new_id; } NodeProp; -typedef struct EdgeProp { +typedef struct EdgeProp { bool contracted = false; DIST_UINT weight = 0; NODE_UINT arc_cover = 1; bool ori = true; -} EdgeProp; +} EdgeProp; -typedef boost::adjacency_list CHOverlay; -typedef boost::filtered_graph> ContractedGraph; +typedef boost::adjacency_list + CHOverlay; +typedef boost::filtered_graph> + ContractedGraph; /// Allow outputting CHOverlay objects. Output text does not end with a /// newline. -std::ostream& operator<<(std::ostream& out, const CHOverlay& ov); +std::ostream &operator<<(std::ostream &out, const CHOverlay &ov); /** * Build the intermediate hub labeling computation data structure ("Boost @@ -92,50 +140,62 @@ std::ostream& operator<<(std::ostream& out, const CHOverlay& ov); * For later queries, orientations of nodes are assigned ranks as provided by * the bgid() function. */ -CHOverlay make_boost_graph(const bdsg::HashGraph& hg); +CHOverlay make_boost_graph(const bdsg::HashGraph &hg); /** * Build the intermediate hub labeling computation data structure ("Boost * graph") for the net graph of a snarl in a TemporaryDistanceIndex. * - * all_children must contain the child chains and nodes of the snarl, as well as the bounding nodes of the snarl, in any order. + * all_children must contain the child chains and nodes of the snarl, as well as + * the bounding nodes of the snarl, in any order. * - * For later queries, orientations of children or the snarl boundary nodes are assigned query ranks based on their snarl distance index rank. 
+ * For later queries, orientations of children or the snarl boundary nodes are + * assigned query ranks based on their snarl distance index rank. * - * The snarl distance index ranks are 0 and 1 for the start and end nodes of the snarl, and the rank_in_parent field of the temporary index for each child. + * The snarl distance index ranks are 0 and 1 for the start and end nodes of the + * snarl, and the rank_in_parent field of the temporary index for each child. */ -CHOverlay make_boost_graph(const bdsg::SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector>& all_children, const HandleGraph* graph); +CHOverlay make_boost_graph( + const bdsg::SnarlDistanceIndex::TemporaryDistanceIndex &temp_index, + const SnarlDistanceIndex::temp_record_ref_t &snarl_index, + const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord + &temp_snarl_record, + const vector> &all_children, + const HandleGraph *graph); -int edge_diff(ContractedGraph::vertex_descriptor nid, ContractedGraph& ch, CHOverlay& ov, vector& node_dists, int hop_limit); +int edge_diff(ContractedGraph::vertex_descriptor nid, ContractedGraph &ch, + CHOverlay &ov, vector &node_dists, int hop_limit); -void contract(CHOverlay::vertex_descriptor nid, ContractedGraph& ch, CHOverlay& ov, vector& node_dists, vector& shouldnt_contract, int hop_limit); +void contract(CHOverlay::vertex_descriptor nid, ContractedGraph &ch, + CHOverlay &ov, vector &node_dists, + vector &shouldnt_contract, int hop_limit); /** * Find the contraction hierarchy order for the graph. * * Initializes the new_id field of each NodeProb in the graph. 
*/ -void make_contraction_hierarchy(CHOverlay& ov); - +void make_contraction_hierarchy(CHOverlay &ov); + template ItrType get_dist_itr(ItrType start_itr, ItrType hub_itr) { auto node_count = *start_itr; - auto last_fwd_end_bound_itr = next(start_itr, 1+node_count); + auto last_fwd_end_bound_itr = next(start_itr, 1 + node_count); if (hub_itr >= next(start_itr, *last_fwd_end_bound_itr)) { - //backwards label - auto first_back_bound_itr = next(start_itr, 1+node_count+1); - auto last_back_bound_itr = next(start_itr, 1+node_count+1+node_count); + // backwards label + auto first_back_bound_itr = next(start_itr, 1 + node_count + 1); + auto last_back_bound_itr = next(start_itr, 1 + node_count + 1 + node_count); auto jump_to_dist = (*last_back_bound_itr) - *first_back_bound_itr; return next(hub_itr, jump_to_dist); } else { - //forwards label - auto first_fwd_bound_itr = next(start_itr, 1); - auto last_fwd_bound_itr = next(start_itr, 1+node_count); + // forwards label + auto first_fwd_bound_itr = next(start_itr, 1); + auto last_fwd_bound_itr = next(start_itr, 1 + node_count); auto jump_to_dist = (*last_fwd_bound_itr) - *first_fwd_bound_itr; - return next(hub_itr, jump_to_dist); + return next(hub_itr, jump_to_dist); } -} +} -DIST_UINT binary_intersection_ch(vector& v1, vector& v2); +DIST_UINT binary_intersection_ch(vector &v1, vector &v2); /* * Do binary intersection to find shared labels for two vertices. * @@ -147,34 +207,37 @@ DIST_UINT binary_intersection_ch(vector& v1, vector& v2); * bounds will be in the slots after. 
*/ template -DIST_UINT binary_intersection_ch(ItrType start_itr, size_t v1_start_bound_index, size_t v2_start_bound_index) { +DIST_UINT binary_intersection_ch(ItrType start_itr, size_t v1_start_bound_index, + size_t v2_start_bound_index) { auto v1_start_bound_itr = next(start_itr, v1_start_bound_index); auto v1_end_bound_itr = next(v1_start_bound_itr, 1); auto v2_start_bound_itr = next(start_itr, v2_start_bound_index); auto v2_end_bound_itr = next(v2_start_bound_itr, 1); - auto v1_start_itr = next(start_itr, *v1_start_bound_itr); + auto v1_start_itr = next(start_itr, *v1_start_bound_itr); auto v1_end_itr = next(start_itr, *v1_end_bound_itr); #ifdef debug_binary_intersection - std::cerr << "Found " << v1_end_itr - v1_start_itr << " labels for vertex 1" << std::endl; + std::cerr << "Found " << v1_end_itr - v1_start_itr << " labels for vertex 1" + << std::endl; #endif - auto v2_start_itr = next(start_itr, *v2_start_bound_itr); + auto v2_start_itr = next(start_itr, *v2_start_bound_itr); auto v2_end_itr = next(start_itr, *v2_end_bound_itr); #ifdef debug_binary_intersection - std::cerr << "Found " << v2_end_itr - v2_start_itr << " labels for vertex 2" << std::endl; + std::cerr << "Found " << v2_end_itr - v2_start_itr << " labels for vertex 2" + << std::endl; #endif auto v1_range = ranges::subrange(v1_start_itr, v1_end_itr); - auto v2_range = ranges::subrange(v2_start_itr, v2_end_itr); + auto v2_range = ranges::subrange(v2_start_itr, v2_end_itr); + + auto &key_vec = v1_range.size() < v2_range.size() ? v1_range : v2_range; + auto &search_vec = v1_range.size() < v2_range.size() ? v2_range : v1_range; - auto& key_vec = v1_range.size() < v2_range.size() ? v1_range : v2_range; - auto& search_vec = v1_range.size() < v2_range.size() ? 
v2_range : v1_range; - auto search_start_itr = search_vec.begin(); - auto search_end_itr = search_vec.end(); + auto search_end_itr = search_vec.end(); DIST_UINT min_dist = INF_INT; for (auto it = key_vec.begin(); it < key_vec.end(); it++) { #ifdef debug_binary_intersection @@ -183,31 +246,36 @@ DIST_UINT binary_intersection_ch(ItrType start_itr, size_t v1_start_bound_index, auto k = *it; auto k_dist_itr = get_dist_itr(start_itr, it); #ifdef debug_binary_intersection - cerr << "Distance for k " << k << " is " << *k_dist_itr << ", at: " << distance(start_itr,k_dist_itr) << endl; - cerr << "searching for " << k << " between " << distance(start_itr,search_start_itr) << " & " << distance(start_itr,search_end_itr) << endl; + cerr << "Distance for k " << k << " is " << *k_dist_itr + << ", at: " << distance(start_itr, k_dist_itr) << endl; + cerr << "searching for " << k << " between " + << distance(start_itr, search_start_itr) << " & " + << distance(start_itr, search_end_itr) << endl; #endif - search_start_itr = lower_bound(search_start_itr, search_end_itr, k); + search_start_itr = lower_bound(search_start_itr, search_end_itr, k); if (search_start_itr == search_end_itr) { #ifdef debug_binary_intersection std::cerr << "No more search results possible" << std::endl; #endif return min_dist; - } + } if (*search_start_itr == k) { #ifdef debug_binary_intersection - cerr << "match found, key: " << *search_start_itr << ", at " << distance(start_itr,search_start_itr) << endl; + cerr << "match found, key: " << *search_start_itr << ", at " + << distance(start_itr, search_start_itr) << endl; #endif auto dist_itr = get_dist_itr(start_itr, search_start_itr); DIST_UINT d = *(dist_itr) + *(k_dist_itr); #ifdef debug_binary_intersection - cerr << "dist for key is: " << *dist_itr << ", at " << distance(start_itr,dist_itr) << endl; + cerr << "dist for key is: " << *dist_itr << ", at " + << distance(start_itr, dist_itr) << endl; cerr << "total dist is: " << d << endl; #endif min_dist = 
min(min_dist, d); } - } - return min_dist; -} + } + return min_dist; +} /** * Query stored hub label data for a minimum distance. @@ -230,58 +298,74 @@ DIST_UINT hhl_query(ItrType start_itr, size_t rank1, size_t rank2) { size_t label_count = *start_itr; #ifdef debug_hhl_query - std::cerr << "Making hub label query on " << label_count << " labels" << std::endl; + std::cerr << "Making hub label query on " << label_count << " labels" + << std::endl; #endif // Bounds start after the label count, and at the rank of the first // vertex past there we find the start bound for the first vertex. - auto start_index_1 = 1+rank1; + auto start_index_1 = 1 + rank1; #ifdef debug_hhl_query - std::cerr << "Start bound for forward label for rank " << rank1 << " is at index " << start_index_1 << " past there" << std::endl; + std::cerr << "Start bound for forward label for rank " << rank1 + << " is at index " << start_index_1 << " past there" << std::endl; #endif // And there's a final end value for the first set of labels before we go on // to the bounds where we would find the start bound for the second vertex. 
- auto start_index_2 = 1+label_count+1+rank2; + auto start_index_2 = 1 + label_count + 1 + rank2; #ifdef debug_hhl_query - std::cerr << "Start bound for reverse label for rank " << rank2 << " is at index " << start_index_2 << " past there" << std::endl; + std::cerr << "Start bound for reverse label for rank " << rank2 + << " is at index " << start_index_2 << " past there" << std::endl; #endif - - DIST_UINT dist = binary_intersection_ch(start_itr, start_index_1, start_index_2); + DIST_UINT dist = + binary_intersection_ch(start_itr, start_index_1, start_index_2); - return dist; -} + return dist; +} -void down_dijk(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_rev); +void down_dijk(int node, CHOverlay &ov, vector &node_dists, + vector> &labels, + vector> &labels_rev); -void down_dijk_rev(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_rev); +void down_dijk_rev(int node, CHOverlay &ov, vector &node_dists, + vector> &labels, + vector> &labels_rev); -void test_dijk(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_rev); +void test_dijk(int node, CHOverlay &ov, vector &node_dists, + vector> &labels, + vector> &labels_rev); -void test_dijk_rev(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_rev); +void test_dijk_rev(int node, CHOverlay &ov, vector &node_dists, + vector> &labels, + vector> &labels_rev); -void create_labels(vector>& labels, vector>& labels_rev, CHOverlay& ov); +void create_labels(vector> &labels, + vector> &labels_rev, CHOverlay &ov); /** * Puts hub labels in a flat vector form - * + * * Structure: * - offsets are relative to start of flat vector - * - extra offset in each of fwd and back offset sets at the end so that end of ranges can be found - * -- subtracting the extra offset by the first offset of its set gets the distance to the corresponding dist of a hub + * - extra offset in each of fwd and back offset sets at the end so that end 
of + * ranges can be found + * -- subtracting the extra offset by the first offset of its set gets the + * distance to the corresponding dist of a hub * * The layout is: - * label count | start offsets (fwd) | start offsets (back) | fwd label hubs | fwd label dists | back label hubs | back label dists -*/ -vector pack_labels(const vector>& labels, const vector>& labels_back); + * label count | start offsets (fwd) | start offsets (back) | fwd label hubs | + * fwd label dists | back label hubs | back label dists + */ +vector pack_labels(const vector> &labels, + const vector> &labels_back); -//not necessary stuff -void write_to_csv(CHOverlay& ov, string out_path); +// not necessary stuff +void write_to_csv(CHOverlay &ov, string out_path); -void write_to_gr(CHOverlay& ov, string out_path); +void write_to_gr(CHOverlay &ov, string out_path); vector read_node_order(string in_path); -} +} // namespace bdsg diff --git a/bdsg/include/bdsg/hublabel.hpp b/bdsg/include/bdsg/hublabel.hpp deleted file mode 100644 index 885f370e..00000000 --- a/bdsg/include/bdsg/hublabel.hpp +++ /dev/null @@ -1,65 +0,0 @@ -/* -header file for hub labeling stuff -*/ -#include "landmark.hpp" -#include - -namespace bdsg { - -typedef unordered_map NsDistMap; -typedef struct HubRecord { - NodeId hub {}; - DIST_UINT dist {}; - - HubRecord() : hub{0}, dist{INF_INT} {} - HubRecord(NodeId hid, DIST_UINT min_dist) : hub{hid}, dist{min_dist} {} - - auto operator<=>(const HubRecord& r2) const { - return hub <=> r2.hub; - } - - auto operator<=>(const NodeId& n) const { - return hub <=> n; - } - //based off https://uscilab.github.io/cereal/serialization_functions.html - template - void serialize(Archive& a) { - a(hub, dist); - } -} HubRecord; - -typedef pair,vector> HubRecsPair; -typedef uint32_t NODE_UINT; - - -//first vec stores hub nodes whose left ns the label's ns reaches -//second vec stores hub nodes whose right ns the label's ns reaches -typedef pair,vector> HubsPair; -typedef pair Label; - -typedef 
tuple QueueObj2; -bool pqcomp2(const QueueObj2& o1, const QueueObj2& o2); -typedef std::priority_queue, function> PriorityQueue2; - -typedef tuple HwQueueObj; -bool pqcomp_hw(const HwQueueObj& o1, const HwQueueObj& o2); -typedef std::priority_queue, function> HwPriorityQueue; - -bool label_contains(HubRecsPair& label, NodesideId query); - -DIST_UINT get_label_dist(HubRecsPair& label, NodesideId query); - -DIST_UINT hl_build_intersect(HubRecsPair& l1, HubRecsPair& l2, vector& node_lens); - -DIST_UINT get_degree(NodeId node, bdsg::HashGraph& g); - -void pruned_dijkstra(bdsg::HashGraph& g, NodesideId start, vector& labels, vector& path_lengths, vector& rank_to_ns, vector& ns_to_rank, vector& rank_node_lens,int stop_dist = INF_INT); - -DIST_UINT hl_query(NodesideId i, NodesideId j, vector& labels, bdsg::HashGraph& g); -DIST_UINT hl_query(NodesideId i, NodesideId j, vector& labels, vector& rank_node_lens); - -vector make_labels(bdsg::HashGraph& g); - - - -} diff --git a/bdsg/include/bdsg/landmark.hpp b/bdsg/include/bdsg/landmark.hpp deleted file mode 100644 index 417ecb65..00000000 --- a/bdsg/include/bdsg/landmark.hpp +++ /dev/null @@ -1,270 +0,0 @@ -#ifndef LANDMARK_HPP -#define LANDMARK_HPP - -#include -#include -#include -#include -#include -#include -//#include -//#include - -using namespace std; -namespace bdsg { -//inf implementation is largest possible int -#define INF_INT numeric_limits::max() -#define DIST_NBITS 32 -#define DIST_UINT uint32_t -#define ARR2D_OFFSET 1 - -/// Allow promoting a DIST_UINT to a different type, translating infinities to the type's max limit. -template -OtherInt promote_distance(DIST_UINT val) { - if (val == INF_INT) { - return std::numeric_limits::max(); - } - return (OtherInt) val; -} - -/// Allow demoting a DIST_UINT from a different type, translating infinities -/// from the type's max limit and erroring on unrepresentably large values. 
-template -DIST_UINT demote_distance(OtherInt val) { - if (val == std::numeric_limits::max()) { - return INF_INT; - } - if (val > (OtherInt) INF_INT) { - throw std::overflow_error("Cannot store excessively wide value " + std::to_string(val) + " in " + std::to_string(DIST_NBITS) + " bits for hub labeling"); - } - return (DIST_UINT) val; -} - -/// Sum two distances, propagating infinities. -/// Does not check for overlfow. -/// TODO: We're not really sure if our distances are ints or uints and we freely mix them when we shouldn't. -int addInt(int a, int b); - -typedef int NodeId; -typedef int NodesideId; -typedef enum EnterDir {OTHER_NODESIDE=0,OTHER_NODE=1} EnterDir; -typedef boost::multi_array Array2D; -typedef unordered_map Ball; -class SdslArray2D { -private: - vector> arr2d; - uint8_t offset = ARR2D_OFFSET; - static DIST_UINT get_inf() { - bitset uint_bits; - uint_bits.reset().flip(DIST_NBITS - 1); - return static_cast(uint_bits.to_ulong()); - } - DIST_UINT Inf_UInt = 0; - DIST_UINT row_count = 0; - DIST_UINT col_count = 0; - static int toOut(DIST_UINT entry) { - if (entry == 0) { - return INF_INT; - } else { - return static_cast(entry-ARR2D_OFFSET); - } - } -public: - using size_type = DIST_UINT; - SdslArray2D(int nrow, int ncol) { - row_count = nrow; col_count = ncol; - arr2d.resize(row_count); - for (DIST_UINT i = 0; i < row_count; i++) { - sdsl::int_vector sdsl_row(col_count, Inf_UInt, DIST_NBITS); - arr2d[i] = std::move(sdsl_row); - } - } - - SdslArray2D(Array2D& a) { - auto ashape = a.shape(); - row_count = ashape[0]; col_count = ashape[1]; - arr2d.resize(row_count); - for (DIST_UINT i = 0; i < row_count; i++) { - sdsl::int_vector sdsl_row(col_count, Inf_UInt, DIST_NBITS); - for (DIST_UINT j = 0; j < col_count; j++) { - int entry = a[i][j]; - if (entry == INF_INT) { - sdsl_row[j] = Inf_UInt; - } else { - sdsl_row[j] = static_cast(entry)+offset; - } - } - arr2d[i] = std::move(sdsl_row); - } - } - - sdsl::int_vector& operator[](int i) { - return arr2d[i]; - 
} - - DIST_UINT get(int i, int j) { - DIST_UINT entry = arr2d[i][j]; - if (entry == 0) { - return INF_INT; - } else { - return entry-offset; - } - } - - void set(int i, int j, DIST_UINT val) { - if (val == INF_INT) { - arr2d[i][j] = 0; - } else { - arr2d[i][j] = val+offset; - } - } - - size_type serialize(ostream& out, sdsl::structure_tree_node* prev_node=nullptr, string structure_name="SdslArr2d") const { - sdsl::structure_tree_node* arr_node = sdsl::structure_tree::add_child(prev_node, structure_name, sdsl::util::class_name(*this)); - size_type bytes_count = 0; - DIST_UINT nrows = arr2d.size(); - bytes_count += sdsl::write_member(nrows, out, arr_node, "nrows"); - for (auto& row: arr2d) { - bytes_count += row.serialize(out, arr_node, "arr2d_row"); - } - return bytes_count; - } - - void load(istream& in) { - DIST_UINT nrows; - sdsl::read_member(nrows, in); - arr2d.resize(nrows); - for (DIST_UINT i = 0; i < nrows; i++) { - arr2d[i].load(in); - } - row_count = nrows; col_count = arr2d[0].size(); - } - - void bit_compress() { - for (auto& row: arr2d) { - sdsl::util::bit_compress(row); - } - } - - array shape() { - array shape_arr = {row_count, col_count}; - return shape_arr; - } - - auto col_view(DIST_UINT col) { - auto get_col_entry = [col] (sdsl::int_vector& row) { - return row[col]; - }; - - auto col_itr = ranges::views::transform(ranges::views::transform(arr2d, get_col_entry), toOut); - return col_itr; - } -}; - -/* -following functions assume input graph node ids go from 1...N -*/ - -/* -Converts a node_id to a, or both nodeside_ids -left and return_both should not both be True -*/ -//using bool for indicating side just like for direction in follow_edges -NodesideId node_to_nodeside(NodeId node_id, bool left); - -array get_node_nodesides(NodeId node_id); - -NodeId nodeside_to_node(NodesideId nodeside_id); - -tuple nodeside_to_node_tuple(NodesideId nodeside_id); - -bool nodeside_left (NodesideId nodeside_id); - -NodesideId other_nodeside(int nodeside_id); - 
-NodesideId handle_entry_nodeside(const handle_t& handle, HashGraph& g); - -string nodeside_string(NodesideId nodeside); - - -/// landmark selection - -/* -returns ids of nodesides with largest degree -*/ -//helper -int nodeside_degree(NodesideId ns_id, HashGraph& g); - -vector get_highest_degree_nodesides(bdsg::HashGraph& g, int top_num, int nodeside_count); - -vector get_landmark_nodes(bdsg::HashGraph& g, int top_ns_num, int nodeside_count); - -/* -Notes: -- modifies the table passed in -*/ -void fill_other_nodeside_dists(SdslArray2D& dist_table, HashGraph& g); - -/* -trying insert-Dijkstra over decrease-key dijkstra -see https://stackoverflow.com/questions/9255620/why-does-dijkstras-algorithm-use-decrease-key - -start is the id of the origin node -dijkstra goes from start node to other nodes' nodesides -*/ -typedef tuple QueueObj; -bool pqcomp1(const QueueObj& o1, const QueueObj& o2); - -typedef std::priority_queue, function> PriorityQueue; - -PriorityQueue initQueue( - int nodeside_count, - NodesideId start_ns -); - - -/* -Called when new nodeside is discovered. -If path to the nodeside is the shortest found so far: - update path_lengths and add new queue entry for it -*/ -bool discover_nodeside(NodesideId new_nodeside, EnterDir enter_direction, int new_len, vector>& path_lengths, PriorityQueue& nodeside_queue); - - -SdslArray2D dijkstra(bdsg::HashGraph& g, NodesideId start, NodesideId stop_ns = -1, int stop_dist = INF_INT, bool ball_ver = false); - -/* -notes: - - lm_nodes should not be empty -*/ -SdslArray2D get_lm2ns_dist_table(vector& lm_nodes, HashGraph& g); - -void get_closest_lm_ind(SdslArray2D& dist_table, vector& ind_vec, vector& min_dist_vec, int nodeside_count); - -/* -based off https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/msr-tr-2009-84.pdf [Chen et al. 
(2009)] -*/ -Ball get_ball_contents(NodesideId ns, int dist_limit, HashGraph& g); - - -vector find_balls(vector& closest_lm_dist_vec, HashGraph& g, int min_ball_size = 0); - -typedef struct OracleInfo { - vector& lm_nodes; - vector& closest_lm_inds; - vector& closest_lm_dists; - vector& balls; - SdslArray2D& lm_sides_to_ns; -} OracleInfo; - - -/* -query algorithm from Chen et al. (2009) -with modifications for our purpose -*/ - -int oracle_query(NodesideId source, NodesideId target, OracleInfo& oracle, HashGraph& g); - -} - -#endif diff --git a/bdsg/src/hublabel.cpp b/bdsg/src/hublabel.cpp deleted file mode 100644 index 60f19603..00000000 --- a/bdsg/src/hublabel.cpp +++ /dev/null @@ -1,394 +0,0 @@ -/* -hub labeling implementations - -based on the pruned landmark labeling approach by Akiba et al. (2013) -(https://dl.acm.org/doi/abs/10.1145/2463676.2465315) - -Code heavily references https://github.com/yb47438/An-Experimental-Study-on-Hub-Labeling-based-Shortest-Path-Algorithms/blame/7f43a91bcc47a3e74a52cf8fd034bd33a367920b/src/construction.h -*/ -#include "bdsg/hublabel.hpp" - -namespace bdsg { -bool notInf(int d) { - return d != INF_INT; -} - -void sorted_vec_insert(vector& v, NodeId item) { - auto ins_itr = upper_bound(v.begin(), v.end(), item); - v.insert(ins_itr, item); -} -void sorted_vec_insert(vector& v, HubRecord& item) { - auto ins_itr = lower_bound(v.begin(), v.end(), item); - if (ins_itr == v.end() || ins_itr->hub != item.hub) { - v.insert(ins_itr, item); - } -} - -bool pqcomp2(const QueueObj2& o1, const QueueObj2& o2) { - return get<0>(o1) > get<0>(o2); -}; - -bool pqcomp_hw(const QueueObj2& o1, const QueueObj2& o2) { - return get<0>(o1) > get<0>(o2); -}; - -void hub_insert(Label& label, NodesideId ns) { - NodeId hub = nodeside_to_node(ns); - if (nodeside_left(ns)) { - sorted_vec_insert(label.first.first, hub); - } else { - sorted_vec_insert(label.first.second, hub); - } -} -void hub_insert(HubRecsPair& label, NodesideId ns, DIST_UINT dist) { - NodeId 
hub = nodeside_to_node(ns); - HubRecord rec(hub, dist); - if (nodeside_left(ns)) { - sorted_vec_insert(label.first, rec); - } else { - sorted_vec_insert(label.second, rec); - } -} - -void hub_emplace(HubRecsPair& label, NodesideId ns, DIST_UINT dist) { - NodeId hub = nodeside_to_node(ns); - //HubRecord rec(hub, dist); - if (nodeside_left(ns)) { - label.first.emplace_back(hub, dist); - } else { - label.second.emplace_back(hub, dist); - } -} - -/* -assumes both input vectors are sorted least to greatest -*/ -void merge_intersection(vector& v1, vector& v2, vector& out) { - auto p1 = v1.begin(); auto p2 = v2.begin(); - while (p1 != v1.end() && p2 != v2.end()) { - if (*p1 < *p2) { - p1=next(p1); - } else { - if (*p1 > *p2) { - p2=next(p2); - } else { - out.push_back(*p1); - p1=next(p1); p2=next(p2); - } - } - } -} - -DIST_UINT merge_intersection_dist(vector& v1, vector& v2, bdsg::HashGraph& g) { - DIST_UINT min_dist = INF_INT; - auto p1 = v1.begin(); auto p2 = v2.begin(); - while (p1 != v1.end() && p2 != v2.end()) { - if (p1->hub < p2->hub) { - p1=next(p1); - } else { - if (p1->hub > p2->hub) { - p2=next(p2); - } else { - DIST_UINT d = p1->dist + g.get_length(g.get_handle(p1->hub,false)) + p2->dist; - min_dist = min(min_dist, d); - p1=next(p1); p2=next(p2); - } - } - } - - return min_dist; -} - -/* -assumes both input vectors are sorted least to greatest -*/ -void binary_intersection(vector& v1, vector& v2, vector& out) { - vector& key_vec = v1.size() < v2.size() ? v1 : v2; - vector& search_vec = v1.size() < v2.size() ? v2 : v1; - - auto start_itr = search_vec.begin(); - auto end_itr = search_vec.end(); - for (auto k: key_vec) { - start_itr = lower_bound(start_itr, end_itr, k); - if (*start_itr == k) { - out.push_back(k); - } else { - if (start_itr == end_itr) { - break; - } - } - } -} -DIST_UINT binary_intersection_dist(vector& v1, vector& v2, bdsg::HashGraph& g) { - vector& key_vec = v1.size() < v2.size() ? v1 : v2; - vector& search_vec = v1.size() < v2.size() ? 
v2 : v1; - - auto start_itr = search_vec.begin(); - auto end_itr = search_vec.end(); - DIST_UINT min_dist = INF_INT; - for (auto k: key_vec) { - start_itr = lower_bound(start_itr, end_itr, k); - if (start_itr == end_itr) { - return min_dist; - } - if (start_itr->hub == k.hub) { - DIST_UINT d = start_itr->dist + g.get_length(g.get_handle(k.hub,false)) + k.dist; - min_dist = min(min_dist, d); - } - } - return min_dist; -} -DIST_UINT binary_intersection_dist(vector& v1, vector& v2, vector& rank_node_lens) { - vector& key_vec = v1.size() < v2.size() ? v1 : v2; - vector& search_vec = v1.size() < v2.size() ? v2 : v1; - - auto start_itr = search_vec.begin(); - auto end_itr = search_vec.end(); - DIST_UINT min_dist = INF_INT; - for (auto k: key_vec) { - start_itr = lower_bound(start_itr, end_itr, k); - if (start_itr == end_itr) { - return min_dist; - } - if (start_itr->hub == k.hub) { - DIST_UINT d = start_itr->dist + rank_node_lens[k.hub] + k.dist; - min_dist = min(min_dist, d); - } - } - return min_dist; -} -DIST_UINT binary_intersection_dist(vector& v1, vector& v2) { - vector& key_vec = v1.size() < v2.size() ? v1 : v2; - vector& search_vec = v1.size() < v2.size() ? 
v2 : v1; - - auto start_itr = search_vec.begin(); - auto end_itr = search_vec.end(); - DIST_UINT min_dist = INF_INT; - for (auto k: key_vec) { - start_itr = lower_bound(start_itr, end_itr, k); - if (start_itr == end_itr) { - return min_dist; - } - if (start_itr->hub == k.hub) { - DIST_UINT d = start_itr->dist + k.dist; - min_dist = min(min_dist, d); - } - } - return min_dist; -} - -vector hub_vec_intersect(vector& v1, vector& v2) { - vector shared_nodes; - binary_intersection(v1, v2, shared_nodes); - return shared_nodes; -} - -DIST_UINT hl_build_intersect(HubRecsPair& l1, HubRecsPair& l2, bdsg::HashGraph& g) { - DIST_UINT min1 = binary_intersection_dist(l1.first, l2.second, g); - DIST_UINT min2 = binary_intersection_dist(l1.second, l2.first, g); - - return min(min1, min2); -} - -DIST_UINT hl_build_intersect(HubRecsPair& l1, HubRecsPair& l2, vector& rank_node_lens) { - DIST_UINT min1 = binary_intersection_dist(l1.first, l2.second, rank_node_lens); - DIST_UINT min2 = binary_intersection_dist(l1.second, l2.first, rank_node_lens); - - return min(min1, min2); -} - -DIST_UINT hl_build_intersect(HubRecsPair& l1, HubRecsPair& l2) { - DIST_UINT min1 = binary_intersection_dist(l1.first, l2.second); - DIST_UINT min2 = binary_intersection_dist(l1.second, l2.first); - - return min(min1, min2); -} - -bool label_contains(HubRecsPair& label, NodesideId query) { - NodeId query_node = nodeside_to_node(query); - if (nodeside_left(query)) { - return binary_search(label.first.begin(), label.first.end(), query_node); - } else { - return binary_search(label.second.begin(), label.second.end(), query_node); - } -} - -DIST_UINT get_label_dist(HubRecsPair& label, NodesideId query) { - NodeId query_node = nodeside_to_node(query); - if (nodeside_left(query)) { - return (*lower_bound(label.first.begin(), label.first.end(), query_node)).dist; - } else { - return (*lower_bound(label.second.begin(), label.second.end(), query_node)).dist; - } -} - -void set_label_dist(HubRecsPair& label, NodesideId 
query, DIST_UINT new_dist) { - NodeId query_node = nodeside_to_node(query); - if (nodeside_left(query)) { - (*lower_bound(label.first.begin(), label.first.end(), query_node)).dist = new_dist; - } else { - (*lower_bound(label.second.begin(), label.second.end(), query_node)).dist = new_dist; - } -} - -bool prune_check(HubRecsPair& label, vector& rank_node_lens, vector& rank_dists, vector& rank_dists2, const DIST_UINT& cur_dist) { - bool prune = false; - for (auto& rec: label.first) { - if (rank_dists2[rec.hub] == INF_INT) { continue; } - const DIST_UINT hl_dist = rec.dist + rank_node_lens[rec.hub] + rank_dists2[rec.hub]; - if (hl_dist <= cur_dist) { return true; } - } - for (auto& rec: label.second) { - if (rank_dists[rec.hub] == INF_INT) { continue; } - const DIST_UINT hl_dist = rec.dist + rank_node_lens[rec.hub] + rank_dists[rec.hub]; - if (hl_dist <= cur_dist) { return true; } - } - return prune; -} - -void pruned_dijkstra(bdsg::HashGraph& g, NodesideId start, vector& labels, vector& path_lengths, vector& rank_to_ns, vector& ns_to_rank, vector& rank_node_lens, int stop_dist) { - auto node_count = g.get_node_count(); - DIST_UINT nodeside_count = node_count*2; - vector parent(nodeside_count, INF_INT); - - const auto rank_start = ns_to_rank[start]; - bool self_loop = false; - - vector rank_dists(node_count+1, INF_INT); - vector rank_dists2(node_count+1, INF_INT); - for (auto& rec: labels[ns_to_rank[start]].first) { - rank_dists[rec.hub] = rec.dist; - } - for (auto& rec: labels[ns_to_rank[start]].second) { - rank_dists2[rec.hub] = rec.dist; - } - - //place initial OTHER_NODE nodesides - PriorityQueue2 nodeside_queue{pqcomp2}; - handle_t start_handle = g.get_handle(nodeside_to_node(start), nodeside_left(start)); - g.follow_edges(start_handle, false, [&](const handle_t& new_h){ - NodesideId new_nodeside = handle_entry_nodeside(new_h, g); - path_lengths[new_nodeside] = 0; - nodeside_queue.emplace(0, new_nodeside); - }); - - auto not_stop_cond = [stop_dist] (const 
QueueObj2& top_item) { - bool not_stop_dist = (get<0>(top_item) <= stop_dist) && (get<0>(top_item) != INF_INT); - return not_stop_dist; - }; - NodesideId last_ns = start; - while ((!nodeside_queue.empty()) && not_stop_cond(nodeside_queue.top())) { - const QueueObj2 item = nodeside_queue.top(); nodeside_queue.pop(); - const DIST_UINT cur_dist = get<0>(item); - const NodesideId cur_nodeside = get<1>(item); - - //gotta have this since priorities of C++ priority queue elements can't be updated - //all nodesides on queue are reached through OTHER_NODE direction - if (cur_dist != path_lengths[cur_nodeside]){ - continue; - } - last_ns = cur_nodeside; - if (cur_nodeside != start) [[likely]] { - //check if we can prune here - const auto rank_cur_ns = ns_to_rank[cur_nodeside]; - DIST_UINT label_dist = label_contains(labels[rank_cur_ns], rank_start) ? get_label_dist(labels[rank_cur_ns], rank_start) : INF_INT; - if (label_dist <= cur_dist) { - continue; - } - - if (prune_check(labels[rank_cur_ns], rank_node_lens, rank_dists, rank_dists2, cur_dist)) { continue; } - - hub_emplace(labels[rank_cur_ns], rank_start, cur_dist); - } - - const NodeId cur_node = nodeside_to_node(cur_nodeside); - //cross "edge" to other nodeside - const handle_t cur_handle = g.get_handle(cur_node, !nodeside_left(cur_nodeside)); - const DIST_UINT cur_handle_len = g.get_length(cur_handle); - const DIST_UINT new_dist = cur_dist + cur_handle_len; - - //now find the new nodesides reached through the OTHER_NODE direction - g.follow_edges(cur_handle, false, [&](const handle_t& new_h){ - const NodesideId new_ns = handle_entry_nodeside(new_h, g); - if (new_dist < path_lengths[new_ns]) { - path_lengths[new_ns] = new_dist; - nodeside_queue.emplace(new_dist, new_ns); - parent[new_ns] = cur_nodeside; - } - }); - } - -} - - - -DIST_UINT hl_query(NodesideId i, NodesideId j, vector& labels, bdsg::HashGraph& g) { - if (i == j) { return 0; } - return label_contains(labels[i], j) ? 
get_label_dist(labels[i], j) : hl_build_intersect(labels[i], labels[j], g); -} -DIST_UINT hl_query(NodesideId i, NodesideId j, vector& labels, vector& rank_node_lens) { - if (i == j) { return 0; } - NodesideId important = min(i, j); - NodesideId less_important = max(i, j); - - return label_contains(labels[less_important], important) ? get_label_dist(labels[less_important], important) : hl_build_intersect(labels[less_important], labels[important], rank_node_lens); -} -DIST_UINT hl_query2(NodesideId i, NodesideId j, vector& labels) { - if (i == j) { return 0; } - NodesideId important = min(i, j); - NodesideId less_important = max(i, j); - - return label_contains(labels[less_important], important) ? get_label_dist(labels[less_important], important) : hl_build_intersect(labels[less_important], labels[important]); -} - -DIST_UINT get_degree(NodeId node, bdsg::HashGraph& g) { - const handle_t h = g.get_handle(node, false); - return g.get_degree(h, true) * g.get_degree(h, false); -} - -vector make_labels(bdsg::HashGraph& g) { - int node_count = g.get_node_count(); - int nodeside_count = g.get_node_count()*2; - - vector labels; labels.resize(nodeside_count); - - auto node_ordering_view = ranges::iota_view(1,node_count+1); - vector node_ordering(node_ordering_view.begin(), node_ordering_view.end()); - sort(node_ordering.begin(), node_ordering.end(), [&](NodeId n1, NodeId n2) { - return get_degree(n1, g) > get_degree(n2, g); - }); - - vector rank_node_lens(node_count+1, 0); - - vector ns_ordering(nodeside_count, 0); - - vector rank(nodeside_count, 0); - for (size_t i = 0; i < node_ordering.size(); i++) { - const NodeId node = node_ordering[i]; - const auto nodesides = get_node_nodesides(node); - rank[nodesides[0]] = i*2; - rank[nodesides[1]] = i*2+1; - ns_ordering[i*2] = nodesides[0]; - ns_ordering[i*2+1] = nodesides[1]; - rank_node_lens[i+1] = g.get_length(g.get_handle(node, false)); - } - - vector path_lengths; - //for (int i = 0; i < 2; i++) { - path_lengths = 
vector(nodeside_count, INF_INT); - - - for (size_t i = 0; i < node_ordering.size(); i++) { - NodeId node = node_ordering[i]; - const auto nodesides = get_node_nodesides(node); - for (NodesideId ns: nodesides) { - pruned_dijkstra(g, ns, labels, path_lengths, ns_ordering, rank, rank_node_lens); - //for (int i = 0; i < 2; i++) { - std::fill(path_lengths.begin(), path_lengths.end(), INF_INT); - - } - } - return labels; -} - -} diff --git a/bdsg/src/landmark.cpp b/bdsg/src/landmark.cpp deleted file mode 100644 index e457c012..00000000 --- a/bdsg/src/landmark.cpp +++ /dev/null @@ -1,501 +0,0 @@ -#include "bdsg/landmark.hpp" - -using namespace std; -namespace bdsg { - -int addInt(int a, int b) { - if (a == INF_INT || b == INF_INT) { - return INF_INT; - } - return a+b; -} - -bool pqcomp1(const QueueObj& o1, const QueueObj& o2) { - return get<0>(o1) > get<0>(o2); -}; - -/* -following functions assume input graph node ids go from 1...N -*/ - -/* -Converts a node_id to a nodeside_id -*/ -//using bool for indicating side just like for direction in follow_edges -NodesideId node_to_nodeside(NodeId node_id, bool left) { - NodesideId ns_id = (node_id-1)*2; - - if (!left) { - ns_id += 1; - } - - return ns_id; -} - -array get_node_nodesides(NodeId node_id) { - NodesideId ns_id = (node_id-1)*2; - array both = {ns_id, ns_id+1}; - return both; -} - -NodeId nodeside_to_node(NodesideId nodeside_id) { - return nodeside_id / 2 + 1; -} - -tuple nodeside_to_node_tuple(NodesideId nodeside_id) { - tuple t = {nodeside_id / 2 + 1, (nodeside_id%2==0)}; - return t; -} - -bool nodeside_left (NodesideId nodeside_id) { - return nodeside_id%2==0; -} - -NodesideId other_nodeside(int nodeside_id) { - return nodeside_id ^ 1; -} - -NodesideId handle_entry_nodeside(const handle_t& handle, HashGraph& g) { - return node_to_nodeside(g.get_id(handle), !g.get_is_reverse(handle)); -} - -string nodeside_string(NodesideId nodeside) { - bool is_left = nodeside_left(nodeside); - string dir = is_left ? 
"left" : "right"; - NodeId node = nodeside_to_node(nodeside); - - stringstream stream; - stream << node << " (" << dir << ")"; - return stream.str(); -} - -/// landmark selection - -/* -returns ids of nodesides with largest degree -*/ -//helper -int nodeside_degree(NodesideId ns_id, HashGraph& g) { - tuple t = nodeside_to_node_tuple(ns_id); - NodeId node_id = get<0>(t); - bool is_left = get<1>(t); - handle_t node_handle = g.get_handle(node_id); - return g.get_degree(node_handle, is_left); -} - -vector get_highest_degree_nodesides(bdsg::HashGraph& g, int top_num, int nodeside_count) { - vector ns_degrees; ns_degrees.resize(nodeside_count); - vector ns; ns.resize(nodeside_count); - for (int i = 0; i < nodeside_count; i++) { - ns_degrees[i]=nodeside_degree(i, g); - ns[i] = i; - } - - std::sort(ns.begin(), ns.end(), [&ns_degrees](NodesideId n1, NodesideId n2){ - return ns_degrees[n1] > ns_degrees[n2]; - }); - - auto top_span = span(ns).subspan(0, top_num); - vector top_deg_ns(top_span.begin(), top_span.end()); - return top_deg_ns; -} - -vector get_landmark_nodes(bdsg::HashGraph& g, int top_ns_num, int nodeside_count) { - vector highest_deg_ns = get_highest_degree_nodesides(g, top_ns_num, nodeside_count); - vector possible_lm; possible_lm.resize(top_ns_num); - ranges::transform(highest_deg_ns.begin(), highest_deg_ns.end(), possible_lm.begin(), nodeside_to_node); - sort(possible_lm.begin(),possible_lm.end()); - auto new_lm_end_it = unique(possible_lm.begin(), possible_lm.end()); - possible_lm.resize(distance(possible_lm.begin(), new_lm_end_it)); - - return possible_lm; -} - -/* -Notes: -- modifies the table passed in -*/ -void fill_other_nodeside_dists(SdslArray2D& dist_table, HashGraph& g) { - auto table_shape = dist_table.shape(); - for (DIST_UINT i = 0; i < table_shape[0]; i++) { - for (DIST_UINT ns = 0; ns < table_shape[1]; ns++) { - if (dist_table.get(i,ns) != INF_INT) { - NodesideId other_ns = other_nodeside(ns); - NodeId node = nodeside_to_node(ns); - 
dist_table.set(i, other_ns, min(addInt(dist_table.get(i,ns), demote_distance(g.get_length(g.get_handle(node)))), static_cast(dist_table.get(i,other_ns)))); - } - } - } -} - -/* -trying insert-Dijkstra over decrease-key dijkstra -see https://stackoverflow.com/questions/9255620/why-does-dijkstras-algorithm-use-decrease-key - -start is the id of the origin node -dijkstra goes from start node to other nodes' nodesides -*/ - -PriorityQueue initQueue( - int nodeside_count, - NodesideId start_ns -) { - vector init_objs; init_objs.resize(2); - /* - for (int i = 0; i < nodeside_count; i++) { - init_objs[i] = make_tuple(INF_INT, OTHER_NODESIDE, i); - init_objs[i+nodeside_count] = make_tuple(INF_INT, OTHER_NODE, i); - }*/ - - //NodesideId start_node_other_ns = other_nodeside(start_ns); - //treating start_ns as arrived to from OTHER_NODESIDE so exit is to another node - //don't exit out other direction, that's covered by the other nodeside of the node - init_objs[0] = make_tuple(0, OTHER_NODESIDE, start_ns); - //dummy queue obj - init_objs[1] = make_tuple(INF_INT, OTHER_NODE, start_ns); - /* - for (QueueObj qo: init_objs) { - auto [a,b,c] = qo; - nodeside_queue.push(qo); - } */ - PriorityQueue nodeside_queue{pqcomp1, move(init_objs)}; - return nodeside_queue; -} - -/* -Called when new nodeside is discovered. 
-If path to the nodeside is the shortest found so far: - update path_lengths and add new queue entry for it -*/ -bool discover_nodeside(NodesideId new_nodeside, EnterDir enter_direction, int new_len, vector>& path_lengths, PriorityQueue& nodeside_queue) { - - if (new_len < path_lengths[enter_direction][new_nodeside]) { - nodeside_queue.push({new_len, enter_direction, new_nodeside}); - path_lengths[enter_direction][new_nodeside] = new_len; - } - return true; -}; - -//arguments after second are optional -SdslArray2D dijkstra(bdsg::HashGraph& g, NodesideId start, NodesideId stop_ns, int stop_dist, bool ball_ver) { - //code in this function based off https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm#Pseudocode and its subsections - int nodeside_count = g.get_node_count()*2; - - #define LEFT_PATH true - #define RIGHT_PATH false - - NodesideId start_ns = start; - //handle_t start_node_handle = g.get_handle(nodeside_to_node(start_ns)); - - - int inf = INF_INT; - - /*auto is_start = [start_ns](NodesideId ns) { - return (ns == start_ns); - }; */ - - //Need to see each nodeside twice; need to keep track of two distances, one when entering from other nodeside - //the other when entering from a completely different node - //variables called enter_direction keep track of direction of entry - PriorityQueue nodeside_queue = initQueue(nodeside_count, start_ns); - - //matrix to store minimum distances found so far - vector> path_lengths(2); - for (auto& v: path_lengths) { - v.resize(nodeside_count); - fill(v.begin(), v.end(), inf); - } - - path_lengths[OTHER_NODESIDE][start_ns] = 0; - - vector> which_path(2); - - //keep track of which nodesides have self-loops - vector self_loop; self_loop.resize(nodeside_count); - fill(self_loop.begin(), self_loop.end(), false); - - - //Stop conditions: - // - empty queue - // - stop_ns visited from the OTHER_NODE direction - // - stop_dist reached or exceeded by next queue item - auto not_stop_cond = [stop_ns,stop_dist] (const QueueObj& 
top_item) { - bool not_stop_ns = !(get<1>(top_item) == OTHER_NODE && get<2>(top_item) == stop_ns); - bool not_stop_dist = (get<0>(top_item) <= stop_dist) && (get<0>(top_item) != INF_INT); - return not_stop_ns && not_stop_dist; - }; - while ((!nodeside_queue.empty()) && not_stop_cond(nodeside_queue.top())) { - QueueObj item = nodeside_queue.top(); nodeside_queue.pop(); - int cur_dist = get<0>(item); - EnterDir enter_direction = get<1>(item); - NodesideId cur_nodeside = get<2>(item); - - //gotta have this since priorities of C++ priority queue elements can't be updated - if (cur_dist != path_lengths[enter_direction][cur_nodeside]){ - continue; - } - - NodeId node_id = nodeside_to_node(cur_nodeside); - - - - if (enter_direction == OTHER_NODESIDE) { - //got here from the other nodeside of cur_nodeside's node - handle_t handle = g.get_handle(node_id, nodeside_left(cur_nodeside)); - - g.follow_edges(handle, false, [&g,cur_nodeside,&handle,cur_dist,&self_loop,&path_lengths,&nodeside_queue](const handle_t& new_h){ - NodesideId new_nodeside = handle_entry_nodeside(new_h,g); - if (new_nodeside == cur_nodeside) { - self_loop[new_nodeside] = true; - } - - return discover_nodeside( - new_nodeside, OTHER_NODE, - cur_dist, - path_lengths, nodeside_queue - ); - }); - - } - else { - //came here from a nodeside of a node that =/= cur_nodeside's node - handle_t handle = g.get_handle(node_id, !nodeside_left(cur_nodeside)); - int handle_len = demote_distance(g.get_length(handle)); - - NodesideId other_ns = other_nodeside(cur_nodeside); - //handle_t cur_handle = self_loop[cur_nodeside] ? 
g.flip(handle):handle; - if (other_ns == cur_nodeside) { - self_loop[other_ns] = true; - } - discover_nodeside( - other_ns, OTHER_NODESIDE, - addInt(cur_dist, handle_len), - path_lengths, nodeside_queue - ); - } - - } - - SdslArray2D res(1, nodeside_count); - for (int i = 0; i < nodeside_count; i++) { - res.set(0, i, path_lengths[OTHER_NODE][i]); - } - - if (ball_ver) { - //fill in OTHER_NODESIDE dists for balls - fill_other_nodeside_dists(res, g); - } - return res; -} - -/* -dijkstra edge cases -*/ - -/* -notes: - - lm_nodes should not be empty -*/ -SdslArray2D get_lm2ns_dist_table(vector& lm_nodes, HashGraph& g) { - size_t nodeside_count = g.get_node_count()*2; - //rows: one row for each landmark nodeside (two rows per landmark) - //cols: one column for each nodeside of the graph - SdslArray2D table(lm_nodes.size()*2, nodeside_count); - - for (size_t i = 0; i < lm_nodes.size(); i++) { - NodeId lm_node = lm_nodes[i]; - array lm_nodesides = get_node_nodesides(lm_node); - - for (size_t ns_i = 0; ns_i < lm_nodesides.size(); ns_i++) { - SdslArray2D dists = dijkstra(g, lm_nodesides[ns_i]); - - size_t row = i*2+ns_i; - for (size_t col = 0; col < nodeside_count; col++) { - table.set(row, col, dists.get(0,col)); - } - } - } - - return table; -} - -void get_closest_lm_ind(SdslArray2D& dist_table, vector& ind_vec, vector& min_dist_vec, int nodeside_count) { - //typedef boost::multi_array_types::index_range index_range; - ind_vec.resize(nodeside_count); - min_dist_vec.resize(nodeside_count); - for (int ns = 0; ns < nodeside_count; ns++) { - //Array2D::index_gen ind_gen; - //Array2D::array_view<1>::type ns_col_view = dist_table[ind_gen[index_range(0,dist_table.shape()[0])][ns]]; - auto ns_col_view = dist_table.col_view(ns); - auto min_itr = min_element(ns_col_view.begin(), ns_col_view.end()); - ind_vec[ns] = distance(ns_col_view.begin(), min_itr) / 2; - int min_dist = *min_itr; - min_dist_vec[ns] = min_dist; - } -} -/* -TEST_CASE("simple get_closest_lm_ind test","") { - 
HashGraph gt; - handle_t h = gt.create_handle("A"); - handle_t h2 = gt.create_handle("AC"); - handle_t h3 = gt.create_handle("ACG"); - handle_t h4 = gt.create_handle("ACGT"); - handle_t h5 = gt.create_handle("ACGTA"); - handle_t h6 = gt.create_handle("ACGTAC"); - - gt.create_edge(h,h2); gt.create_edge(h,h3); - gt.create_edge(h2,h4); gt.create_edge(h3,h5); - gt.create_edge(h4,h6); gt.create_edge(h5,h6); - gt.create_edge(h,h6); - - int nodeside_count = gt.get_node_count()*2; - - NodeId l1 = gt.get_id(h); - NodeId l2 = gt.get_id(h6); - vector lm_nodes = {l1, l2}; - Array2D dist_table = get_lm2ns_dist_table(lm_nodes, gt); - - vector closest_lm_ind_vec; closest_lm_ind_vec.resize(nodeside_count); - vector closest_lm_dist_vec; closest_lm_dist_vec.resize(nodeside_count); - get_closest_lm_ind(dist_table, closest_lm_ind_vec, closest_lm_dist_vec, nodeside_count); - - vector ind_ans_key = {0,1,0,1,0,1,0,1,0,1,0,0}; - for (int i = 0; i dist_ans_key = {INF_INT,0,0,4,0,5,2,0,3,0,0,INF_INT}; - for (int i = 0; i find_balls(vector& closest_lm_dist_vec, HashGraph& g, int min_ball_size) { - int nodeside_count = g.get_node_count()*2; - vector balls; balls.resize(nodeside_count); - for (int i = 0; i < nodeside_count; i++) { - int dist_limit = max(min_ball_size,closest_lm_dist_vec[i]); - balls[i] = get_ball_contents(i, dist_limit, g); - } - return balls; -} - -/* -query algorithm from Chen et al. 
(2009) -with modifications for our purpose -*/ - -int oracle_query(NodesideId source, NodesideId target, OracleInfo& oracle, HashGraph& g) { - #define ball_list oracle.balls - #define lm_node_vec oracle.lm_nodes - #define closest_lm_ind oracle.closest_lm_inds - #define lm_dist oracle.closest_lm_dists - #define lm_to_ns oracle.lm_sides_to_ns - - if (ball_list[source].contains(target)) { - return ball_list[source][target]; - } - - if (ball_list[target].contains(source)) { - return ball_list[target][source]; - } - - auto get_guess = [&](NodesideId ns) { - //typedef boost::multi_array_types::index_range index_range; - - NodeId lm = lm_node_vec[closest_lm_ind[ns]]; - //auto lm_nodesides = get_node_nodesides(lm); - int lm_length = demote_distance(g.get_length(g.get_handle(lm))); - int closest_lm_ns_ind = closest_lm_ind[ns]*2; - - //Array2D::index_gen ind_gen; - - //2 x nodeside_count table - //each row is a nodeside of the landmark - //col = nodeside id - //auto lm_table = lm_to_ns[ind_gen[index_range(closest_lm_ns_ind, closest_lm_ns_ind+2)][index_range()]]; - - //generate all possible distances - vector s; - for (int a: {lm_to_ns.get(closest_lm_ns_ind, source), lm_to_ns.get(closest_lm_ns_ind+1, source)}) { - for (int b: {lm_to_ns.get(closest_lm_ns_ind, target), lm_to_ns.get(closest_lm_ns_ind+1, target)}) { - int pos_dist=addInt(a,b); - s.push_back(pos_dist); - } - } - - //in case landmark has a self-loop - s[0] = addInt(s[0], addInt(lm_to_ns.get(closest_lm_ns_ind+1,node_to_nodeside(lm,false)), lm_length)); - s[3] = addInt(s[3], addInt(lm_to_ns.get(closest_lm_ns_ind, node_to_nodeside(lm,true)), lm_length)); - - int min_dist = addInt(*min_element(s.begin(), s.end()), lm_length); - - return min_dist; - }; - - return min(get_guess(target), get_guess(source)); -} - - -void printMArray(Array2D& arr) { - auto sh = arr.shape(); - int num_rows = sh[0]; int num_cols = sh[1]; - for (int i = 0; i < num_rows; i++) { - for (int j = 0; j < num_cols; j++) { - if (arr[i][j] == INF_INT) 
{ - cerr << "inf "; - } else { - cerr << arr[i][j] << " "; - } - } - cerr << endl; - } -} - - -} - From 8e57a39148f20a6c3dc5ad8de8ba8ae2b8aa4f22 Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Wed, 13 May 2026 09:22:28 -0700 Subject: [PATCH 60/75] fix errors caused by const conflicts Co-Authored-By: Claude Sonnet 4.6 --- .../bdsg/snarl_distance_index.cpp | 2795 +++++++++++++---- 1 file changed, 2143 insertions(+), 652 deletions(-) diff --git a/bdsg/cmake_bindings/bdsg/snarl_distance_index.cpp b/bdsg/cmake_bindings/bdsg/snarl_distance_index.cpp index 9f3aa143..8563933b 100644 --- a/bdsg/cmake_bindings/bdsg/snarl_distance_index.cpp +++ b/bdsg/cmake_bindings/bdsg/snarl_distance_index.cpp @@ -15,679 +15,2170 @@ #include #include +#include +#include #include +#include #include -#include -#include #include #include -#include -#include - +#include #ifndef BINDER_PYBIND11_TYPE_CASTER - #define BINDER_PYBIND11_TYPE_CASTER - PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr, false) - PYBIND11_DECLARE_HOLDER_TYPE(T, T*, false) - PYBIND11_MAKE_OPAQUE(std::shared_ptr) +#define BINDER_PYBIND11_TYPE_CASTER +PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr, false) +PYBIND11_DECLARE_HOLDER_TYPE(T, T *, false) +PYBIND11_MAKE_OPAQUE(std::shared_ptr) #endif // bdsg::SnarlDistanceIndex file:bdsg/snarl_distance_index.hpp line:181 struct PyCallBack_bdsg_SnarlDistanceIndex : public bdsg::SnarlDistanceIndex { - using bdsg::SnarlDistanceIndex::SnarlDistanceIndex; + using bdsg::SnarlDistanceIndex::SnarlDistanceIndex; - void dissociate() override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "dissociate"); - if (overload) { - auto o = overload.operator()(); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return 
pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::dissociate(); - } - void serialize(const class std::function & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "serialize"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::serialize(a0); - } - void serialize(int a0) override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "serialize"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::serialize(a0); - } - void deserialize(int a0) override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "deserialize"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::deserialize(a0); - } - unsigned int get_magic_number() const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "get_magic_number"); - if (overload) { - auto o = overload.operator()(); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return 
pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::get_magic_number(); - } - struct handlegraph::net_handle_t get_root() const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "get_root"); - if (overload) { - auto o = overload.operator()(); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::get_root(); - } - bool is_root(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "is_root"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::is_root(a0); - } - bool is_snarl(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "is_snarl"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::is_snarl(a0); - } - bool is_chain(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "is_chain"); - if (overload) { - auto o = 
overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::is_chain(a0); - } - bool is_node(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "is_node"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::is_node(a0); - } - bool is_sentinel(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "is_sentinel"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::is_sentinel(a0); - } - struct handlegraph::net_handle_t get_net(const struct handlegraph::handle_t & a0, const class handlegraph::HandleGraph * a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "get_net"); - if (overload) { - auto o = overload.operator()(a0, a1); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return 
SnarlDistanceIndex::get_net(a0, a1); - } - struct handlegraph::handle_t get_handle(const struct handlegraph::net_handle_t & a0, const class handlegraph::HandleGraph * a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "get_handle"); - if (overload) { - auto o = overload.operator()(a0, a1); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::get_handle(a0, a1); - } - struct handlegraph::net_handle_t get_parent(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "get_parent"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::get_parent(a0); - } - struct handlegraph::net_handle_t get_bound(const struct handlegraph::net_handle_t & a0, bool a1, bool a2) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "get_bound"); - if (overload) { - auto o = overload.operator()(a0, a1, a2); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::get_bound(a0, a1, a2); - } - struct handlegraph::net_handle_t flip(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - 
pybind11::function overload = pybind11::get_overload(static_cast(this), "flip"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::flip(a0); - } - struct handlegraph::net_handle_t canonical(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "canonical"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::canonical(a0); - } - enum handlegraph::SnarlDecomposition::endpoint_t starts_at(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "starts_at"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::starts_at(a0); - } - enum handlegraph::SnarlDecomposition::endpoint_t ends_at(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "ends_at"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - 
return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::ends_at(a0); - } - bool for_each_child_impl(const struct handlegraph::net_handle_t & a0, const class std::function & a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "for_each_child_impl"); - if (overload) { - auto o = overload.operator()(a0, a1); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::for_each_child_impl(a0, a1); - } - bool for_each_traversal_impl(const struct handlegraph::net_handle_t & a0, const class std::function & a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "for_each_traversal_impl"); - if (overload) { - auto o = overload.operator()(a0, a1); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::for_each_traversal_impl(a0, a1); - } - bool follow_net_edges_impl(const struct handlegraph::net_handle_t & a0, const class handlegraph::HandleGraph * a1, bool a2, const class std::function & a3) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "follow_net_edges_impl"); - if (overload) { - auto o = overload.operator()(a0, a1, a2, a3); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return 
pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::follow_net_edges_impl(a0, a1, a2, a3); - } - struct handlegraph::net_handle_t get_parent_traversal(const struct handlegraph::net_handle_t & a0, const struct handlegraph::net_handle_t & a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "get_parent_traversal"); - if (overload) { - auto o = overload.operator()(a0, a1); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::get_parent_traversal(a0, a1); - } - bool for_each_tippy_child_impl(const struct handlegraph::net_handle_t & a0, const class std::function & a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "for_each_tippy_child_impl"); - if (overload) { - auto o = overload.operator()(a0, a1); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDecomposition::for_each_tippy_child_impl(a0, a1); - } - bool for_each_traversal_start_impl(const struct handlegraph::net_handle_t & a0, const class std::function & a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "for_each_traversal_start_impl"); - if (overload) { - auto o = overload.operator()(a0, a1); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return 
SnarlDecomposition::for_each_traversal_start_impl(a0, a1); - } - bool for_each_traversal_end_impl(const struct handlegraph::net_handle_t & a0, const class std::function & a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "for_each_traversal_end_impl"); - if (overload) { - auto o = overload.operator()(a0, a1); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDecomposition::for_each_traversal_end_impl(a0, a1); - } - void serialize(const std::string & a0) override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "serialize"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return TriviallySerializable::serialize(a0); - } - void deserialize(const std::string & a0) override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "deserialize"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return TriviallySerializable::deserialize(a0); - } + void dissociate() override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "dissociate"); + if (overload) { + auto o = overload.operator()(); + if 
(pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::dissociate(); + } + void serialize(const class std::function + &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "serialize"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::serialize(a0); + } + void serialize(int a0) override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "serialize"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::serialize(a0); + } + void deserialize(int a0) override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "deserialize"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::deserialize(a0); + } + unsigned int get_magic_number() const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + 
static_cast(this), + "get_magic_number"); + if (overload) { + auto o = overload.operator()(); + if (pybind11::detail::cast_is_temporary_value_reference< + unsigned int>::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::get_magic_number(); + } + struct handlegraph::net_handle_t get_root() const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "get_root"); + if (overload) { + auto o = overload.operator()(); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::net_handle_t>::value) { + static pybind11::detail::override_caster_t< + struct handlegraph::net_handle_t> + caster; + return pybind11::detail::cast_ref( + std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::get_root(); + } + bool is_root(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "is_root"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::is_root(a0); + } + bool is_snarl(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "is_snarl"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + 
return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::is_snarl(a0); + } + bool is_chain(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "is_chain"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::is_chain(a0); + } + bool is_node(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "is_node"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::is_node(a0); + } + bool is_sentinel(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "is_sentinel"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::is_sentinel(a0); + } + struct handlegraph::net_handle_t + get_net(const struct handlegraph::handle_t &a0, + const class handlegraph::HandleGraph *a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + 
static_cast(this), "get_net"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::net_handle_t>::value) { + static pybind11::detail::override_caster_t< + struct handlegraph::net_handle_t> + caster; + return pybind11::detail::cast_ref( + std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::get_net(a0, a1); + } + struct handlegraph::handle_t + get_handle(const struct handlegraph::net_handle_t &a0, + const class handlegraph::HandleGraph *a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "get_handle"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::handle_t>::value) { + static pybind11::detail::override_caster_t + caster; + return pybind11::detail::cast_ref( + std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::get_handle(a0, a1); + } + struct handlegraph::net_handle_t + get_parent(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "get_parent"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::net_handle_t>::value) { + static pybind11::detail::override_caster_t< + struct handlegraph::net_handle_t> + caster; + return pybind11::detail::cast_ref( + std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::get_parent(a0); + } + struct handlegraph::net_handle_t + get_bound(const struct handlegraph::net_handle_t &a0, bool a1, + bool a2) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = 
pybind11::get_overload( + static_cast(this), "get_bound"); + if (overload) { + auto o = overload.operator()( + a0, a1, a2); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::net_handle_t>::value) { + static pybind11::detail::override_caster_t< + struct handlegraph::net_handle_t> + caster; + return pybind11::detail::cast_ref( + std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::get_bound(a0, a1, a2); + } + struct handlegraph::net_handle_t + flip(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "flip"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::net_handle_t>::value) { + static pybind11::detail::override_caster_t< + struct handlegraph::net_handle_t> + caster; + return pybind11::detail::cast_ref( + std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::flip(a0); + } + struct handlegraph::net_handle_t + canonical(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "canonical"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::net_handle_t>::value) { + static pybind11::detail::override_caster_t< + struct handlegraph::net_handle_t> + caster; + return pybind11::detail::cast_ref( + std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::canonical(a0); + } + enum handlegraph::SnarlDecomposition::endpoint_t + starts_at(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload 
= pybind11::get_overload( + static_cast(this), "starts_at"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference< + enum handlegraph::SnarlDecomposition::endpoint_t>::value) { + static pybind11::detail::override_caster_t< + enum handlegraph::SnarlDecomposition::endpoint_t> + caster; + return pybind11::detail::cast_ref< + enum handlegraph::SnarlDecomposition::endpoint_t>(std::move(o), + caster); + } + return pybind11::detail::cast_safe< + enum handlegraph::SnarlDecomposition::endpoint_t>(std::move(o)); + } + return SnarlDistanceIndex::starts_at(a0); + } + enum handlegraph::SnarlDecomposition::endpoint_t + ends_at(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "ends_at"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference< + enum handlegraph::SnarlDecomposition::endpoint_t>::value) { + static pybind11::detail::override_caster_t< + enum handlegraph::SnarlDecomposition::endpoint_t> + caster; + return pybind11::detail::cast_ref< + enum handlegraph::SnarlDecomposition::endpoint_t>(std::move(o), + caster); + } + return pybind11::detail::cast_safe< + enum handlegraph::SnarlDecomposition::endpoint_t>(std::move(o)); + } + return SnarlDistanceIndex::ends_at(a0); + } + bool for_each_child_impl( + const struct handlegraph::net_handle_t &a0, + const class std::function + &a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), + "for_each_child_impl"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return 
SnarlDistanceIndex::for_each_child_impl(a0, a1); + } + bool for_each_traversal_impl( + const struct handlegraph::net_handle_t &a0, + const class std::function + &a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), + "for_each_traversal_impl"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::for_each_traversal_impl(a0, a1); + } + bool follow_net_edges_impl( + const struct handlegraph::net_handle_t &a0, + const class handlegraph::HandleGraph *a1, bool a2, + const class std::function + &a3) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), + "follow_net_edges_impl"); + if (overload) { + auto o = overload.operator()( + a0, a1, a2, a3); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::follow_net_edges_impl(a0, a1, a2, a3); + } + struct handlegraph::net_handle_t get_parent_traversal( + const struct handlegraph::net_handle_t &a0, + const struct handlegraph::net_handle_t &a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), + "get_parent_traversal"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::net_handle_t>::value) { + static pybind11::detail::override_caster_t< + struct handlegraph::net_handle_t> + caster; + return pybind11::detail::cast_ref( + 
std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::get_parent_traversal(a0, a1); + } + bool for_each_tippy_child_impl( + const struct handlegraph::net_handle_t &a0, + const class std::function + &a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), + "for_each_tippy_child_impl"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDecomposition::for_each_tippy_child_impl(a0, a1); + } + bool for_each_traversal_start_impl( + const struct handlegraph::net_handle_t &a0, + const class std::function + &a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), + "for_each_traversal_start_impl"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDecomposition::for_each_traversal_start_impl(a0, a1); + } + bool for_each_traversal_end_impl( + const struct handlegraph::net_handle_t &a0, + const class std::function + &a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), + "for_each_traversal_end_impl"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return 
pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDecomposition::for_each_traversal_end_impl(a0, a1); + } + void serialize(const std::string &a0) override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "serialize"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return TriviallySerializable::serialize(a0); + } + void deserialize(const std::string &a0) override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "deserialize"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return TriviallySerializable::deserialize(a0); + } }; -void bind_bdsg_snarl_distance_index(std::function< pybind11::module &(std::string const &namespace_) > &M) -{ - { // bdsg::SnarlDistanceIndex file:bdsg/snarl_distance_index.hpp line:181 - pybind11::class_, PyCallBack_bdsg_SnarlDistanceIndex, handlegraph::SnarlDecomposition, handlegraph::TriviallySerializable> cl(M("bdsg"), "SnarlDistanceIndex", "The distance index, which also acts as a snarl decomposition.\n\n The distance index provides an interface to traverse the snarl tree and to\n find minimum distances between two sibling nodes in the snarl tree (eg\n between two chains that are children of the same snarl).\n\n It also provides a method for quickly calculating the minimum distance\n between two positions on the graph.\n\n The implementation here is tightly coupled with the filling-in code in vg\n (see 
vg::fill_in_distance_index()). To make a SnarlDistanceIndex that\n actually works, you have to construct the object, and then call\n get_snarl_tree_records() with zero or more TemporaryDistanceIndex objects\n for connected components, and a graph.\n\n The TemporaryDistanceIndex needs to have a variety of TemporaryRecord\n implementation classes (TemporaryChainRecord, TemporarySnarlRecord,\n TemporaryNodeRecord) set up and added to it; this all has to be done \"by\n hand\", as it were, because no code is in this library to help you do it.\n\n "); - cl.def( pybind11::init( [](){ return new bdsg::SnarlDistanceIndex(); }, [](){ return new PyCallBack_bdsg_SnarlDistanceIndex(); } ) ); - - pybind11::enum_(cl, "connectivity_t", pybind11::arithmetic(), "The connectivity of a net_handle- this defines the direction that the net_handle is traversed") - .value("START_START", bdsg::SnarlDistanceIndex::START_START) - .value("START_END", bdsg::SnarlDistanceIndex::START_END) - .value("START_TIP", bdsg::SnarlDistanceIndex::START_TIP) - .value("END_START", bdsg::SnarlDistanceIndex::END_START) - .value("END_END", bdsg::SnarlDistanceIndex::END_END) - .value("END_TIP", bdsg::SnarlDistanceIndex::END_TIP) - .value("TIP_START", bdsg::SnarlDistanceIndex::TIP_START) - .value("TIP_END", bdsg::SnarlDistanceIndex::TIP_END) - .value("TIP_TIP", bdsg::SnarlDistanceIndex::TIP_TIP) - .export_values(); - - - pybind11::enum_(cl, "net_handle_record_t", pybind11::arithmetic(), "Type of a net_handle_t, which may not be the type of the record\nThis is to allow a node record to be seen as a chain from the perspective of a handle") - .value("ROOT_HANDLE", bdsg::SnarlDistanceIndex::ROOT_HANDLE) - .value("NODE_HANDLE", bdsg::SnarlDistanceIndex::NODE_HANDLE) - .value("SNARL_HANDLE", bdsg::SnarlDistanceIndex::SNARL_HANDLE) - .value("CHAIN_HANDLE", bdsg::SnarlDistanceIndex::CHAIN_HANDLE) - .value("SENTINEL_HANDLE", bdsg::SnarlDistanceIndex::SENTINEL_HANDLE) - .export_values(); - - - pybind11::enum_(cl, 
"record_t", pybind11::arithmetic(), "A record_t is the type of structure that a record can be.\n The actual distance index is stored as a series of \"records\" for each snarl/node/chain. \n The record type defines what is stored in a record\n\nNODE, SNARL, and CHAIN indicate that they don't store distances.\nSIMPLE_SNARL is a snarl with all children connecting only to the boundary nodes in one direction (ie, a bubble).\nTRIVIAL_SNARL represents consecutive nodes in a chain. \nNODE represents a node that is a trivial chain. A node can only be the child of a snarl.\nOVERSIZED_SNARL only stores distances to the boundaries.\nROOT_SNARL represents a connected component of the root. It has no start or end node so \n its children technically belong to the root.\nMULTICOMPONENT_CHAIN can represent a chain with snarls that are not start-end connected.\n The chain is split up into components between these snarls, each node is tagged with\n which component it belongs to.") - .value("ROOT", bdsg::SnarlDistanceIndex::ROOT) - .value("NODE", bdsg::SnarlDistanceIndex::NODE) - .value("DISTANCED_NODE", bdsg::SnarlDistanceIndex::DISTANCED_NODE) - .value("TRIVIAL_SNARL", bdsg::SnarlDistanceIndex::TRIVIAL_SNARL) - .value("DISTANCED_TRIVIAL_SNARL", bdsg::SnarlDistanceIndex::DISTANCED_TRIVIAL_SNARL) - .value("SIMPLE_SNARL", bdsg::SnarlDistanceIndex::SIMPLE_SNARL) - .value("DISTANCED_SIMPLE_SNARL", bdsg::SnarlDistanceIndex::DISTANCED_SIMPLE_SNARL) - .value("SNARL", bdsg::SnarlDistanceIndex::SNARL) - .value("DISTANCED_SNARL", bdsg::SnarlDistanceIndex::DISTANCED_SNARL) - .value("OVERSIZED_SNARL", bdsg::SnarlDistanceIndex::OVERSIZED_SNARL) - .value("ROOT_SNARL", bdsg::SnarlDistanceIndex::ROOT_SNARL) - .value("DISTANCED_ROOT_SNARL", bdsg::SnarlDistanceIndex::DISTANCED_ROOT_SNARL) - .value("CHAIN", bdsg::SnarlDistanceIndex::CHAIN) - .value("DISTANCED_CHAIN", bdsg::SnarlDistanceIndex::DISTANCED_CHAIN) - .value("MULTICOMPONENT_CHAIN", bdsg::SnarlDistanceIndex::MULTICOMPONENT_CHAIN) - 
.value("CHILDREN", bdsg::SnarlDistanceIndex::CHILDREN) - .export_values(); +void bind_bdsg_snarl_distance_index( + std::function &M) { + { // bdsg::SnarlDistanceIndex file:bdsg/snarl_distance_index.hpp line:181 + pybind11::class_< + bdsg::SnarlDistanceIndex, std::shared_ptr, + PyCallBack_bdsg_SnarlDistanceIndex, handlegraph::SnarlDecomposition, + handlegraph::TriviallySerializable> + cl(M("bdsg"), "SnarlDistanceIndex", + "The distance index, which also acts as a snarl decomposition.\n\n " + "The distance index provides an interface to traverse the snarl " + "tree and to\n find minimum distances between two sibling nodes in " + "the snarl tree (eg\n between two chains that are children of the " + "same snarl).\n\n It also provides a method for quickly calculating " + "the minimum distance\n between two positions on the graph.\n\n The " + "implementation here is tightly coupled with the filling-in code in " + "vg\n (see vg::fill_in_distance_index()). To make a " + "SnarlDistanceIndex that\n actually works, you have to construct " + "the object, and then call\n get_snarl_tree_records() with zero or " + "more TemporaryDistanceIndex objects\n for connected components, " + "and a graph.\n\n The TemporaryDistanceIndex needs to have a " + "variety of TemporaryRecord\n implementation classes " + "(TemporaryChainRecord, TemporarySnarlRecord,\n " + "TemporaryNodeRecord) set up and added to it; this all has to be " + "done \"by\n hand\", as it were, because no code is in this library " + "to help you do it.\n\n "); + cl.def(pybind11::init( + []() { return new bdsg::SnarlDistanceIndex(); }, + []() { return new PyCallBack_bdsg_SnarlDistanceIndex(); })); + pybind11::enum_( + cl, "connectivity_t", pybind11::arithmetic(), + "The connectivity of a net_handle- this defines the direction that the " + "net_handle is traversed") + .value("START_START", bdsg::SnarlDistanceIndex::START_START) + .value("START_END", bdsg::SnarlDistanceIndex::START_END) + .value("START_TIP", 
bdsg::SnarlDistanceIndex::START_TIP) + .value("END_START", bdsg::SnarlDistanceIndex::END_START) + .value("END_END", bdsg::SnarlDistanceIndex::END_END) + .value("END_TIP", bdsg::SnarlDistanceIndex::END_TIP) + .value("TIP_START", bdsg::SnarlDistanceIndex::TIP_START) + .value("TIP_END", bdsg::SnarlDistanceIndex::TIP_END) + .value("TIP_TIP", bdsg::SnarlDistanceIndex::TIP_TIP) + .export_values(); - pybind11::enum_(cl, "temp_record_t", pybind11::arithmetic(), "") - .value("TEMP_CHAIN", bdsg::SnarlDistanceIndex::TEMP_CHAIN) - .value("TEMP_SNARL", bdsg::SnarlDistanceIndex::TEMP_SNARL) - .value("TEMP_NODE", bdsg::SnarlDistanceIndex::TEMP_NODE) - .value("TEMP_ROOT", bdsg::SnarlDistanceIndex::TEMP_ROOT) - .export_values(); + pybind11::enum_( + cl, "net_handle_record_t", pybind11::arithmetic(), + "Type of a net_handle_t, which may not be the type of the record\nThis " + "is to allow a node record to be seen as a chain from the perspective " + "of a handle") + .value("ROOT_HANDLE", bdsg::SnarlDistanceIndex::ROOT_HANDLE) + .value("NODE_HANDLE", bdsg::SnarlDistanceIndex::NODE_HANDLE) + .value("SNARL_HANDLE", bdsg::SnarlDistanceIndex::SNARL_HANDLE) + .value("CHAIN_HANDLE", bdsg::SnarlDistanceIndex::CHAIN_HANDLE) + .value("SENTINEL_HANDLE", bdsg::SnarlDistanceIndex::SENTINEL_HANDLE) + .export_values(); - cl.def("serialize", [](bdsg::SnarlDistanceIndex &o, const std::string & a0) -> void { return o.serialize(a0); }, "", pybind11::arg("filename")); - cl.def("deserialize", [](bdsg::SnarlDistanceIndex &o, const std::string & a0) -> void { return o.deserialize(a0); }, "", pybind11::arg("filename")); - cl.def("dissociate", (void (bdsg::SnarlDistanceIndex::*)()) &bdsg::SnarlDistanceIndex::dissociate, "C++: bdsg::SnarlDistanceIndex::dissociate() --> void"); - cl.def("serialize", (void (bdsg::SnarlDistanceIndex::*)(const class std::function &) const) &bdsg::SnarlDistanceIndex::serialize, "C++: bdsg::SnarlDistanceIndex::serialize(const class std::function &) const --> void", 
pybind11::arg("iteratee")); - cl.def("serialize", (void (bdsg::SnarlDistanceIndex::*)(int)) &bdsg::SnarlDistanceIndex::serialize, "C++: bdsg::SnarlDistanceIndex::serialize(int) --> void", pybind11::arg("fd")); - cl.def("deserialize", (void (bdsg::SnarlDistanceIndex::*)(int)) &bdsg::SnarlDistanceIndex::deserialize, "C++: bdsg::SnarlDistanceIndex::deserialize(int) --> void", pybind11::arg("fd")); - cl.def("get_magic_number", (unsigned int (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::get_magic_number, "C++: bdsg::SnarlDistanceIndex::get_magic_number() const --> unsigned int"); - cl.def("get_prefix", (std::string (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::get_prefix, "C++: bdsg::SnarlDistanceIndex::get_prefix() const --> std::string"); - cl.def("preload", [](bdsg::SnarlDistanceIndex const &o) -> void { return o.preload(); }, ""); - cl.def("preload", (void (bdsg::SnarlDistanceIndex::*)(bool) const) &bdsg::SnarlDistanceIndex::preload, "Allow for preloading the index for more accurate timing of algorithms\n that use it, if it fits in memory. If blocking is true, waits for the\n index to be paged in. 
Otherwise, just tells the OS that we will want to\n use it.\n\nC++: bdsg::SnarlDistanceIndex::preload(bool) const --> void", pybind11::arg("blocking")); - cl.def("maximum_distance", [](bdsg::SnarlDistanceIndex const &o, const long long & a0, const bool & a1, const unsigned long & a2, const long long & a3, const bool & a4, const unsigned long & a5) -> unsigned long { return o.maximum_distance(a0, a1, a2, a3, a4, a5); }, "", pybind11::arg("id1"), pybind11::arg("rev1"), pybind11::arg("offset1"), pybind11::arg("id2"), pybind11::arg("rev2"), pybind11::arg("offset2")); - cl.def("maximum_distance", [](bdsg::SnarlDistanceIndex const &o, const long long & a0, const bool & a1, const unsigned long & a2, const long long & a3, const bool & a4, const unsigned long & a5, bool const & a6) -> unsigned long { return o.maximum_distance(a0, a1, a2, a3, a4, a5, a6); }, "", pybind11::arg("id1"), pybind11::arg("rev1"), pybind11::arg("offset1"), pybind11::arg("id2"), pybind11::arg("rev2"), pybind11::arg("offset2"), pybind11::arg("unoriented_distance")); - cl.def("maximum_distance", (unsigned long (bdsg::SnarlDistanceIndex::*)(const long long, const bool, const unsigned long, const long long, const bool, const unsigned long, bool, const class handlegraph::HandleGraph *) const) &bdsg::SnarlDistanceIndex::maximum_distance, "Find an approximation of the maximum distance between two positions. 
\nThis isn't a true maximum- the only guarantee is that it's greater than or equal to the minimum distance.\n\nC++: bdsg::SnarlDistanceIndex::maximum_distance(const long long, const bool, const unsigned long, const long long, const bool, const unsigned long, bool, const class handlegraph::HandleGraph *) const --> unsigned long", pybind11::arg("id1"), pybind11::arg("rev1"), pybind11::arg("offset1"), pybind11::arg("id2"), pybind11::arg("rev2"), pybind11::arg("offset2"), pybind11::arg("unoriented_distance"), pybind11::arg("graph")); - cl.def("distance_in_parent", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0, const struct handlegraph::net_handle_t & a1, const struct handlegraph::net_handle_t & a2) -> unsigned long { return o.distance_in_parent(a0, a1, a2); }, "", pybind11::arg("parent"), pybind11::arg("child1"), pybind11::arg("child2")); - cl.def("distance_in_parent", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0, const struct handlegraph::net_handle_t & a1, const struct handlegraph::net_handle_t & a2, const class handlegraph::HandleGraph * a3) -> unsigned long { return o.distance_in_parent(a0, a1, a2, a3); }, "", pybind11::arg("parent"), pybind11::arg("child1"), pybind11::arg("child2"), pybind11::arg("graph")); - cl.def("distance_in_parent", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const class handlegraph::HandleGraph *, unsigned long) const) &bdsg::SnarlDistanceIndex::distance_in_parent, "C++: bdsg::SnarlDistanceIndex::distance_in_parent(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const class handlegraph::HandleGraph *, unsigned long) const --> unsigned long", pybind11::arg("parent"), pybind11::arg("child1"), pybind11::arg("child2"), pybind11::arg("graph"), 
pybind11::arg("distance_limit")); - cl.def("distance_in_snarl", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0, const unsigned long & a1, const bool & a2, const unsigned long & a3, const bool & a4) -> unsigned long { return o.distance_in_snarl(a0, a1, a2, a3, a4); }, "", pybind11::arg("parent"), pybind11::arg("rank1"), pybind11::arg("right_side1"), pybind11::arg("rank2"), pybind11::arg("right_side2")); - cl.def("distance_in_snarl", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0, const unsigned long & a1, const bool & a2, const unsigned long & a3, const bool & a4, const class handlegraph::HandleGraph * a5) -> unsigned long { return o.distance_in_snarl(a0, a1, a2, a3, a4, a5); }, "", pybind11::arg("parent"), pybind11::arg("rank1"), pybind11::arg("right_side1"), pybind11::arg("rank2"), pybind11::arg("right_side2"), pybind11::arg("graph")); - cl.def("distance_in_snarl", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const unsigned long &, const bool &, const unsigned long &, const bool &, const class handlegraph::HandleGraph *, unsigned long) const) &bdsg::SnarlDistanceIndex::distance_in_snarl, "C++: bdsg::SnarlDistanceIndex::distance_in_snarl(const struct handlegraph::net_handle_t &, const unsigned long &, const bool &, const unsigned long &, const bool &, const class handlegraph::HandleGraph *, unsigned long) const --> unsigned long", pybind11::arg("parent"), pybind11::arg("rank1"), pybind11::arg("right_side1"), pybind11::arg("rank2"), pybind11::arg("right_side2"), pybind11::arg("graph"), pybind11::arg("distance_limit")); - cl.def("max_distance_in_parent", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0, const struct handlegraph::net_handle_t & a1, const struct handlegraph::net_handle_t & a2) -> unsigned long { return o.max_distance_in_parent(a0, a1, a2); }, "", pybind11::arg("parent"), pybind11::arg("child1"), 
pybind11::arg("child2")); - cl.def("max_distance_in_parent", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0, const struct handlegraph::net_handle_t & a1, const struct handlegraph::net_handle_t & a2, const class handlegraph::HandleGraph * a3) -> unsigned long { return o.max_distance_in_parent(a0, a1, a2, a3); }, "", pybind11::arg("parent"), pybind11::arg("child1"), pybind11::arg("child2"), pybind11::arg("graph")); - cl.def("max_distance_in_parent", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const class handlegraph::HandleGraph *, unsigned long) const) &bdsg::SnarlDistanceIndex::max_distance_in_parent, "Find the maximum distance between two children in the parent. \nThis is the same as distance_in_parent for everything except children of chains\n\nC++: bdsg::SnarlDistanceIndex::max_distance_in_parent(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const class handlegraph::HandleGraph *, unsigned long) const --> unsigned long", pybind11::arg("parent"), pybind11::arg("child1"), pybind11::arg("child2"), pybind11::arg("graph"), pybind11::arg("distance_limit")); - cl.def("distance_to_parent_bound", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0, bool const & a1, struct handlegraph::net_handle_t const & a2) -> unsigned long { return o.distance_to_parent_bound(a0, a1, a2); }, "", pybind11::arg("parent"), pybind11::arg("to_start"), pybind11::arg("child")); - cl.def("distance_to_parent_bound", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, bool, struct handlegraph::net_handle_t, class std::tuple) const) &bdsg::SnarlDistanceIndex::distance_to_parent_bound, "Get the distance from the child to the start or end bound of the parent.\nparent_and_child_types are hints to 
figure out the type of snarl/chain records the parent and child are.\ntuple of parent record type, parent handle type, child record type, child handle type.\nThis is really just used to see if the parent and child are trivial chains, so it might not be exactly what the actual record is.\n\nC++: bdsg::SnarlDistanceIndex::distance_to_parent_bound(const struct handlegraph::net_handle_t &, bool, struct handlegraph::net_handle_t, class std::tuple) const --> unsigned long", pybind11::arg("parent"), pybind11::arg("to_start"), pybind11::arg("child"), pybind11::arg("parent_and_child_types")); - cl.def("into_which_snarl", (class std::tuple (bdsg::SnarlDistanceIndex::*)(const long long &, const bool &) const) &bdsg::SnarlDistanceIndex::into_which_snarl, "If this node id and orientation is pointing into a snarl, then return the start.\nnode id and orientation pointing into the snarl, and if the snarl is trivial.\nReturns <0, false, false> if it doesn't point into a snarl.\n\nC++: bdsg::SnarlDistanceIndex::into_which_snarl(const long long &, const bool &) const --> class std::tuple", pybind11::arg("id"), pybind11::arg("reverse")); - cl.def("is_ordered_in_chain", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_ordered_in_chain, "Return true if child1 comes before child2 in the chain. 
\n\nC++: bdsg::SnarlDistanceIndex::is_ordered_in_chain(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("child1"), pybind11::arg("child2")); - cl.def("is_externally_start_end_connected", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t) const) &bdsg::SnarlDistanceIndex::is_externally_start_end_connected, "C++: bdsg::SnarlDistanceIndex::is_externally_start_end_connected(const struct handlegraph::net_handle_t) const --> bool", pybind11::arg("net")); - cl.def("is_externally_start_start_connected", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t) const) &bdsg::SnarlDistanceIndex::is_externally_start_start_connected, "C++: bdsg::SnarlDistanceIndex::is_externally_start_start_connected(const struct handlegraph::net_handle_t) const --> bool", pybind11::arg("net")); - cl.def("is_externally_end_end_connected", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t) const) &bdsg::SnarlDistanceIndex::is_externally_end_end_connected, "C++: bdsg::SnarlDistanceIndex::is_externally_end_end_connected(const struct handlegraph::net_handle_t) const --> bool", pybind11::arg("net")); - cl.def("lowest_common_ancestor", (struct std::pair (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::lowest_common_ancestor, "For two net handles, get a net handle lowest common ancestor.\nIf the lowest common ancestor is the root, then the two handles may be in\ndifferent connected components. 
In this case, return false.\n\nC++: bdsg::SnarlDistanceIndex::lowest_common_ancestor(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &) const --> struct std::pair", pybind11::arg("net1"), pybind11::arg("net2")); - cl.def("node_length", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::node_length, "Return the length of the net, which must represent a node (or sentinel of a snarl)\n\nC++: bdsg::SnarlDistanceIndex::node_length(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("minimum_length", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::minimum_length, "This is also the length of a net, but it can also be a snarl or chain. \nThe length of a chain includes the boundary nodes, a snarl does not.\nA looping chain only includes the start/end node once\n\nC++: bdsg::SnarlDistanceIndex::minimum_length(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("maximum_length", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::maximum_length, "C++: bdsg::SnarlDistanceIndex::maximum_length(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("chain_minimum_length", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::chain_minimum_length, "The length of a chain. 
If it is a multicomponent chain, then the length of \nthe last component, which is used for calculating distance, instead of inf \n\nC++: bdsg::SnarlDistanceIndex::chain_minimum_length(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("node_id", (long long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::node_id, "What is the node id of the node represented by this net handle.\nnet must be a node or a sentinel\n\nC++: bdsg::SnarlDistanceIndex::node_id(const struct handlegraph::net_handle_t &) const --> long long", pybind11::arg("net")); - cl.def("has_node", (bool (bdsg::SnarlDistanceIndex::*)(const long long) const) &bdsg::SnarlDistanceIndex::has_node, "Does the graph have this node?\n\nC++: bdsg::SnarlDistanceIndex::has_node(const long long) const --> bool", pybind11::arg("id")); - cl.def("is_reversed_in_parent", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_reversed_in_parent, "Only really relevant for nodes in chains, is the node\ntraversed backwards relative to the orientation of the chain\n\nC++: bdsg::SnarlDistanceIndex::is_reversed_in_parent(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("get_node_net_handle", [](bdsg::SnarlDistanceIndex const &o, const long long & a0) -> handlegraph::net_handle_t { return o.get_node_net_handle(a0); }, "", pybind11::arg("id")); - cl.def("get_node_net_handle", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const long long, bool) const) &bdsg::SnarlDistanceIndex::get_node_net_handle, "Get a net handle from a node and, optionally, an orientation\n\nC++: bdsg::SnarlDistanceIndex::get_node_net_handle(const long long, bool) const --> struct handlegraph::net_handle_t", pybind11::arg("id"), pybind11::arg("rev")); - cl.def("get_max_tree_depth", (unsigned long (bdsg::SnarlDistanceIndex::*)() const) 
&bdsg::SnarlDistanceIndex::get_max_tree_depth, "How deep is the snarl tree? The root is 0, top-level chain is 1, etc\nOnly counts chains\n\nC++: bdsg::SnarlDistanceIndex::get_max_tree_depth() const --> unsigned long"); - cl.def("get_depth", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_depth, "What is the depth of this net handle? Nodes and snarls get the depth of their parent.\nThe depth of the root is 0, the depth of its child chains is 1, the depth of the nodes and snarls that are \nchildren of those chains is also 1, and the chains that are children of those snarls have depth 2\n\nC++: bdsg::SnarlDistanceIndex::get_depth(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("get_connected_component_number", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_connected_component_number, "C++: bdsg::SnarlDistanceIndex::get_connected_component_number(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("get_handle_from_connected_component", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(unsigned long) const) &bdsg::SnarlDistanceIndex::get_handle_from_connected_component, "Given the connected component number (from get_connected_component_number), get the\nroot-level handle pointing to it.\nIf the connected component is a root-level snarl, then this may return a \"root\" handle,\nbut it will actually point to the snarl\n\nC++: bdsg::SnarlDistanceIndex::get_handle_from_connected_component(unsigned long) const --> struct handlegraph::net_handle_t", pybind11::arg("num")); - cl.def("has_connectivity", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, enum handlegraph::SnarlDecomposition::endpoint_t, enum handlegraph::SnarlDecomposition::endpoint_t) const) 
&bdsg::SnarlDistanceIndex::has_connectivity, "Is there a path between the start and end endpoints within the net handle?\n\nC++: bdsg::SnarlDistanceIndex::has_connectivity(const struct handlegraph::net_handle_t &, enum handlegraph::SnarlDecomposition::endpoint_t, enum handlegraph::SnarlDecomposition::endpoint_t) const --> bool", pybind11::arg("net"), pybind11::arg("start"), pybind11::arg("end")); - cl.def("has_external_connectivity", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, enum handlegraph::SnarlDecomposition::endpoint_t, enum handlegraph::SnarlDecomposition::endpoint_t) const) &bdsg::SnarlDistanceIndex::has_external_connectivity, "Is there a path between the start and end endpoints outside the net handle?\nThis is used for children of the root\n\nC++: bdsg::SnarlDistanceIndex::has_external_connectivity(const struct handlegraph::net_handle_t &, enum handlegraph::SnarlDecomposition::endpoint_t, enum handlegraph::SnarlDecomposition::endpoint_t) const --> bool", pybind11::arg("net"), pybind11::arg("start"), pybind11::arg("end")); - cl.def("get_prefix_sum_value", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_prefix_sum_value, "Get the prefix sum value for a node in a chain.\nFails if the parent of net is not a chain\n\nC++: bdsg::SnarlDistanceIndex::get_prefix_sum_value(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("get_max_prefix_sum_value", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_max_prefix_sum_value, "Get the maximum prefix sum value for a node in a chain.\nFails if the parent of net is not a chain\n\nC++: bdsg::SnarlDistanceIndex::get_max_prefix_sum_value(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("get_forward_loop_value", (unsigned long 
(bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_forward_loop_value, "Get the forward loop value for a node in a chain.\nFails if the parent of net is not a chain\n\nC++: bdsg::SnarlDistanceIndex::get_forward_loop_value(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("get_reverse_loop_value", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_reverse_loop_value, "Get the reverse value for a node in a chain.\nFails if the parent of net is not a chain\n\nC++: bdsg::SnarlDistanceIndex::get_reverse_loop_value(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("get_chain_component", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0) -> unsigned long { return o.get_chain_component(a0); }, "", pybind11::arg("net")); - cl.def("get_chain_component", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, bool) const) &bdsg::SnarlDistanceIndex::get_chain_component, "C++: bdsg::SnarlDistanceIndex::get_chain_component(const struct handlegraph::net_handle_t &, bool) const --> unsigned long", pybind11::arg("net"), pybind11::arg("get_end")); - cl.def("get_root", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::get_root, "Get a net handle referring to a tip-to-tip traversal of the contents of the root snarl.\n\nC++: bdsg::SnarlDistanceIndex::get_root() const --> struct handlegraph::net_handle_t"); - cl.def("is_root", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_root, "Return true if the given handle refers to (a traversal of) the root\nsnarl, and false otherwise.\n\nC++: bdsg::SnarlDistanceIndex::is_root(const struct handlegraph::net_handle_t &) const --> bool", 
pybind11::arg("net")); - cl.def("is_root_snarl", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_root_snarl, "Return true if the given handle refers to (a traversal of) a snarl of the root,\nwhich is considered to be the root but actually refers to a subset of the children \nof the root that are connected\n\nC++: bdsg::SnarlDistanceIndex::is_root_snarl(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_snarl", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_snarl, "Returns true if the given net handle refers to (a traversal of) a snarl.\n\nC++: bdsg::SnarlDistanceIndex::is_snarl(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_dag", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_dag, "Return true if the given snarl is a DAG and false otherwise\nReturns true if the given net_handle_t is not a snarl\n\nC++: bdsg::SnarlDistanceIndex::is_dag(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("snarl")); - cl.def("is_simple_snarl", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_simple_snarl, "Returns true if the given net handle refers to (a traversal of) a simple snarl\nA simple snarl is a bubble where each child node can only reach the boundary nodes,\nand each side of a node reaches a different boundary node\nThere may also be an edge connecting the two boundary nodes but no additional \nedges are allowed\n\nC++: bdsg::SnarlDistanceIndex::is_simple_snarl(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_regular_snarl", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_regular_snarl, 
"Returns true if the given net handle refers to (a traversal of) a regular snarl\nA regular snarl is the same as a simple snarl, except that the children may be\nnested chains, rather than being restricted to nodes \n\nC++: bdsg::SnarlDistanceIndex::is_regular_snarl(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_chain", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_chain, "Returns true if the given net handle refers to (a traversal of) a chain.\n\nC++: bdsg::SnarlDistanceIndex::is_chain(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_multicomponent_chain", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_multicomponent_chain, "Returns true if the given net handle refers to (a traversal of) a chain that is not start-end connected\n\nC++: bdsg::SnarlDistanceIndex::is_multicomponent_chain(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_looping_chain", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_looping_chain, "Returns true if the given net handle refers to (a traversal of) a chain that loops (a chain where the first and last node are the same).\n\nC++: bdsg::SnarlDistanceIndex::is_looping_chain(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_trivial_chain", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_trivial_chain, "Returns true if the given net handle refers to (a traversal of) a trivial chain that represents a single node.\n\nC++: bdsg::SnarlDistanceIndex::is_trivial_chain(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_node", (bool 
(bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_node, "Returns true if the given net handle refers to (a traversal of) a single node, and thus has a corresponding handle_t.\n\nC++: bdsg::SnarlDistanceIndex::is_node(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_sentinel", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_sentinel, "Return true if the given net handle is a snarl bound sentinel (in either\ninward or outward orientation), and false otherwise.\n\nC++: bdsg::SnarlDistanceIndex::is_sentinel(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("get_net", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::handle_t &, const class handlegraph::HandleGraph *) const) &bdsg::SnarlDistanceIndex::get_net, "Turn a handle to an oriented node into a net handle for a start-to-end or end-to-start traversal of the node, as appropriate.\n\nC++: bdsg::SnarlDistanceIndex::get_net(const struct handlegraph::handle_t &, const class handlegraph::HandleGraph *) const --> struct handlegraph::net_handle_t", pybind11::arg("handle"), pybind11::arg("graph")); - cl.def("get_handle", (struct handlegraph::handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const class handlegraph::HandleGraph *) const) &bdsg::SnarlDistanceIndex::get_handle, "For a net handle to a traversal of a single node, get the handle for that node in the orientation it is traversed.\nMay not be called for other net handles.\n\nC++: bdsg::SnarlDistanceIndex::get_handle(const struct handlegraph::net_handle_t &, const class handlegraph::HandleGraph *) const --> struct handlegraph::handle_t", pybind11::arg("net"), pybind11::arg("graph")); - cl.def("get_parent", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct 
handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_parent, "Get the parent snarl of a chain, or the parent chain of a snarl or node.\nIf the child is start-to-end or end-to-start, and the parent is a chain,\nthe chain comes out facing the same way, accounting for the relative\norientation of the child snarl or node in the chain. Otherwise,\neverything is produced as start-to-end, even if that is not actually a\nrealizable traversal of a snarl or chain. May not be called on the root\nsnarl.\n\nAlso works on snarl boundary sentinels.\n\nC++: bdsg::SnarlDistanceIndex::get_parent(const struct handlegraph::net_handle_t &) const --> struct handlegraph::net_handle_t", pybind11::arg("child")); - cl.def("get_bound", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, bool, bool) const) &bdsg::SnarlDistanceIndex::get_bound, "Get the bounding handle for the snarl or chain referenced by the given\nnet handle, getting the start or end facing in or out as appropriate.\n\nFor snarls, returns the bounding sentinel net handles. 
For chains,\nreturns net handles for traversals of the bounding nodes of the chain.\nIf the chain is a looping chain, then the start and end of the chain\nare the same, so the connectivity of the bound indicates which we're\nlooking at; the connectivity will be start-start if it is going \nbackwards in the node, and end-end if it is going forwards.\n\nIgnores traversal type.\n\nMay not be called on traversals of individual nodes.\n\nC++: bdsg::SnarlDistanceIndex::get_bound(const struct handlegraph::net_handle_t &, bool, bool) const --> struct handlegraph::net_handle_t", pybind11::arg("snarl"), pybind11::arg("get_end"), pybind11::arg("face_in")); - cl.def("get_node_from_sentinel", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_node_from_sentinel, "Given the sentinel of a snarl, return a handle to the node representing it\n\nC++: bdsg::SnarlDistanceIndex::get_node_from_sentinel(const struct handlegraph::net_handle_t &) const --> struct handlegraph::net_handle_t", pybind11::arg("sentinel")); - cl.def("flip", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::flip, "Return a net handle to the same snarl/chain/node in the opposite orientation.\nNo effect on tip-to-tip, start-to-start, or end-to-end net handles. Flips all the others.\n\nC++: bdsg::SnarlDistanceIndex::flip(const struct handlegraph::net_handle_t &) const --> struct handlegraph::net_handle_t", pybind11::arg("net")); - cl.def("canonical", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::canonical, "Get a canonical traversal handle from any net handle. All handles to the\nsame net graph element have the same canonical traversal. 
That canonical\ntraversal must be realizable, and might not always be start-to-end or\neven consistently be the same kind of traversal for different snarls,\nchains, or nodes. Mostly useful to normalize for equality comparisons.\n\nAny root snarl will become just the root\nAnything without connectivity will get START_END\n\nC++: bdsg::SnarlDistanceIndex::canonical(const struct handlegraph::net_handle_t &) const --> struct handlegraph::net_handle_t", pybind11::arg("net")); - cl.def("start_end_traversal_of", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::start_end_traversal_of, "Makes a start-end traversal of the net.\nFaster than canonical because it doesn't check the index for anything \n\nC++: bdsg::SnarlDistanceIndex::start_end_traversal_of(const struct handlegraph::net_handle_t &) const --> struct handlegraph::net_handle_t", pybind11::arg("net")); - cl.def("starts_at", (enum handlegraph::SnarlDecomposition::endpoint_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::starts_at, "Return the kind of location at which the given traversal starts.\n\nC++: bdsg::SnarlDistanceIndex::starts_at(const struct handlegraph::net_handle_t &) const --> enum handlegraph::SnarlDecomposition::endpoint_t", pybind11::arg("traversal")); - cl.def("ends_at", (enum handlegraph::SnarlDecomposition::endpoint_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::ends_at, "Return the kind of location at which the given traversal ends.\n\nC++: bdsg::SnarlDistanceIndex::ends_at(const struct handlegraph::net_handle_t &) const --> enum handlegraph::SnarlDecomposition::endpoint_t", pybind11::arg("traversal")); - cl.def("get_rank_in_parent", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_rank_in_parent, "For a child of a 
snarl, the rank is used to calculate the distance\n\nC++: bdsg::SnarlDistanceIndex::get_rank_in_parent(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("connected_component_count", (unsigned long (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::connected_component_count, "How many connected components are in this graph?\nThis returns the number of topological connected components, not necessarily the \nnumber of nodes in the top-level snarl \n\nC++: bdsg::SnarlDistanceIndex::connected_component_count() const --> unsigned long"); - cl.def("get_snarl_child_from_rank", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const unsigned long &) const) &bdsg::SnarlDistanceIndex::get_snarl_child_from_rank, "Get the child of a snarl from its rank. This shouldn't be exposed to the public interface but I need it\nPlease don't use it\nFor 0 or 1, returns the sentinel facing in. Otherwise return the child as a chain going START_END\n\nC++: bdsg::SnarlDistanceIndex::get_snarl_child_from_rank(const struct handlegraph::net_handle_t &, const unsigned long &) const --> struct handlegraph::net_handle_t", pybind11::arg("snarl"), pybind11::arg("rank")); - cl.def("get_parent_traversal", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_parent_traversal, "Get a net handle for traversals of a snarl or chain that contains\nthe given oriented bounding node traversals or sentinels. 
Given two\nsentinels for a snarl, produces a net handle to a start-to-end,\nend-to-end, end-to-start, or start-to-start traversal of that snarl.\nGiven handles to traversals of the bounding nodes of a chain, similarly\nproduces a net handle to a traversal of the chain.\n\nFor a chain, either or both handles can also be a snarl containing tips,\nfor a tip-to-start, tip-to-end, start-to-tip, end-to-tip, or tip-to-tip\ntraversal. Similarly, for a snarl, either or both handles can be a chain\nin the snarl that contains internal tips, or that has no edges on the\nappropriate end.\n\nMay only be called if a path actually exists between the given start\nand end.\n\nC++: bdsg::SnarlDistanceIndex::get_parent_traversal(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &) const --> struct handlegraph::net_handle_t", pybind11::arg("traversal_start"), pybind11::arg("traversal_end")); - cl.def_static("has_distances", (const bool (*)(enum bdsg::SnarlDistanceIndex::record_t)) &bdsg::SnarlDistanceIndex::has_distances, "C++: bdsg::SnarlDistanceIndex::has_distances(enum bdsg::SnarlDistanceIndex::record_t) --> const bool", pybind11::arg("type")); - cl.def_static("get_record_handle_type", (const enum bdsg::SnarlDistanceIndex::net_handle_record_t (*)(enum bdsg::SnarlDistanceIndex::record_t)) &bdsg::SnarlDistanceIndex::get_record_handle_type, "Given the type of the record, return the handle type. Some record types can represent multiple things,\nfor example a simple snarl record is used to represent a snarl, and the nodes/trivial chains in it.\nThis will return whatever is higher on the snarl tree. 
A simple snarl will be considered a snarl,\na root snarl will be considered a root, etc\n\nC++: bdsg::SnarlDistanceIndex::get_record_handle_type(enum bdsg::SnarlDistanceIndex::record_t) --> const enum bdsg::SnarlDistanceIndex::net_handle_record_t", pybind11::arg("type")); - cl.def_static("get_record_offset", (const unsigned long (*)(const struct handlegraph::net_handle_t &)) &bdsg::SnarlDistanceIndex::get_record_offset, "The offset into records that this handle points to\n\nC++: bdsg::SnarlDistanceIndex::get_record_offset(const struct handlegraph::net_handle_t &) --> const unsigned long", pybind11::arg("net_handle")); - cl.def_static("get_node_record_offset", (const unsigned long (*)(const struct handlegraph::net_handle_t &)) &bdsg::SnarlDistanceIndex::get_node_record_offset, "The offset of a node in a trivial snarl (0 if it isn't a node in a trivial snarl)\n\nC++: bdsg::SnarlDistanceIndex::get_node_record_offset(const struct handlegraph::net_handle_t &) --> const unsigned long", pybind11::arg("net_handle")); - cl.def_static("get_connectivity", (const enum bdsg::SnarlDistanceIndex::connectivity_t (*)(const struct handlegraph::net_handle_t &)) &bdsg::SnarlDistanceIndex::get_connectivity, "C++: bdsg::SnarlDistanceIndex::get_connectivity(const struct handlegraph::net_handle_t &) --> const enum bdsg::SnarlDistanceIndex::connectivity_t", pybind11::arg("net_handle")); - cl.def_static("get_handle_type", (const enum bdsg::SnarlDistanceIndex::net_handle_record_t (*)(const struct handlegraph::net_handle_t &)) &bdsg::SnarlDistanceIndex::get_handle_type, "C++: bdsg::SnarlDistanceIndex::get_handle_type(const struct handlegraph::net_handle_t &) --> const enum bdsg::SnarlDistanceIndex::net_handle_record_t", pybind11::arg("net_handle")); - cl.def_static("get_net_handle_from_values", [](unsigned long const & a0, enum bdsg::SnarlDistanceIndex::connectivity_t const & a1, enum bdsg::SnarlDistanceIndex::net_handle_record_t const & a2) -> const handlegraph::net_handle_t { return 
bdsg::SnarlDistanceIndex::get_net_handle_from_values(a0, a1, a2); }, "", pybind11::arg("pointer"), pybind11::arg("connectivity"), pybind11::arg("type")); - cl.def_static("get_net_handle_from_values", (const struct handlegraph::net_handle_t (*)(unsigned long, enum bdsg::SnarlDistanceIndex::connectivity_t, enum bdsg::SnarlDistanceIndex::net_handle_record_t, unsigned long)) &bdsg::SnarlDistanceIndex::get_net_handle_from_values, "C++: bdsg::SnarlDistanceIndex::get_net_handle_from_values(unsigned long, enum bdsg::SnarlDistanceIndex::connectivity_t, enum bdsg::SnarlDistanceIndex::net_handle_record_t, unsigned long) --> const struct handlegraph::net_handle_t", pybind11::arg("pointer"), pybind11::arg("connectivity"), pybind11::arg("type"), pybind11::arg("node_offset")); - cl.def("get_net_handle", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(unsigned long, enum bdsg::SnarlDistanceIndex::connectivity_t) const) &bdsg::SnarlDistanceIndex::get_net_handle, "C++: bdsg::SnarlDistanceIndex::get_net_handle(unsigned long, enum bdsg::SnarlDistanceIndex::connectivity_t) const --> struct handlegraph::net_handle_t", pybind11::arg("pointer"), pybind11::arg("connectivity")); - cl.def("get_net_handle", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(unsigned long) const) &bdsg::SnarlDistanceIndex::get_net_handle, "C++: bdsg::SnarlDistanceIndex::get_net_handle(unsigned long) const --> struct handlegraph::net_handle_t", pybind11::arg("pointer")); - cl.def_static("get_node_pointer_offset", (const unsigned long (*)(const long long &, const long long &, unsigned long)) &bdsg::SnarlDistanceIndex::get_node_pointer_offset, "Get the offset into snarl_tree_records for the pointer to a node record.\n\nC++: bdsg::SnarlDistanceIndex::get_node_pointer_offset(const long long &, const long long &, unsigned long) --> const unsigned long", pybind11::arg("id"), pybind11::arg("min_node_id"), pybind11::arg("component_count")); - cl.def_static("endpoints_to_connectivity", 
(const enum bdsg::SnarlDistanceIndex::connectivity_t (*)(enum handlegraph::SnarlDecomposition::endpoint_t, enum handlegraph::SnarlDecomposition::endpoint_t)) &bdsg::SnarlDistanceIndex::endpoints_to_connectivity, "C++: bdsg::SnarlDistanceIndex::endpoints_to_connectivity(enum handlegraph::SnarlDecomposition::endpoint_t, enum handlegraph::SnarlDecomposition::endpoint_t) --> const enum bdsg::SnarlDistanceIndex::connectivity_t", pybind11::arg("start"), pybind11::arg("end")); - cl.def_static("get_start_endpoint", (const enum handlegraph::SnarlDecomposition::endpoint_t (*)(enum bdsg::SnarlDistanceIndex::connectivity_t)) &bdsg::SnarlDistanceIndex::get_start_endpoint, "C++: bdsg::SnarlDistanceIndex::get_start_endpoint(enum bdsg::SnarlDistanceIndex::connectivity_t) --> const enum handlegraph::SnarlDecomposition::endpoint_t", pybind11::arg("connectivity")); - cl.def_static("get_start_endpoint", (const enum handlegraph::SnarlDecomposition::endpoint_t (*)(struct handlegraph::net_handle_t)) &bdsg::SnarlDistanceIndex::get_start_endpoint, "C++: bdsg::SnarlDistanceIndex::get_start_endpoint(struct handlegraph::net_handle_t) --> const enum handlegraph::SnarlDecomposition::endpoint_t", pybind11::arg("net")); - cl.def_static("get_end_endpoint", (const enum handlegraph::SnarlDecomposition::endpoint_t (*)(enum bdsg::SnarlDistanceIndex::connectivity_t)) &bdsg::SnarlDistanceIndex::get_end_endpoint, "C++: bdsg::SnarlDistanceIndex::get_end_endpoint(enum bdsg::SnarlDistanceIndex::connectivity_t) --> const enum handlegraph::SnarlDecomposition::endpoint_t", pybind11::arg("connectivity")); - cl.def_static("get_end_endpoint", (const enum handlegraph::SnarlDecomposition::endpoint_t (*)(const struct handlegraph::net_handle_t &)) &bdsg::SnarlDistanceIndex::get_end_endpoint, "C++: bdsg::SnarlDistanceIndex::get_end_endpoint(const struct handlegraph::net_handle_t &) --> const enum handlegraph::SnarlDecomposition::endpoint_t", pybind11::arg("net")); - cl.def_static("connectivity_to_endpoints", (const 
struct std::pair (*)(const enum bdsg::SnarlDistanceIndex::connectivity_t &)) &bdsg::SnarlDistanceIndex::connectivity_to_endpoints, "C++: bdsg::SnarlDistanceIndex::connectivity_to_endpoints(const enum bdsg::SnarlDistanceIndex::connectivity_t &) --> const struct std::pair", pybind11::arg("connectivity")); - cl.def("set_snarl_size_limit", (void (bdsg::SnarlDistanceIndex::*)(unsigned long)) &bdsg::SnarlDistanceIndex::set_snarl_size_limit, "C++: bdsg::SnarlDistanceIndex::set_snarl_size_limit(unsigned long) --> void", pybind11::arg("size")); - cl.def("set_only_top_level_chain_distances", (void (bdsg::SnarlDistanceIndex::*)(bool)) &bdsg::SnarlDistanceIndex::set_only_top_level_chain_distances, "C++: bdsg::SnarlDistanceIndex::set_only_top_level_chain_distances(bool) --> void", pybind11::arg("only_chain")); - cl.def("net_handle_as_string", (std::string (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::net_handle_as_string, "C++: bdsg::SnarlDistanceIndex::net_handle_as_string(const struct handlegraph::net_handle_t &) const --> std::string", pybind11::arg("net")); - cl.def("traverse_decomposition", (bool (bdsg::SnarlDistanceIndex::*)(const class std::function &, const class std::function &, const class std::function &) const) &bdsg::SnarlDistanceIndex::traverse_decomposition, "C++: bdsg::SnarlDistanceIndex::traverse_decomposition(const class std::function &, const class std::function &, const class std::function &) const --> bool", pybind11::arg("snarl_iteratee"), pybind11::arg("chain_iteratee"), pybind11::arg("node_iteratee")); - cl.def("traverse_decomposition_helper", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const class std::function &, const class std::function &, const class std::function &) const) &bdsg::SnarlDistanceIndex::traverse_decomposition_helper, "C++: bdsg::SnarlDistanceIndex::traverse_decomposition_helper(const struct handlegraph::net_handle_t &, const class 
std::function &, const class std::function &, const class std::function &) const --> bool", pybind11::arg("net"), pybind11::arg("snarl_iteratee"), pybind11::arg("chain_iteratee"), pybind11::arg("node_iteratee")); - cl.def("print_self", (void (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::print_self, "C++: bdsg::SnarlDistanceIndex::print_self() const --> void"); - cl.def("print_descendants_of", (void (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t) const) &bdsg::SnarlDistanceIndex::print_descendants_of, "C++: bdsg::SnarlDistanceIndex::print_descendants_of(const struct handlegraph::net_handle_t) const --> void", pybind11::arg("net")); - cl.def("print_snarl_stats", (void (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::print_snarl_stats, "C++: bdsg::SnarlDistanceIndex::print_snarl_stats() const --> void"); - cl.def("write_snarls_to_json", (void (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::write_snarls_to_json, "C++: bdsg::SnarlDistanceIndex::write_snarls_to_json() const --> void"); - cl.def("validate_index", (void (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::validate_index, "C++: bdsg::SnarlDistanceIndex::validate_index() const --> void"); - cl.def("validate_descendants_of", (void (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t) const) &bdsg::SnarlDistanceIndex::validate_descendants_of, "C++: bdsg::SnarlDistanceIndex::validate_descendants_of(const struct handlegraph::net_handle_t) const --> void", pybind11::arg("net")); - cl.def("validate_ancestors_of", (void (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t) const) &bdsg::SnarlDistanceIndex::validate_ancestors_of, "C++: bdsg::SnarlDistanceIndex::validate_ancestors_of(const struct handlegraph::net_handle_t) const --> void", pybind11::arg("net")); - cl.def("get_usage", (class std::tuple (bdsg::SnarlDistanceIndex::*)()) &bdsg::SnarlDistanceIndex::get_usage, "C++: 
bdsg::SnarlDistanceIndex::get_usage() --> class std::tuple"); - cl.def_static("sum", (unsigned long (*)(const unsigned long &, const unsigned long &)) &bdsg::SnarlDistanceIndex::sum, "Add integers, returning max() if any of them are max()\n\nC++: bdsg::SnarlDistanceIndex::sum(const unsigned long &, const unsigned long &) --> unsigned long", pybind11::arg("val1"), pybind11::arg("val2")); - cl.def_static("minus", (unsigned long (*)(unsigned long, unsigned long)) &bdsg::SnarlDistanceIndex::minus, "C++: bdsg::SnarlDistanceIndex::minus(unsigned long, unsigned long) --> unsigned long", pybind11::arg("x"), pybind11::arg("y")); - cl.def_static("maximum", (unsigned long (*)(unsigned long, unsigned long)) &bdsg::SnarlDistanceIndex::maximum, "C++: bdsg::SnarlDistanceIndex::maximum(unsigned long, unsigned long) --> unsigned long", pybind11::arg("x"), pybind11::arg("y")); - cl.def_static("bit_width", (unsigned long (*)(unsigned long)) &bdsg::SnarlDistanceIndex::bit_width, "C++: bdsg::SnarlDistanceIndex::bit_width(unsigned long) --> unsigned long", pybind11::arg("value")); - cl.def("time_accesses", (void (bdsg::SnarlDistanceIndex::*)()) &bdsg::SnarlDistanceIndex::time_accesses, "C++: bdsg::SnarlDistanceIndex::time_accesses() --> void"); + pybind11::enum_( + cl, "record_t", pybind11::arithmetic(), + "A record_t is the type of structure that a record can be.\n The " + "actual distance index is stored as a series of \"records\" for each " + "snarl/node/chain. \n The record type defines what is stored in a " + "record\n\nNODE, SNARL, and CHAIN indicate that they don't store " + "distances.\nSIMPLE_SNARL is a snarl with all children connecting only " + "to the boundary nodes in one direction (ie, a bubble).\nTRIVIAL_SNARL " + "represents consecutive nodes in a chain. \nNODE represents a node " + "that is a trivial chain. 
A node can only be the child of a " + "snarl.\nOVERSIZED_SNARL only stores distances to the " + "boundaries.\nROOT_SNARL represents a connected component of the root. " + "It has no start or end node so \n its children technically belong " + "to the root.\nMULTICOMPONENT_CHAIN can represent a chain with snarls " + "that are not start-end connected.\n The chain is split up into " + "components between these snarls, each node is tagged with\n which " + "component it belongs to.") + .value("ROOT", bdsg::SnarlDistanceIndex::ROOT) + .value("NODE", bdsg::SnarlDistanceIndex::NODE) + .value("DISTANCED_NODE", bdsg::SnarlDistanceIndex::DISTANCED_NODE) + .value("TRIVIAL_SNARL", bdsg::SnarlDistanceIndex::TRIVIAL_SNARL) + .value("DISTANCED_TRIVIAL_SNARL", + bdsg::SnarlDistanceIndex::DISTANCED_TRIVIAL_SNARL) + .value("SIMPLE_SNARL", bdsg::SnarlDistanceIndex::SIMPLE_SNARL) + .value("DISTANCED_SIMPLE_SNARL", + bdsg::SnarlDistanceIndex::DISTANCED_SIMPLE_SNARL) + .value("SNARL", bdsg::SnarlDistanceIndex::SNARL) + .value("DISTANCED_SNARL", bdsg::SnarlDistanceIndex::DISTANCED_SNARL) + .value("OVERSIZED_SNARL", bdsg::SnarlDistanceIndex::OVERSIZED_SNARL) + .value("ROOT_SNARL", bdsg::SnarlDistanceIndex::ROOT_SNARL) + .value("DISTANCED_ROOT_SNARL", + bdsg::SnarlDistanceIndex::DISTANCED_ROOT_SNARL) + .value("CHAIN", bdsg::SnarlDistanceIndex::CHAIN) + .value("DISTANCED_CHAIN", bdsg::SnarlDistanceIndex::DISTANCED_CHAIN) + .value("MULTICOMPONENT_CHAIN", + bdsg::SnarlDistanceIndex::MULTICOMPONENT_CHAIN) + .value("CHILDREN", bdsg::SnarlDistanceIndex::CHILDREN) + .export_values(); - { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex file:bdsg/snarl_distance_index.hpp line:1524 - auto & enclosing_class = cl; - pybind11::class_> cl(enclosing_class, "TemporaryDistanceIndex", ""); - cl.def( pybind11::init( [](){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex(); } ) ); - cl.def( pybind11::init( [](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex const &o){ return new 
bdsg::SnarlDistanceIndex::TemporaryDistanceIndex(o); } ) ); - cl.def_readwrite("min_node_id", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::min_node_id); - cl.def_readwrite("max_node_id", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_node_id); - cl.def_readwrite("root_structure_count", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::root_structure_count); - cl.def_readwrite("max_tree_depth", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_tree_depth); - cl.def_readwrite("max_index_size", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_index_size); - cl.def_readwrite("max_distance", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_distance); - cl.def_readwrite("components", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::components); - cl.def_readwrite("root_snarl_components", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::root_snarl_components); - cl.def_readwrite("temp_chain_records", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::temp_chain_records); - cl.def_readwrite("temp_snarl_records", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::temp_snarl_records); - cl.def_readwrite("temp_node_records", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::temp_node_records); - cl.def_readwrite("use_oversized_snarls", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::use_oversized_snarls); - cl.def("structure_start_end_as_string", (std::string (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::*)(struct std::pair) const) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::structure_start_end_as_string, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::structure_start_end_as_string(struct std::pair) const --> std::string", pybind11::arg("index")); - cl.def("get_max_record_length", (unsigned long (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::get_max_record_length, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::get_max_record_length() const 
--> unsigned long"); - cl.def("assign", (class bdsg::SnarlDistanceIndex::TemporaryDistanceIndex & (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::*)(const class bdsg::SnarlDistanceIndex::TemporaryDistanceIndex &)) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::operator=, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::operator=(const class bdsg::SnarlDistanceIndex::TemporaryDistanceIndex &) --> class bdsg::SnarlDistanceIndex::TemporaryDistanceIndex &", pybind11::return_value_policy::automatic, pybind11::arg("")); + pybind11::enum_( + cl, "temp_record_t", pybind11::arithmetic(), "") + .value("TEMP_CHAIN", bdsg::SnarlDistanceIndex::TEMP_CHAIN) + .value("TEMP_SNARL", bdsg::SnarlDistanceIndex::TEMP_SNARL) + .value("TEMP_NODE", bdsg::SnarlDistanceIndex::TEMP_NODE) + .value("TEMP_ROOT", bdsg::SnarlDistanceIndex::TEMP_ROOT) + .export_values(); - { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord file:bdsg/snarl_distance_index.hpp line:1544 - auto & enclosing_class = cl; - pybind11::class_> cl(enclosing_class, "TemporaryRecord", ""); - cl.def( pybind11::init( [](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord const &o){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord(o); } ) ); - cl.def( pybind11::init( [](){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord(); } ) ); - cl.def("assign", (struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord & (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord::*)(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord &)) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord::operator=, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord::operator=(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord &) --> struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord &", 
pybind11::return_value_policy::automatic, pybind11::arg("")); - } + cl.def( + "serialize", + [](bdsg::SnarlDistanceIndex &o, const std::string &a0) -> void { + return o.serialize(a0); + }, + "", pybind11::arg("filename")); + cl.def( + "deserialize", + [](bdsg::SnarlDistanceIndex &o, const std::string &a0) -> void { + return o.deserialize(a0); + }, + "", pybind11::arg("filename")); + cl.def("dissociate", + (void (bdsg::SnarlDistanceIndex::*)())&bdsg::SnarlDistanceIndex:: + dissociate, + "C++: bdsg::SnarlDistanceIndex::dissociate() --> void"); + cl.def( + "serialize", + (void (bdsg::SnarlDistanceIndex::*)( + const class std::function &) + const) & + bdsg::SnarlDistanceIndex::serialize, + "C++: bdsg::SnarlDistanceIndex::serialize(const class " + "std::function &) const --> void", + pybind11::arg("iteratee")); + cl.def("serialize", + (void (bdsg::SnarlDistanceIndex::*)( + int))&bdsg::SnarlDistanceIndex::serialize, + "C++: bdsg::SnarlDistanceIndex::serialize(int) --> void", + pybind11::arg("fd")); + cl.def("deserialize", + (void (bdsg::SnarlDistanceIndex::*)( + int))&bdsg::SnarlDistanceIndex::deserialize, + "C++: bdsg::SnarlDistanceIndex::deserialize(int) --> void", + pybind11::arg("fd")); + cl.def("get_magic_number", + (unsigned int (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::get_magic_number, + "C++: bdsg::SnarlDistanceIndex::get_magic_number() const --> " + "unsigned int"); + cl.def("get_prefix", + (std::string (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::get_prefix, + "C++: bdsg::SnarlDistanceIndex::get_prefix() const --> std::string"); + cl.def( + "preload", + [](bdsg::SnarlDistanceIndex const &o) -> void { return o.preload(); }, + ""); + cl.def("preload", + (void (bdsg::SnarlDistanceIndex::*)(bool) const) & + bdsg::SnarlDistanceIndex::preload, + "Allow for preloading the index for more accurate timing of " + "algorithms\n that use it, if it fits in memory. 
If blocking is " + "true, waits for the\n index to be paged in. Otherwise, just tells " + "the OS that we will want to\n use it.\n\nC++: " + "bdsg::SnarlDistanceIndex::preload(bool) const --> void", + pybind11::arg("blocking")); + cl.def( + "maximum_distance", + [](bdsg::SnarlDistanceIndex const &o, const long long &a0, + const bool &a1, const unsigned long &a2, const long long &a3, + const bool &a4, const unsigned long &a5) -> unsigned long { + return o.maximum_distance(a0, a1, a2, a3, a4, a5); + }, + "", pybind11::arg("id1"), pybind11::arg("rev1"), + pybind11::arg("offset1"), pybind11::arg("id2"), pybind11::arg("rev2"), + pybind11::arg("offset2")); + cl.def( + "maximum_distance", + [](bdsg::SnarlDistanceIndex const &o, const long long &a0, + const bool &a1, const unsigned long &a2, const long long &a3, + const bool &a4, const unsigned long &a5, + bool const &a6) -> unsigned long { + return o.maximum_distance(a0, a1, a2, a3, a4, a5, a6); + }, + "", pybind11::arg("id1"), pybind11::arg("rev1"), + pybind11::arg("offset1"), pybind11::arg("id2"), pybind11::arg("rev2"), + pybind11::arg("offset2"), pybind11::arg("unoriented_distance")); + cl.def("maximum_distance", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const long long, const bool, const unsigned long, + const long long, const bool, const unsigned long, bool, + const class handlegraph::HandleGraph *) const) & + bdsg::SnarlDistanceIndex::maximum_distance, + "Find an approximation of the maximum distance between two " + "positions. 
\nThis isn't a true maximum- the only guarantee is that " + "it's greater than or equal to the minimum distance.\n\nC++: " + "bdsg::SnarlDistanceIndex::maximum_distance(const long long, const " + "bool, const unsigned long, const long long, const bool, const " + "unsigned long, bool, const class handlegraph::HandleGraph *) const " + "--> unsigned long", + pybind11::arg("id1"), pybind11::arg("rev1"), + pybind11::arg("offset1"), pybind11::arg("id2"), + pybind11::arg("rev2"), pybind11::arg("offset2"), + pybind11::arg("unoriented_distance"), pybind11::arg("graph")); + cl.def( + "distance_in_parent", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0, + const struct handlegraph::net_handle_t &a1, + const struct handlegraph::net_handle_t &a2) -> unsigned long { + return o.distance_in_parent(a0, a1, a2); + }, + "", pybind11::arg("parent"), pybind11::arg("child1"), + pybind11::arg("child2")); + cl.def( + "distance_in_parent", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0, + const struct handlegraph::net_handle_t &a1, + const struct handlegraph::net_handle_t &a2, + const class handlegraph::HandleGraph *a3) -> unsigned long { + return o.distance_in_parent(a0, a1, a2, a3); + }, + "", pybind11::arg("parent"), pybind11::arg("child1"), + pybind11::arg("child2"), pybind11::arg("graph")); + cl.def( + "distance_in_parent", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + const struct handlegraph::net_handle_t &, + const struct handlegraph::net_handle_t &, + const class handlegraph::HandleGraph *, unsigned long) const) & + bdsg::SnarlDistanceIndex::distance_in_parent, + "C++: bdsg::SnarlDistanceIndex::distance_in_parent(const struct " + "handlegraph::net_handle_t &, const struct handlegraph::net_handle_t " + "&, const struct handlegraph::net_handle_t &, const class " + "handlegraph::HandleGraph *, unsigned long) const --> unsigned long", + pybind11::arg("parent"), 
pybind11::arg("child1"), + pybind11::arg("child2"), pybind11::arg("graph"), + pybind11::arg("distance_limit")); + cl.def( + "distance_in_snarl", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0, const unsigned long &a1, + const bool &a2, const unsigned long &a3, + const bool &a4) -> unsigned long { + return o.distance_in_snarl(a0, a1, a2, a3, a4); + }, + "", pybind11::arg("parent"), pybind11::arg("rank1"), + pybind11::arg("right_side1"), pybind11::arg("rank2"), + pybind11::arg("right_side2")); + cl.def( + "distance_in_snarl", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0, const unsigned long &a1, + const bool &a2, const unsigned long &a3, const bool &a4, + const class handlegraph::HandleGraph *a5) -> unsigned long { + return o.distance_in_snarl(a0, a1, a2, a3, a4, a5); + }, + "", pybind11::arg("parent"), pybind11::arg("rank1"), + pybind11::arg("right_side1"), pybind11::arg("rank2"), + pybind11::arg("right_side2"), pybind11::arg("graph")); + cl.def("distance_in_snarl", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, const unsigned long &, + const bool &, const unsigned long &, const bool &, + const class handlegraph::HandleGraph *, unsigned long) const) & + bdsg::SnarlDistanceIndex::distance_in_snarl, + "C++: bdsg::SnarlDistanceIndex::distance_in_snarl(const struct " + "handlegraph::net_handle_t &, const unsigned long &, const bool &, " + "const unsigned long &, const bool &, const class " + "handlegraph::HandleGraph *, unsigned long) const --> unsigned long", + pybind11::arg("parent"), pybind11::arg("rank1"), + pybind11::arg("right_side1"), pybind11::arg("rank2"), + pybind11::arg("right_side2"), pybind11::arg("graph"), + pybind11::arg("distance_limit")); + cl.def( + "max_distance_in_parent", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0, + const struct handlegraph::net_handle_t &a1, + const struct 
handlegraph::net_handle_t &a2) -> unsigned long { + return o.max_distance_in_parent(a0, a1, a2); + }, + "", pybind11::arg("parent"), pybind11::arg("child1"), + pybind11::arg("child2")); + cl.def( + "max_distance_in_parent", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0, + const struct handlegraph::net_handle_t &a1, + const struct handlegraph::net_handle_t &a2, + const class handlegraph::HandleGraph *a3) -> unsigned long { + return o.max_distance_in_parent(a0, a1, a2, a3); + }, + "", pybind11::arg("parent"), pybind11::arg("child1"), + pybind11::arg("child2"), pybind11::arg("graph")); + cl.def( + "max_distance_in_parent", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + const struct handlegraph::net_handle_t &, + const struct handlegraph::net_handle_t &, + const class handlegraph::HandleGraph *, unsigned long) const) & + bdsg::SnarlDistanceIndex::max_distance_in_parent, + "Find the maximum distance between two children in the parent. 
\nThis " + "is the same as distance_in_parent for everything except children of " + "chains\n\nC++: bdsg::SnarlDistanceIndex::max_distance_in_parent(const " + "struct handlegraph::net_handle_t &, const struct " + "handlegraph::net_handle_t &, const struct handlegraph::net_handle_t " + "&, const class handlegraph::HandleGraph *, unsigned long) const --> " + "unsigned long", + pybind11::arg("parent"), pybind11::arg("child1"), + pybind11::arg("child2"), pybind11::arg("graph"), + pybind11::arg("distance_limit")); + cl.def( + "distance_to_parent_bound", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0, bool const &a1, + struct handlegraph::net_handle_t const &a2) -> unsigned long { + return o.distance_to_parent_bound(a0, a1, a2); + }, + "", pybind11::arg("parent"), pybind11::arg("to_start"), + pybind11::arg("child")); + cl.def( + "distance_to_parent_bound", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, bool, + struct handlegraph::net_handle_t, + class std::tuple< + enum bdsg::SnarlDistanceIndex::net_handle_record_t, + enum bdsg::SnarlDistanceIndex::net_handle_record_t, + enum bdsg::SnarlDistanceIndex::net_handle_record_t, + enum bdsg::SnarlDistanceIndex::net_handle_record_t>) const) & + bdsg::SnarlDistanceIndex::distance_to_parent_bound, + "Get the distance from the child to the start or end bound of the " + "parent.\nparent_and_child_types are hints to figure out the type of " + "snarl/chain records the parent and child are.\ntuple of parent record " + "type, parent handle type, child record type, child handle type.\nThis " + "is really just used to see if the parent and child are trivial " + "chains, so it might not be exactly what the actual record is.\n\nC++: " + "bdsg::SnarlDistanceIndex::distance_to_parent_bound(const struct " + "handlegraph::net_handle_t &, bool, struct handlegraph::net_handle_t, " + "class std::tuple) const --> unsigned " + "long", + pybind11::arg("parent"), 
pybind11::arg("to_start"), + pybind11::arg("child"), pybind11::arg("parent_and_child_types")); + cl.def( + "into_which_snarl", + (class std::tuple (bdsg::SnarlDistanceIndex::*)( + const long long &, const bool &) const) & + bdsg::SnarlDistanceIndex::into_which_snarl, + "If this node id and orientation is pointing into a snarl, then return " + "the start.\nnode id and orientation pointing into the snarl, and if " + "the snarl is trivial.\nReturns <0, false, false> if it doesn't point " + "into a snarl.\n\nC++: " + "bdsg::SnarlDistanceIndex::into_which_snarl(const long long &, const " + "bool &) const --> class std::tuple", + pybind11::arg("id"), pybind11::arg("reverse")); + cl.def("is_ordered_in_chain", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_ordered_in_chain, + "Return true if child1 comes before child2 in the chain. \n\nC++: " + "bdsg::SnarlDistanceIndex::is_ordered_in_chain(const struct " + "handlegraph::net_handle_t &, const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("child1"), pybind11::arg("child2")); + cl.def("is_externally_start_end_connected", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t) const) & + bdsg::SnarlDistanceIndex::is_externally_start_end_connected, + "C++: " + "bdsg::SnarlDistanceIndex::is_externally_start_end_connected(const " + "struct handlegraph::net_handle_t) const --> bool", + pybind11::arg("net")); + cl.def("is_externally_start_start_connected", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t) const) & + bdsg::SnarlDistanceIndex::is_externally_start_start_connected, + "C++: " + "bdsg::SnarlDistanceIndex::is_externally_start_start_connected(" + "const struct handlegraph::net_handle_t) const --> bool", + pybind11::arg("net")); + cl.def( + "is_externally_end_end_connected", + (bool (bdsg::SnarlDistanceIndex::*)( + const 
struct handlegraph::net_handle_t) const) & + bdsg::SnarlDistanceIndex::is_externally_end_end_connected, + "C++: bdsg::SnarlDistanceIndex::is_externally_end_end_connected(const " + "struct handlegraph::net_handle_t) const --> bool", + pybind11::arg("net")); + cl.def( + "lowest_common_ancestor", + (struct std::pair ( + bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::lowest_common_ancestor, + "For two net handles, get a net handle lowest common ancestor.\nIf the " + "lowest common ancestor is the root, then the two handles may be " + "in\ndifferent connected components. In this case, return " + "false.\n\nC++: bdsg::SnarlDistanceIndex::lowest_common_ancestor(const " + "struct handlegraph::net_handle_t &, const struct " + "handlegraph::net_handle_t &) const --> struct std::pair", + pybind11::arg("net1"), pybind11::arg("net2")); + cl.def("node_length", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::node_length, + "Return the length of the net, which must represent a node (or " + "sentinel of a snarl)\n\nC++: " + "bdsg::SnarlDistanceIndex::node_length(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("minimum_length", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::minimum_length, + "This is also the length of a net, but it can also be a snarl or " + "chain. 
\nThe length of a chain includes the boundary nodes, a " + "snarl does not.\nA looping chain only includes the start/end node " + "once\n\nC++: bdsg::SnarlDistanceIndex::minimum_length(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("maximum_length", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::maximum_length, + "C++: bdsg::SnarlDistanceIndex::maximum_length(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("chain_minimum_length", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::chain_minimum_length, + "The length of a chain. If it is a multicomponent chain, then the " + "length of \nthe last component, which is used for calculating " + "distance, instead of inf \n\nC++: " + "bdsg::SnarlDistanceIndex::chain_minimum_length(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("node_id", + (long long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::node_id, + "What is the node id of the node represented by this net " + "handle.\nnet must be a node or a sentinel\n\nC++: " + "bdsg::SnarlDistanceIndex::node_id(const struct " + "handlegraph::net_handle_t &) const --> long long", + pybind11::arg("net")); + cl.def("has_node", + (bool (bdsg::SnarlDistanceIndex::*)(const long long) const) & + bdsg::SnarlDistanceIndex::has_node, + "Does the graph have this node?\n\nC++: " + "bdsg::SnarlDistanceIndex::has_node(const long long) const --> bool", + pybind11::arg("id")); + cl.def("is_reversed_in_parent", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_reversed_in_parent, + "Only really relevant for nodes in chains, is 
the node\ntraversed " + "backwards relative to the orientation of the chain\n\nC++: " + "bdsg::SnarlDistanceIndex::is_reversed_in_parent(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def( + "get_node_net_handle", + [](bdsg::SnarlDistanceIndex const &o, const long long &a0) + -> handlegraph::net_handle_t { return o.get_node_net_handle(a0); }, + "", pybind11::arg("id")); + cl.def( + "get_node_net_handle", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const long long, bool) const) & + bdsg::SnarlDistanceIndex::get_node_net_handle, + "Get a net handle from a node and, optionally, an orientation\n\nC++: " + "bdsg::SnarlDistanceIndex::get_node_net_handle(const long long, bool) " + "const --> struct handlegraph::net_handle_t", + pybind11::arg("id"), pybind11::arg("rev")); + cl.def("get_max_tree_depth", + (unsigned long (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::get_max_tree_depth, + "How deep is the snarl tree? The root is 0, top-level chain is 1, " + "etc\nOnly counts chains\n\nC++: " + "bdsg::SnarlDistanceIndex::get_max_tree_depth() const --> unsigned " + "long"); + cl.def( + "get_depth", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_depth, + "What is the depth of this net handle? 
Nodes and snarls get the depth " + "of their parent.\nThe depth of the root is 0, the depth of its child " + "chains is 1, the depth of the nodes and snarls that are \nchildren of " + "those chains is also 1, and the chains that are children of those " + "snarls have depth 2\n\nC++: bdsg::SnarlDistanceIndex::get_depth(const " + "struct handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def( + "get_connected_component_number", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_connected_component_number, + "C++: bdsg::SnarlDistanceIndex::get_connected_component_number(const " + "struct handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("get_handle_from_connected_component", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + unsigned long) const) & + bdsg::SnarlDistanceIndex::get_handle_from_connected_component, + "Given the connected component number (from " + "get_connected_component_number), get the\nroot-level handle " + "pointing to it.\nIf the connected component is a root-level snarl, " + "then this may return a \"root\" handle,\nbut it will actually " + "point to the snarl\n\nC++: " + "bdsg::SnarlDistanceIndex::get_handle_from_connected_component(" + "unsigned long) const --> struct handlegraph::net_handle_t", + pybind11::arg("num")); + cl.def("has_connectivity", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + enum handlegraph::SnarlDecomposition::endpoint_t, + enum handlegraph::SnarlDecomposition::endpoint_t) const) & + bdsg::SnarlDistanceIndex::has_connectivity, + "Is there a path between the start and end endpoints within the net " + "handle?\n\nC++: bdsg::SnarlDistanceIndex::has_connectivity(const " + "struct handlegraph::net_handle_t &, enum " + "handlegraph::SnarlDecomposition::endpoint_t, enum " + "handlegraph::SnarlDecomposition::endpoint_t) 
const --> bool", + pybind11::arg("net"), pybind11::arg("start"), pybind11::arg("end")); + cl.def("has_external_connectivity", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + enum handlegraph::SnarlDecomposition::endpoint_t, + enum handlegraph::SnarlDecomposition::endpoint_t) const) & + bdsg::SnarlDistanceIndex::has_external_connectivity, + "Is there a path between the start and end endpoints outside the " + "net handle?\nThis is used for children of the root\n\nC++: " + "bdsg::SnarlDistanceIndex::has_external_connectivity(const struct " + "handlegraph::net_handle_t &, enum " + "handlegraph::SnarlDecomposition::endpoint_t, enum " + "handlegraph::SnarlDecomposition::endpoint_t) const --> bool", + pybind11::arg("net"), pybind11::arg("start"), pybind11::arg("end")); + cl.def("get_prefix_sum_value", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_prefix_sum_value, + "Get the prefix sum value for a node in a chain.\nFails if the " + "parent of net is not a chain\n\nC++: " + "bdsg::SnarlDistanceIndex::get_prefix_sum_value(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("get_max_prefix_sum_value", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_max_prefix_sum_value, + "Get the maximum prefix sum value for a node in a chain.\nFails if " + "the parent of net is not a chain\n\nC++: " + "bdsg::SnarlDistanceIndex::get_max_prefix_sum_value(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("get_forward_loop_value", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_forward_loop_value, + "Get the forward loop value for a node in a chain.\nFails if the " + "parent of net is not a 
chain\n\nC++: " + "bdsg::SnarlDistanceIndex::get_forward_loop_value(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("get_reverse_loop_value", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_reverse_loop_value, + "Get the reverse value for a node in a chain.\nFails if the parent " + "of net is not a chain\n\nC++: " + "bdsg::SnarlDistanceIndex::get_reverse_loop_value(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def( + "get_chain_component", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0) -> unsigned long { + return o.get_chain_component(a0); + }, + "", pybind11::arg("net")); + cl.def("get_chain_component", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, bool) const) & + bdsg::SnarlDistanceIndex::get_chain_component, + "C++: bdsg::SnarlDistanceIndex::get_chain_component(const struct " + "handlegraph::net_handle_t &, bool) const --> unsigned long", + pybind11::arg("net"), pybind11::arg("get_end")); + cl.def( + "get_root", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)() + const) & + bdsg::SnarlDistanceIndex::get_root, + "Get a net handle referring to a tip-to-tip traversal of the contents " + "of the root snarl.\n\nC++: bdsg::SnarlDistanceIndex::get_root() const " + "--> struct handlegraph::net_handle_t"); + cl.def("is_root", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_root, + "Return true if the given handle refers to (a traversal of) the " + "root\nsnarl, and false otherwise.\n\nC++: " + "bdsg::SnarlDistanceIndex::is_root(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_root_snarl", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct 
handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_root_snarl, + "Return true if the given handle refers to (a traversal of) a snarl " + "of the root,\nwhich is considered to be the root but actually " + "refers to a subset of the children \nof the root that are " + "connected\n\nC++: bdsg::SnarlDistanceIndex::is_root_snarl(const " + "struct handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_snarl", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_snarl, + "Returns true if the given net handle refers to (a traversal of) a " + "snarl.\n\nC++: bdsg::SnarlDistanceIndex::is_snarl(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_dag", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_dag, + "Return true if the given snarl is a DAG and false " + "otherwise\nReturns true if the given net_handle_t is not a " + "snarl\n\nC++: bdsg::SnarlDistanceIndex::is_dag(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("snarl")); + cl.def("is_simple_snarl", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_simple_snarl, + "Returns true if the given net handle refers to (a traversal of) a " + "simple snarl\nA simple snarl is a bubble where each child node can " + "only reach the boundary nodes,\nand each side of a node reaches a " + "different boundary node\nThere may also be an edge connecting the " + "two boundary nodes but no additional \nedges are allowed\n\nC++: " + "bdsg::SnarlDistanceIndex::is_simple_snarl(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def( + "is_regular_snarl", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + 
bdsg::SnarlDistanceIndex::is_regular_snarl, + "Returns true if the given net handle refers to (a traversal of) a " + "regular snarl\nA regular snarl is the same as a simple snarl, except " + "that the children may be\nnested chains, rather than being restricted " + "to nodes \n\nC++: bdsg::SnarlDistanceIndex::is_regular_snarl(const " + "struct handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_chain", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_chain, + "Returns true if the given net handle refers to (a traversal of) a " + "chain.\n\nC++: bdsg::SnarlDistanceIndex::is_chain(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_multicomponent_chain", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_multicomponent_chain, + "Returns true if the given net handle refers to (a traversal of) a " + "chain that is not start-end connected\n\nC++: " + "bdsg::SnarlDistanceIndex::is_multicomponent_chain(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_looping_chain", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_looping_chain, + "Returns true if the given net handle refers to (a traversal of) a " + "chain that loops (a chain where the first and last node are the " + "same).\n\nC++: bdsg::SnarlDistanceIndex::is_looping_chain(const " + "struct handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_trivial_chain", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_trivial_chain, + "Returns true if the given net handle refers to (a traversal of) a " + "trivial chain that represents a single node.\n\nC++: " + 
"bdsg::SnarlDistanceIndex::is_trivial_chain(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_node", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_node, + "Returns true if the given net handle refers to (a traversal of) a " + "single node, and thus has a corresponding handle_t.\n\nC++: " + "bdsg::SnarlDistanceIndex::is_node(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_sentinel", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_sentinel, + "Return true if the given net handle is a snarl bound sentinel (in " + "either\ninward or outward orientation), and false " + "otherwise.\n\nC++: bdsg::SnarlDistanceIndex::is_sentinel(const " + "struct handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def( + "get_net", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::handle_t &, + const class handlegraph::HandleGraph *) const) & + bdsg::SnarlDistanceIndex::get_net, + "Turn a handle to an oriented node into a net handle for a " + "start-to-end or end-to-start traversal of the node, as " + "appropriate.\n\nC++: bdsg::SnarlDistanceIndex::get_net(const struct " + "handlegraph::handle_t &, const class handlegraph::HandleGraph *) " + "const --> struct handlegraph::net_handle_t", + pybind11::arg("handle"), pybind11::arg("graph")); + cl.def( + "get_handle", + (struct handlegraph::handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + const class handlegraph::HandleGraph *) const) & + bdsg::SnarlDistanceIndex::get_handle, + "For a net handle to a traversal of a single node, get the handle for " + "that node in the orientation it is traversed.\nMay not be called for " + "other net handles.\n\nC++: 
bdsg::SnarlDistanceIndex::get_handle(const " + "struct handlegraph::net_handle_t &, const class " + "handlegraph::HandleGraph *) const --> struct handlegraph::handle_t", + pybind11::arg("net"), pybind11::arg("graph")); + cl.def( + "get_parent", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_parent, + "Get the parent snarl of a chain, or the parent chain of a snarl or " + "node.\nIf the child is start-to-end or end-to-start, and the parent " + "is a chain,\nthe chain comes out facing the same way, accounting for " + "the relative\norientation of the child snarl or node in the chain. " + "Otherwise,\neverything is produced as start-to-end, even if that is " + "not actually a\nrealizable traversal of a snarl or chain. May not be " + "called on the root\nsnarl.\n\nAlso works on snarl boundary " + "sentinels.\n\nC++: bdsg::SnarlDistanceIndex::get_parent(const struct " + "handlegraph::net_handle_t &) const --> struct " + "handlegraph::net_handle_t", + pybind11::arg("child")); + cl.def("get_bound", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, bool, bool) const) & + bdsg::SnarlDistanceIndex::get_bound, + "Get the bounding handle for the snarl or chain referenced by the " + "given\nnet handle, getting the start or end facing in or out as " + "appropriate.\n\nFor snarls, returns the bounding sentinel net " + "handles. 
For chains,\nreturns net handles for traversals of the " + "bounding nodes of the chain.\nIf the chain is a looping chain, " + "then the start and end of the chain\nare the same, so the " + "connectivity of the bound indicates which we're\nlooking at; the " + "connectivity will be start-start if it is going \nbackwards in the " + "node, and end-end if it is going forwards.\n\nIgnores traversal " + "type.\n\nMay not be called on traversals of individual " + "nodes.\n\nC++: bdsg::SnarlDistanceIndex::get_bound(const struct " + "handlegraph::net_handle_t &, bool, bool) const --> struct " + "handlegraph::net_handle_t", + pybind11::arg("snarl"), pybind11::arg("get_end"), + pybind11::arg("face_in")); + cl.def("get_node_from_sentinel", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_node_from_sentinel, + "Given the sentinel of a snarl, return a handle to the node " + "representing it\n\nC++: " + "bdsg::SnarlDistanceIndex::get_node_from_sentinel(const struct " + "handlegraph::net_handle_t &) const --> struct " + "handlegraph::net_handle_t", + pybind11::arg("sentinel")); + cl.def( + "flip", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::flip, + "Return a net handle to the same snarl/chain/node in the opposite " + "orientation.\nNo effect on tip-to-tip, start-to-start, or end-to-end " + "net handles. Flips all the others.\n\nC++: " + "bdsg::SnarlDistanceIndex::flip(const struct handlegraph::net_handle_t " + "&) const --> struct handlegraph::net_handle_t", + pybind11::arg("net")); + cl.def("canonical", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::canonical, + "Get a canonical traversal handle from any net handle. 
All handles " + "to the\nsame net graph element have the same canonical traversal. " + "That canonical\ntraversal must be realizable, and might not always " + "be start-to-end or\neven consistently be the same kind of " + "traversal for different snarls,\nchains, or nodes. Mostly useful " + "to normalize for equality comparisons.\n\nAny root snarl will " + "become just the root\nAnything without connectivity will get " + "START_END\n\nC++: bdsg::SnarlDistanceIndex::canonical(const struct " + "handlegraph::net_handle_t &) const --> struct " + "handlegraph::net_handle_t", + pybind11::arg("net")); + cl.def("start_end_traversal_of", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::start_end_traversal_of, + "Makes a start-end traversal of the net.\nFaster than canonical " + "because it doesn't check the index for anything \n\nC++: " + "bdsg::SnarlDistanceIndex::start_end_traversal_of(const struct " + "handlegraph::net_handle_t &) const --> struct " + "handlegraph::net_handle_t", + pybind11::arg("net")); + cl.def("starts_at", + (enum handlegraph::SnarlDecomposition::endpoint_t ( + bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::starts_at, + "Return the kind of location at which the given traversal " + "starts.\n\nC++: bdsg::SnarlDistanceIndex::starts_at(const struct " + "handlegraph::net_handle_t &) const --> enum " + "handlegraph::SnarlDecomposition::endpoint_t", + pybind11::arg("traversal")); + cl.def("ends_at", + (enum handlegraph::SnarlDecomposition::endpoint_t ( + bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::ends_at, + "Return the kind of location at which the given traversal " + "ends.\n\nC++: bdsg::SnarlDistanceIndex::ends_at(const struct " + "handlegraph::net_handle_t &) const --> enum " + "handlegraph::SnarlDecomposition::endpoint_t", + 
pybind11::arg("traversal")); + cl.def( + "get_rank_in_parent", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_rank_in_parent, + "For a child of a snarl, the rank is used to calculate the " + "distance\n\nC++: bdsg::SnarlDistanceIndex::get_rank_in_parent(const " + "struct handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("connected_component_count", + (unsigned long (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::connected_component_count, + "How many connected components are in this graph?\nThis returns the " + "number of topological connected components, not necessarily the " + "\nnumber of nodes in the top-level snarl \n\nC++: " + "bdsg::SnarlDistanceIndex::connected_component_count() const --> " + "unsigned long"); + cl.def("get_snarl_child_from_rank", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, const unsigned long &) + const) & + bdsg::SnarlDistanceIndex::get_snarl_child_from_rank, + "Get the child of a snarl from its rank. This shouldn't be exposed " + "to the public interface but I need it\nPlease don't use it\nFor 0 " + "or 1, returns the sentinel facing in. Otherwise return the child " + "as a chain going START_END\n\nC++: " + "bdsg::SnarlDistanceIndex::get_snarl_child_from_rank(const struct " + "handlegraph::net_handle_t &, const unsigned long &) const --> " + "struct handlegraph::net_handle_t", + pybind11::arg("snarl"), pybind11::arg("rank")); + cl.def( + "get_parent_traversal", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_parent_traversal, + "Get a net handle for traversals of a snarl or chain that " + "contains\nthe given oriented bounding node traversals or sentinels. 
" + "Given two\nsentinels for a snarl, produces a net handle to a " + "start-to-end,\nend-to-end, end-to-start, or start-to-start traversal " + "of that snarl.\nGiven handles to traversals of the bounding nodes of " + "a chain, similarly\nproduces a net handle to a traversal of the " + "chain.\n\nFor a chain, either or both handles can also be a snarl " + "containing tips,\nfor a tip-to-start, tip-to-end, start-to-tip, " + "end-to-tip, or tip-to-tip\ntraversal. Similarly, for a snarl, either " + "or both handles can be a chain\nin the snarl that contains internal " + "tips, or that has no edges on the\nappropriate end.\n\nMay only be " + "called if a path actually exists between the given start\nand " + "end.\n\nC++: bdsg::SnarlDistanceIndex::get_parent_traversal(const " + "struct handlegraph::net_handle_t &, const struct " + "handlegraph::net_handle_t &) const --> struct " + "handlegraph::net_handle_t", + pybind11::arg("traversal_start"), pybind11::arg("traversal_end")); + cl.def_static("has_distances", + (bool (*)(enum bdsg::SnarlDistanceIndex::record_t))&bdsg:: + SnarlDistanceIndex::has_distances, + "C++: bdsg::SnarlDistanceIndex::has_distances(enum " + "bdsg::SnarlDistanceIndex::record_t) --> const bool", + pybind11::arg("type")); + cl.def_static( + "get_record_handle_type", + (const enum bdsg::SnarlDistanceIndex::net_handle_record_t (*)( + enum bdsg::SnarlDistanceIndex::record_t))&bdsg::SnarlDistanceIndex:: + get_record_handle_type, + "Given the type of the record, return the handle type. Some record " + "types can represent multiple things,\nfor example a simple snarl " + "record is used to represent a snarl, and the nodes/trivial chains in " + "it.\nThis will return whatever is higher on the snarl tree. 
A simple " + "snarl will be considered a snarl,\na root snarl will be considered a " + "root, etc\n\nC++: " + "bdsg::SnarlDistanceIndex::get_record_handle_type(enum " + "bdsg::SnarlDistanceIndex::record_t) --> const enum " + "bdsg::SnarlDistanceIndex::net_handle_record_t", + pybind11::arg("type")); + cl.def_static("get_record_offset", + (const unsigned long (*)( + const struct handlegraph::net_handle_t + &))&bdsg::SnarlDistanceIndex::get_record_offset, + "The offset into records that this handle points to\n\nC++: " + "bdsg::SnarlDistanceIndex::get_record_offset(const struct " + "handlegraph::net_handle_t &) --> const unsigned long", + pybind11::arg("net_handle")); + cl.def_static("get_node_record_offset", + (const unsigned long (*)( + const struct handlegraph::net_handle_t + &))&bdsg::SnarlDistanceIndex::get_node_record_offset, + "The offset of a node in a trivial snarl (0 if it isn't a " + "node in a trivial snarl)\n\nC++: " + "bdsg::SnarlDistanceIndex::get_node_record_offset(const " + "struct handlegraph::net_handle_t &) --> const unsigned long", + pybind11::arg("net_handle")); + cl.def_static("get_connectivity", + (const enum bdsg::SnarlDistanceIndex::connectivity_t (*)( + const struct handlegraph::net_handle_t + &))&bdsg::SnarlDistanceIndex::get_connectivity, + "C++: bdsg::SnarlDistanceIndex::get_connectivity(const " + "struct handlegraph::net_handle_t &) --> const enum " + "bdsg::SnarlDistanceIndex::connectivity_t", + pybind11::arg("net_handle")); + cl.def_static("get_handle_type", + (const enum bdsg::SnarlDistanceIndex::net_handle_record_t (*)( + const struct handlegraph::net_handle_t + &))&bdsg::SnarlDistanceIndex::get_handle_type, + "C++: bdsg::SnarlDistanceIndex::get_handle_type(const struct " + "handlegraph::net_handle_t &) --> const enum " + "bdsg::SnarlDistanceIndex::net_handle_record_t", + pybind11::arg("net_handle")); + cl.def_static( + "get_net_handle_from_values", + [](unsigned long const &a0, + enum bdsg::SnarlDistanceIndex::connectivity_t const 
&a1, + enum bdsg::SnarlDistanceIndex::net_handle_record_t const &a2) + -> const handlegraph::net_handle_t { + return bdsg::SnarlDistanceIndex::get_net_handle_from_values(a0, a1, + a2); + }, + "", pybind11::arg("pointer"), pybind11::arg("connectivity"), + pybind11::arg("type")); + cl.def_static( + "get_net_handle_from_values", + (const struct handlegraph::net_handle_t (*)( + unsigned long, enum bdsg::SnarlDistanceIndex::connectivity_t, + enum bdsg::SnarlDistanceIndex::net_handle_record_t, + unsigned long))&bdsg::SnarlDistanceIndex:: + get_net_handle_from_values, + "C++: bdsg::SnarlDistanceIndex::get_net_handle_from_values(unsigned " + "long, enum bdsg::SnarlDistanceIndex::connectivity_t, enum " + "bdsg::SnarlDistanceIndex::net_handle_record_t, unsigned long) --> " + "const struct handlegraph::net_handle_t", + pybind11::arg("pointer"), pybind11::arg("connectivity"), + pybind11::arg("type"), pybind11::arg("node_offset")); + cl.def("get_net_handle", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + unsigned long, enum bdsg::SnarlDistanceIndex::connectivity_t) + const) & + bdsg::SnarlDistanceIndex::get_net_handle, + "C++: bdsg::SnarlDistanceIndex::get_net_handle(unsigned long, enum " + "bdsg::SnarlDistanceIndex::connectivity_t) const --> struct " + "handlegraph::net_handle_t", + pybind11::arg("pointer"), pybind11::arg("connectivity")); + cl.def("get_net_handle", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + unsigned long) const) & + bdsg::SnarlDistanceIndex::get_net_handle, + "C++: bdsg::SnarlDistanceIndex::get_net_handle(unsigned long) const " + "--> struct handlegraph::net_handle_t", + pybind11::arg("pointer")); + cl.def_static( + "get_node_pointer_offset", + (const unsigned long (*)( + const long long &, const long long &, + unsigned long))&bdsg::SnarlDistanceIndex::get_node_pointer_offset, + "Get the offset into snarl_tree_records for the pointer to a node " + "record.\n\nC++: " + 
"bdsg::SnarlDistanceIndex::get_node_pointer_offset(const long long &, " + "const long long &, unsigned long) --> const unsigned long", + pybind11::arg("id"), pybind11::arg("min_node_id"), + pybind11::arg("component_count")); + cl.def_static( + "endpoints_to_connectivity", + (const enum bdsg::SnarlDistanceIndex::connectivity_t (*)( + enum handlegraph::SnarlDecomposition::endpoint_t, + enum handlegraph::SnarlDecomposition::endpoint_t))&bdsg:: + SnarlDistanceIndex::endpoints_to_connectivity, + "C++: bdsg::SnarlDistanceIndex::endpoints_to_connectivity(enum " + "handlegraph::SnarlDecomposition::endpoint_t, enum " + "handlegraph::SnarlDecomposition::endpoint_t) --> const enum " + "bdsg::SnarlDistanceIndex::connectivity_t", + pybind11::arg("start"), pybind11::arg("end")); + cl.def_static("get_start_endpoint", + (enum handlegraph::SnarlDecomposition::endpoint_t (*)( + enum bdsg::SnarlDistanceIndex::connectivity_t))&bdsg:: + SnarlDistanceIndex::get_start_endpoint, + "C++: bdsg::SnarlDistanceIndex::get_start_endpoint(enum " + "bdsg::SnarlDistanceIndex::connectivity_t) --> const enum " + "handlegraph::SnarlDecomposition::endpoint_t", + pybind11::arg("connectivity")); + cl.def_static("get_start_endpoint", + (enum handlegraph::SnarlDecomposition::endpoint_t (*)( + struct handlegraph::net_handle_t))&bdsg:: + SnarlDistanceIndex::get_start_endpoint, + "C++: bdsg::SnarlDistanceIndex::get_start_endpoint(struct " + "handlegraph::net_handle_t) --> const enum " + "handlegraph::SnarlDecomposition::endpoint_t", + pybind11::arg("net")); + cl.def_static("get_end_endpoint", + (enum handlegraph::SnarlDecomposition::endpoint_t (*)( + enum bdsg::SnarlDistanceIndex::connectivity_t))&bdsg:: + SnarlDistanceIndex::get_end_endpoint, + "C++: bdsg::SnarlDistanceIndex::get_end_endpoint(enum " + "bdsg::SnarlDistanceIndex::connectivity_t) --> const enum " + "handlegraph::SnarlDecomposition::endpoint_t", + pybind11::arg("connectivity")); + cl.def_static("get_end_endpoint", + (enum 
handlegraph::SnarlDecomposition::endpoint_t (*)( + const struct handlegraph::net_handle_t + &))&bdsg::SnarlDistanceIndex::get_end_endpoint, + "C++: bdsg::SnarlDistanceIndex::get_end_endpoint(const " + "struct handlegraph::net_handle_t &) --> const enum " + "handlegraph::SnarlDecomposition::endpoint_t", + pybind11::arg("net")); + cl.def_static( + "connectivity_to_endpoints", + (const struct std::pair< + enum handlegraph::SnarlDecomposition::endpoint_t, + enum handlegraph::SnarlDecomposition::endpoint_t> (*)( + const enum bdsg::SnarlDistanceIndex::connectivity_t + &))&bdsg::SnarlDistanceIndex::connectivity_to_endpoints, + "C++: bdsg::SnarlDistanceIndex::connectivity_to_endpoints(const enum " + "bdsg::SnarlDistanceIndex::connectivity_t &) --> const struct " + "std::pair", + pybind11::arg("connectivity")); + cl.def("set_snarl_size_limit", + (void (bdsg::SnarlDistanceIndex::*)( + unsigned long))&bdsg::SnarlDistanceIndex::set_snarl_size_limit, + "C++: bdsg::SnarlDistanceIndex::set_snarl_size_limit(unsigned long) " + "--> void", + pybind11::arg("size")); + cl.def( + "set_only_top_level_chain_distances", + (void (bdsg::SnarlDistanceIndex::*)( + bool))&bdsg::SnarlDistanceIndex::set_only_top_level_chain_distances, + "C++: " + "bdsg::SnarlDistanceIndex::set_only_top_level_chain_distances(bool) " + "--> void", + pybind11::arg("only_chain")); + cl.def("net_handle_as_string", + (std::string (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::net_handle_as_string, + "C++: bdsg::SnarlDistanceIndex::net_handle_as_string(const struct " + "handlegraph::net_handle_t &) const --> std::string", + pybind11::arg("net")); + cl.def("traverse_decomposition", + (bool (bdsg::SnarlDistanceIndex::*)( + const class std::function &, + const class std::function &, + const class std::function &) const) & + bdsg::SnarlDistanceIndex::traverse_decomposition, + "C++: bdsg::SnarlDistanceIndex::traverse_decomposition(const class " + 
"std::function &, " + "const class std::function &, const class std::function &) const --> bool", + pybind11::arg("snarl_iteratee"), pybind11::arg("chain_iteratee"), + pybind11::arg("node_iteratee")); + cl.def("traverse_decomposition_helper", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + const class std::function &, + const class std::function &, + const class std::function &) const) & + bdsg::SnarlDistanceIndex::traverse_decomposition_helper, + "C++: bdsg::SnarlDistanceIndex::traverse_decomposition_helper(const " + "struct handlegraph::net_handle_t &, const class std::function &, const class " + "std::function &, " + "const class std::function &) const --> bool", + pybind11::arg("net"), pybind11::arg("snarl_iteratee"), + pybind11::arg("chain_iteratee"), pybind11::arg("node_iteratee")); + cl.def("print_self", + (void (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::print_self, + "C++: bdsg::SnarlDistanceIndex::print_self() const --> void"); + cl.def("print_descendants_of", + (void (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t) const) & + bdsg::SnarlDistanceIndex::print_descendants_of, + "C++: bdsg::SnarlDistanceIndex::print_descendants_of(const struct " + "handlegraph::net_handle_t) const --> void", + pybind11::arg("net")); + cl.def("print_snarl_stats", + (void (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::print_snarl_stats, + "C++: bdsg::SnarlDistanceIndex::print_snarl_stats() const --> void"); + cl.def( + "write_snarls_to_json", + (void (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::write_snarls_to_json, + "C++: bdsg::SnarlDistanceIndex::write_snarls_to_json() const --> void"); + cl.def("validate_index", + (void (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::validate_index, + "C++: bdsg::SnarlDistanceIndex::validate_index() const --> void"); + cl.def("validate_descendants_of", + (void 
(bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t) const) & + bdsg::SnarlDistanceIndex::validate_descendants_of, + "C++: bdsg::SnarlDistanceIndex::validate_descendants_of(const " + "struct handlegraph::net_handle_t) const --> void", + pybind11::arg("net")); + cl.def("validate_ancestors_of", + (void (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t) const) & + bdsg::SnarlDistanceIndex::validate_ancestors_of, + "C++: bdsg::SnarlDistanceIndex::validate_ancestors_of(const struct " + "handlegraph::net_handle_t) const --> void", + pybind11::arg("net")); + cl.def( + "get_usage", + (class std::tuple ( + bdsg::SnarlDistanceIndex::*)())&bdsg::SnarlDistanceIndex::get_usage, + "C++: bdsg::SnarlDistanceIndex::get_usage() --> class " + "std::tuple"); + cl.def_static("sum", + (unsigned long (*)( + const unsigned long &, + const unsigned long &))&bdsg::SnarlDistanceIndex::sum, + "Add integers, returning max() if any of them are " + "max()\n\nC++: bdsg::SnarlDistanceIndex::sum(const unsigned " + "long &, const unsigned long &) --> unsigned long", + pybind11::arg("val1"), pybind11::arg("val2")); + cl.def_static( + "minus", + (unsigned long (*)(unsigned long, + unsigned long))&bdsg::SnarlDistanceIndex::minus, + "C++: bdsg::SnarlDistanceIndex::minus(unsigned long, unsigned long) " + "--> unsigned long", + pybind11::arg("x"), pybind11::arg("y")); + cl.def_static( + "maximum", + (unsigned long (*)(unsigned long, + unsigned long))&bdsg::SnarlDistanceIndex::maximum, + "C++: bdsg::SnarlDistanceIndex::maximum(unsigned long, unsigned long) " + "--> unsigned long", + pybind11::arg("x"), pybind11::arg("y")); + cl.def_static( + "bit_width", + (unsigned long (*)(unsigned long))&bdsg::SnarlDistanceIndex::bit_width, + "C++: bdsg::SnarlDistanceIndex::bit_width(unsigned long) --> unsigned " + "long", + pybind11::arg("value")); + cl.def("time_accesses", + (void (bdsg::SnarlDistanceIndex::*)())&bdsg::SnarlDistanceIndex:: + time_accesses, + "C++: 
bdsg::SnarlDistanceIndex::time_accesses() --> void"); - { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord file:bdsg/snarl_distance_index.hpp line:1546 - auto & enclosing_class = cl; - pybind11::class_, bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord> cl(enclosing_class, "TemporaryChainRecord", ""); - cl.def( pybind11::init( [](){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord(); } ) ); - cl.def( pybind11::init( [](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord const &o){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord(o); } ) ); - cl.def_readwrite("start_node_id", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::start_node_id); - cl.def_readwrite("end_node_id", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::end_node_id); - cl.def_readwrite("end_node_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::end_node_length); - cl.def_readwrite("tree_depth", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::tree_depth); - cl.def_readwrite("parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::parent); - cl.def_readwrite("min_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::min_length); - cl.def_readwrite("max_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::max_length); - cl.def_readwrite("distance_left_start", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::distance_left_start); - cl.def_readwrite("distance_right_start", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::distance_right_start); - cl.def_readwrite("distance_left_end", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::distance_left_end); - cl.def_readwrite("distance_right_end", 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::distance_right_end); - cl.def_readwrite("rank_in_parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::rank_in_parent); - cl.def_readwrite("root_snarl_index", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::root_snarl_index); - cl.def_readwrite("start_node_rev", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::start_node_rev); - cl.def_readwrite("end_node_rev", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::end_node_rev); - cl.def_readwrite("reversed_in_parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::reversed_in_parent); - cl.def_readwrite("is_trivial", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::is_trivial); - cl.def_readwrite("is_tip", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::is_tip); - cl.def_readwrite("loopable", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::loopable); - cl.def_readwrite("children", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::children); - cl.def_readwrite("prefix_sum", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::prefix_sum); - cl.def_readwrite("max_prefix_sum", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::max_prefix_sum); - cl.def_readwrite("forward_loops", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::forward_loops); - cl.def_readwrite("backward_loops", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::backward_loops); - cl.def_readwrite("chain_components", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::chain_components); - cl.def("get_max_record_length", (unsigned long (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::*)(bool) const) 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::get_max_record_length, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::get_max_record_length(bool) const --> unsigned long", pybind11::arg("include_distances")); - cl.def("assign", (struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord & (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::*)(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord &)) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::operator=, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::operator=(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord &) --> struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord &", pybind11::return_value_policy::automatic, pybind11::arg("")); - } + { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex + // file:bdsg/snarl_distance_index.hpp line:1524 + auto &enclosing_class = cl; + pybind11::class_< + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex, + std::shared_ptr> + cl(enclosing_class, "TemporaryDistanceIndex", ""); + cl.def(pybind11::init([]() { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex(); + })); + cl.def(pybind11::init( + [](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex const &o) { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex(o); + })); + cl.def_readwrite( + "min_node_id", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::min_node_id); + cl.def_readwrite( + "max_node_id", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_node_id); + cl.def_readwrite("root_structure_count", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + root_structure_count); + cl.def_readwrite( + "max_tree_depth", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_tree_depth); + cl.def_readwrite( + "max_index_size", + 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_index_size); + cl.def_readwrite( + "max_distance", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_distance); + cl.def_readwrite( + "components", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::components); + cl.def_readwrite("root_snarl_components", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + root_snarl_components); + cl.def_readwrite("temp_chain_records", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + temp_chain_records); + cl.def_readwrite("temp_snarl_records", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + temp_snarl_records); + cl.def_readwrite( + "temp_node_records", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::temp_node_records); + cl.def_readwrite("use_oversized_snarls", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + use_oversized_snarls); + cl.def("structure_start_end_as_string", + (std::string (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::*)( + struct std::pair) const) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + structure_start_end_as_string, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::structure_" + "start_end_as_string(struct std::pair) const " + "--> std::string", + pybind11::arg("index")); + cl.def("get_max_record_length", + (unsigned long ( + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + get_max_record_length, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::get_max_record_" + "length() const --> unsigned long"); + cl.def( + "assign", + (class bdsg::SnarlDistanceIndex::TemporaryDistanceIndex & + (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + *)(const class bdsg::SnarlDistanceIndex::TemporaryDistanceIndex + &)) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::operator=, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::operator=(const " + "class 
bdsg::SnarlDistanceIndex::TemporaryDistanceIndex &) --> class " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex &", + pybind11::return_value_policy::automatic, pybind11::arg("")); - { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord file:bdsg/snarl_distance_index.hpp line:1588 - auto & enclosing_class = cl; - pybind11::class_, bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord> cl(enclosing_class, "TemporarySnarlRecord", ""); - cl.def( pybind11::init( [](){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord(); } ) ); - cl.def( pybind11::init( [](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord const &o){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord(o); } ) ); - cl.def_readwrite("parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::parent); - cl.def_readwrite("start_node_id", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::start_node_id); - cl.def_readwrite("start_node_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::start_node_length); - cl.def_readwrite("end_node_id", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::end_node_id); - cl.def_readwrite("end_node_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::end_node_length); - cl.def_readwrite("node_count", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::node_count); - cl.def_readwrite("min_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::min_length); - cl.def_readwrite("max_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::max_length); - cl.def_readwrite("max_distance", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::max_distance); - cl.def_readwrite("tree_depth", 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::tree_depth); - cl.def_readwrite("distance_start_start", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::distance_start_start); - cl.def_readwrite("distance_end_end", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::distance_end_end); - cl.def_readwrite("rank_in_parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::rank_in_parent); - cl.def_readwrite("reversed_in_parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::reversed_in_parent); - cl.def_readwrite("start_node_rev", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::start_node_rev); - cl.def_readwrite("end_node_rev", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::end_node_rev); - cl.def_readwrite("is_trivial", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::is_trivial); - cl.def_readwrite("is_simple", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::is_simple); - cl.def_readwrite("is_tip", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::is_tip); - cl.def_readwrite("is_root_snarl", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::is_root_snarl); - cl.def_readwrite("include_distances", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::include_distances); - cl.def_readwrite("children", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::children); - cl.def_readwrite("tippy_child_ranks", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::tippy_child_ranks); - cl.def_readwrite("distances", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::distances); - cl.def("get_max_record_length", (unsigned long (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::*)() const) 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::get_max_record_length, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::get_max_record_length() const --> unsigned long"); - cl.def("assign", (struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord & (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::*)(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord &)) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::operator=, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::operator=(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord &) --> struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord &", pybind11::return_value_policy::automatic, pybind11::arg("")); - } + { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord + // file:bdsg/snarl_distance_index.hpp line:1544 + auto &enclosing_class = cl; + pybind11::class_< + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord, + std::shared_ptr> + cl(enclosing_class, "TemporaryRecord", ""); + cl.def( + pybind11::init([](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryRecord const &o) { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryRecord(o); + })); + cl.def(pybind11::init([]() { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryRecord(); + })); + cl.def("assign", + (struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryRecord & + (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryRecord::*)(const struct bdsg::SnarlDistanceIndex:: + TemporaryDistanceIndex:: + TemporaryRecord &)) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryRecord::operator=, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryRecord::operator=(const struct " + 
"bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryRecord &) --> struct " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryRecord &", + pybind11::return_value_policy::automatic, pybind11::arg("")); + } - { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord file:bdsg/snarl_distance_index.hpp line:1621 - auto & enclosing_class = cl; - pybind11::class_, bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord> cl(enclosing_class, "TemporaryNodeRecord", ""); - cl.def( pybind11::init( [](){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord(); } ) ); - cl.def( pybind11::init( [](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord const &o){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord(o); } ) ); - cl.def_readwrite("node_id", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::node_id); - cl.def_readwrite("parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::parent); - cl.def_readwrite("node_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::node_length); - cl.def_readwrite("rank_in_parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::rank_in_parent); - cl.def_readwrite("root_snarl_index", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::root_snarl_index); - cl.def_readwrite("distance_left_start", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::distance_left_start); - cl.def_readwrite("distance_right_start", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::distance_right_start); - cl.def_readwrite("distance_left_end", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::distance_left_end); - cl.def_readwrite("distance_right_end", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::distance_right_end); - 
cl.def_readwrite("reversed_in_parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::reversed_in_parent); - cl.def_readwrite("is_tip", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::is_tip); - cl.def_static("get_max_record_length", (const unsigned long (*)()) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::get_max_record_length, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::get_max_record_length() --> const unsigned long"); - cl.def("assign", (struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord & (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::*)(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord &)) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::operator=, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::operator=(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord &) --> struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord &", pybind11::return_value_policy::automatic, pybind11::arg("")); - } + { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord + // file:bdsg/snarl_distance_index.hpp line:1546 + auto &enclosing_class = cl; + pybind11::class_< + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord, + std::shared_ptr, + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord> + cl(enclosing_class, "TemporaryChainRecord", ""); + cl.def(pybind11::init([]() { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord(); + })); + cl.def( + pybind11::init([](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord const &o) { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord(o); + })); + cl.def_readwrite("start_node_id", + 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::start_node_id); + cl.def_readwrite("end_node_id", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::end_node_id); + cl.def_readwrite("end_node_length", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::end_node_length); + cl.def_readwrite("tree_depth", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::tree_depth); + cl.def_readwrite("parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::parent); + cl.def_readwrite("min_length", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::min_length); + cl.def_readwrite("max_length", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::max_length); + cl.def_readwrite("distance_left_start", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::distance_left_start); + cl.def_readwrite("distance_right_start", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::distance_right_start); + cl.def_readwrite("distance_left_end", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::distance_left_end); + cl.def_readwrite("distance_right_end", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::distance_right_end); + cl.def_readwrite("rank_in_parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::rank_in_parent); + cl.def_readwrite("root_snarl_index", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::root_snarl_index); + cl.def_readwrite("start_node_rev", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::start_node_rev); + cl.def_readwrite("end_node_rev", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::end_node_rev); + cl.def_readwrite("reversed_in_parent", + 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::reversed_in_parent); + cl.def_readwrite("is_trivial", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::is_trivial); + cl.def_readwrite("is_tip", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::is_tip); + cl.def_readwrite("loopable", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::loopable); + cl.def_readwrite("children", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::children); + cl.def_readwrite("prefix_sum", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::prefix_sum); + cl.def_readwrite("max_prefix_sum", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::max_prefix_sum); + cl.def_readwrite("forward_loops", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::forward_loops); + cl.def_readwrite("backward_loops", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::backward_loops); + cl.def_readwrite("chain_components", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::chain_components); + cl.def( + "get_max_record_length", + (unsigned long (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::*)(bool) const) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::get_max_record_length, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryChainRecord::get_max_record_length(bool) const --> " + "unsigned long", + pybind11::arg("include_distances")); + cl.def( + "assign", + (struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord & + (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord:: + *)(const struct bdsg::SnarlDistanceIndex:: + TemporaryDistanceIndex::TemporaryChainRecord &)) & + 
bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::operator=, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryChainRecord::operator=(const struct " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryChainRecord &) --> struct " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryChainRecord &", + pybind11::return_value_policy::automatic, pybind11::arg("")); + } - } + { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord + // file:bdsg/snarl_distance_index.hpp line:1588 + auto &enclosing_class = cl; + pybind11::class_< + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord, + std::shared_ptr, + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord> + cl(enclosing_class, "TemporarySnarlRecord", ""); + cl.def(pybind11::init([]() { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord(); + })); + cl.def( + pybind11::init([](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord const &o) { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord(o); + })); + cl.def_readwrite("parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::parent); + cl.def_readwrite("start_node_id", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::start_node_id); + cl.def_readwrite("start_node_length", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::start_node_length); + cl.def_readwrite("end_node_id", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::end_node_id); + cl.def_readwrite("end_node_length", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::end_node_length); + cl.def_readwrite("node_count", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::node_count); + cl.def_readwrite("min_length", + 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::min_length); + cl.def_readwrite("max_length", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::max_length); + cl.def_readwrite("max_distance", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::max_distance); + cl.def_readwrite("tree_depth", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::tree_depth); + cl.def_readwrite("distance_start_start", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::distance_start_start); + cl.def_readwrite("distance_end_end", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::distance_end_end); + cl.def_readwrite("rank_in_parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::rank_in_parent); + cl.def_readwrite("reversed_in_parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::reversed_in_parent); + cl.def_readwrite("start_node_rev", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::start_node_rev); + cl.def_readwrite("end_node_rev", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::end_node_rev); + cl.def_readwrite("is_trivial", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::is_trivial); + cl.def_readwrite("is_simple", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::is_simple); + cl.def_readwrite("is_tip", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::is_tip); + cl.def_readwrite("is_root_snarl", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::is_root_snarl); + cl.def_readwrite("include_distances", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::include_distances); + cl.def_readwrite("children", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + 
TemporarySnarlRecord::children); + cl.def_readwrite("tippy_child_ranks", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::tippy_child_ranks); + cl.def_readwrite("distances", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::distances); + cl.def( + "get_max_record_length", + (unsigned long (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::*)() const) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::get_max_record_length, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporarySnarlRecord::get_max_record_length() const --> unsigned " + "long"); + cl.def( + "assign", + (struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord & + (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord:: + *)(const struct bdsg::SnarlDistanceIndex:: + TemporaryDistanceIndex::TemporarySnarlRecord &)) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::operator=, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporarySnarlRecord::operator=(const struct " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporarySnarlRecord &) --> struct " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporarySnarlRecord &", + pybind11::return_value_policy::automatic, pybind11::arg("")); + } - } + { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord + // file:bdsg/snarl_distance_index.hpp line:1621 + auto &enclosing_class = cl; + pybind11::class_< + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord, + std::shared_ptr, + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord> + cl(enclosing_class, "TemporaryNodeRecord", ""); + cl.def(pybind11::init([]() { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord(); + })); + cl.def( + 
pybind11::init([](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord const &o) { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord(o); + })); + cl.def_readwrite("node_id", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::node_id); + cl.def_readwrite("parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::parent); + cl.def_readwrite("node_length", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::node_length); + cl.def_readwrite("rank_in_parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::rank_in_parent); + cl.def_readwrite("root_snarl_index", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::root_snarl_index); + cl.def_readwrite("distance_left_start", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::distance_left_start); + cl.def_readwrite("distance_right_start", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::distance_right_start); + cl.def_readwrite("distance_left_end", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::distance_left_end); + cl.def_readwrite("distance_right_end", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::distance_right_end); + cl.def_readwrite("reversed_in_parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::reversed_in_parent); + cl.def_readwrite("is_tip", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::is_tip); + cl.def_static( + "get_max_record_length", + (const unsigned long ( + *)())&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::get_max_record_length, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryNodeRecord::get_max_record_length() --> const unsigned " + "long"); + cl.def( + "assign", + (struct 
bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord & + (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord:: + *)(const struct bdsg::SnarlDistanceIndex:: + TemporaryDistanceIndex::TemporaryNodeRecord &)) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::operator=, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryNodeRecord::operator=(const struct " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryNodeRecord &) --> struct " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryNodeRecord &", + pybind11::return_value_policy::automatic, pybind11::arg("")); + } + } + } } From 467ab3d065b8ffe88c11bc1e81fb6bfb2c1fd699 Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Wed, 13 May 2026 09:30:15 -0700 Subject: [PATCH 61/75] fix PYBIND11_MAKE_OPAQUE 'a type specifier is required for all declarations' error Co-Authored-By: Claude Sonnet 4.6 --- bdsg/cmake_bindings/bdsg/snarl_distance_index.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdsg/cmake_bindings/bdsg/snarl_distance_index.cpp b/bdsg/cmake_bindings/bdsg/snarl_distance_index.cpp index 8563933b..596fbbd3 100644 --- a/bdsg/cmake_bindings/bdsg/snarl_distance_index.cpp +++ b/bdsg/cmake_bindings/bdsg/snarl_distance_index.cpp @@ -15,11 +15,11 @@ #include #include +#include #include #include #include #include -#include #include #include #include From b5abb8733287ca566d5a7b50daf79fee98c4e7fb Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Wed, 13 May 2026 11:22:32 -0700 Subject: [PATCH 62/75] format code --- bdsg/src/test_libbdsg.cpp | 9857 +++++++++++++++++++------------------ 1 file changed, 4962 insertions(+), 4895 deletions(-) diff --git a/bdsg/src/test_libbdsg.cpp b/bdsg/src/test_libbdsg.cpp index 38d16ec6..0a4ca0d9 100644 --- a/bdsg/src/test_libbdsg.cpp +++ 
b/bdsg/src/test_libbdsg.cpp @@ -1,5030 +1,5051 @@ // // test_libbdsg.cpp -// +// // Contains tests for the data structures in libbdsg // -#include -#include -#include +#include #include -#include -#include -#include -#include #include #include +#include +#include +#include #include -#include +#include +#include +#include +#include #include // BINDER_IGNORE because Binder can't find this -#include #include +#include -#include "bdsg/packed_graph.hpp" +#include "bdsg/ch.hpp" #include "bdsg/hash_graph.hpp" -#include "bdsg/snarl_distance_index.hpp" -#include "bdsg/internal/packed_structs.hpp" #include "bdsg/internal/mapped_structs.hpp" -#include "bdsg/overlays/path_position_overlays.hpp" +#include "bdsg/internal/packed_structs.hpp" #include "bdsg/overlays/packed_path_position_overlay.hpp" #include "bdsg/overlays/packed_reference_path_overlay.hpp" -#include "bdsg/overlays/vectorizable_overlays.hpp" #include "bdsg/overlays/packed_subgraph_overlay.hpp" +#include "bdsg/overlays/path_position_overlays.hpp" #include "bdsg/overlays/reference_path_overlay.hpp" -#include "bdsg/ch.hpp" +#include "bdsg/overlays/vectorizable_overlays.hpp" +#include "bdsg/packed_graph.hpp" +#include "bdsg/snarl_distance_index.hpp" using namespace bdsg; using namespace handlegraph; using namespace std; -//#define debug_at +// #define debug_at // Have helpers to store and check some test data size_t mix(size_t in, size_t salt = 0) { - return ((in * in + (in << 2)) ^ salt) + 1; + return ((in * in + (in << 2)) ^ salt) + 1; } -template -void fill_to(Vectorish& data, size_t count, int64_t nonce) { - for (size_t i = 0; i < count; i++) { - data.at(i) = mix(i, nonce); - } +template +void fill_to(Vectorish &data, size_t count, int64_t nonce) { + for (size_t i = 0; i < count; i++) { + data.at(i) = mix(i, nonce); + } } -template -void verify_to(const Vectorish& data, size_t count, int64_t nonce) { - if (count > data.size()) { - throw std::runtime_error("Trying to check " + std::to_string(count) + " items but 
only " + std::to_string(data.size()) + " are available"); - } - for (size_t i = 0; i < count; i++) { - auto correct_value = mix(i, nonce); - auto observed_value = data.at(i); - if (observed_value != correct_value) { - cerr << "At index " << i << " observed " << observed_value << " but expected " << correct_value << endl; - } - assert(observed_value == correct_value); +template +void verify_to(const Vectorish &data, size_t count, int64_t nonce) { + if (count > data.size()) { + throw std::runtime_error("Trying to check " + std::to_string(count) + + " items but only " + std::to_string(data.size()) + + " are available"); + } + for (size_t i = 0; i < count; i++) { + auto correct_value = mix(i, nonce); + auto observed_value = data.at(i); + if (observed_value != correct_value) { + cerr << "At index " << i << " observed " << observed_value + << " but expected " << correct_value << endl; } + assert(observed_value == correct_value); + } } /** * Given a resizeable two-level container of numbers, vigorously resize it and * its members and make sure they have the right values. */ -template -void bother_vector(TwoLevel& storage) { - - vector> truth; - - auto check = [&]() { - // Make sure the structure under test is holding the correct data. 
- if (storage.size() != truth.size()) { - std::cerr << "Structure under test has " << storage.size() << " items but should have " << truth.size() << std::endl; - assert(storage.size() == truth.size()); - } - for (size_t i = 0; i < truth.size(); i++) { - if (storage.at(i).size() != truth.at(i).size()) { - std::cerr << "Structure under test has " << storage.at(i).size() - << " items in item " << i << " but should have " << truth.size() << std::endl; - assert(storage.at(i).size() == truth.at(i).size()); - } - for (size_t j = 0; j < truth.at(i).size(); j++) { - if (storage.at(i).at(j) != truth.at(i).at(j)) { - std::cerr << "Structure under test has " << storage.at(i).at(j) - << " at " << j << " address " << ((intptr_t) &storage.at(i).at(j)) - << " in item " << i << " address " << ((intptr_t) &storage.at(i)) - << " but should have " << truth.at(i).at(j) << std::endl; - assert(storage.at(i).at(j) == truth.at(i).at(j)); - } - } +template void bother_vector(TwoLevel &storage) { + + vector> truth; + + auto check = [&]() { + // Make sure the structure under test is holding the correct data. 
+ if (storage.size() != truth.size()) { + std::cerr << "Structure under test has " << storage.size() + << " items but should have " << truth.size() << std::endl; + assert(storage.size() == truth.size()); + } + for (size_t i = 0; i < truth.size(); i++) { + if (storage.at(i).size() != truth.at(i).size()) { + std::cerr << "Structure under test has " << storage.at(i).size() + << " items in item " << i << " but should have " + << truth.size() << std::endl; + assert(storage.at(i).size() == truth.at(i).size()); + } + for (size_t j = 0; j < truth.at(i).size(); j++) { + if (storage.at(i).at(j) != truth.at(i).at(j)) { + std::cerr << "Structure under test has " << storage.at(i).at(j) + << " at " << j << " address " + << ((intptr_t)&storage.at(i).at(j)) << " in item " << i + << " address " << ((intptr_t)&storage.at(i)) + << " but should have " << truth.at(i).at(j) << std::endl; + assert(storage.at(i).at(j) == truth.at(i).at(j)); } - }; + } + } + }; - size_t seed = 0; + size_t seed = 0; - for (size_t iteration = 0; iteration < 2; iteration++) { - truth.resize(0); - storage.resize(0); - check(); - - for (size_t parent_size = 0; parent_size < 100; parent_size++) { + for (size_t iteration = 0; iteration < 2; iteration++) { + truth.resize(0); + storage.resize(0); + check(); + + for (size_t parent_size = 0; parent_size < 100; parent_size++) { #ifdef debug_bother - std::cerr << "Resize parent to " << parent_size << endl; + std::cerr << "Resize parent to " << parent_size << endl; #endif - truth.resize(parent_size); - storage.resize(parent_size); - check(); - - for (size_t child = 0; child < parent_size; child++) { - auto& truth_child = truth.at(child); - auto& storage_child = storage.at(child); - - size_t child_size = seed % 100; - seed = mix(seed); - - for (size_t i = 0; i <= std::min(child_size, (size_t)5); i++) { - // Resize 1 bigger a bunch + truth.resize(parent_size); + storage.resize(parent_size); + check(); + + for (size_t child = 0; child < parent_size; child++) { + auto 
&truth_child = truth.at(child); + auto &storage_child = storage.at(child); + + size_t child_size = seed % 100; + seed = mix(seed); + + for (size_t i = 0; i <= std::min(child_size, (size_t)5); i++) { + // Resize 1 bigger a bunch #ifdef debug_bother - std::cerr << "Resize child " << child << " of " << parent_size << " to " << i << endl; + std::cerr << "Resize child " << child << " of " << parent_size + << " to " << i << endl; #endif - truth_child.resize(i); - storage_child.resize(i); + truth_child.resize(i); + storage_child.resize(i); #ifdef debug_bother - std::cerr << "Check after resize to " << i << endl; + std::cerr << "Check after resize to " << i << endl; #endif - check(); + check(); #ifdef debug_bother - std::cerr << "Completed check after resize to " << i << endl; + std::cerr << "Completed check after resize to " << i << endl; #endif - } - - truth_child.resize(child_size); - storage_child.resize(child_size); - check(); - + } + + truth_child.resize(child_size); + storage_child.resize(child_size); + check(); + #ifdef debug_bother - std::cerr << "Fill in " << child_size << " items in child " << child << endl; + std::cerr << "Fill in " << child_size << " items in child " << child + << endl; #endif - - for (size_t i = 0; i < child_size; i++) { - // Fill in with data - truth_child.at(i) = seed % 10000; - storage_child.at(i) = seed % 10000; - seed = mix(seed); - } - - // Cut in half + + for (size_t i = 0; i < child_size; i++) { + // Fill in with data + truth_child.at(i) = seed % 10000; + storage_child.at(i) = seed % 10000; + seed = mix(seed); + } + + // Cut in half #ifdef debug_bother - std::cerr << "Resize child " << child << " of " << parent_size << " to " << child_size/2 << endl; + std::cerr << "Resize child " << child << " of " << parent_size << " to " + << child_size / 2 << endl; #endif - truth_child.resize(child_size/2); - storage_child.resize(child_size/2); - check(); - - // And increase by 10 with empty slots + truth_child.resize(child_size / 2); + 
storage_child.resize(child_size / 2); + check(); + + // And increase by 10 with empty slots #ifdef debug_bother - std::cerr << "Resize child " << child << " of " << parent_size << " to " << (truth_child.size() + 10) << endl; + std::cerr << "Resize child " << child << " of " << parent_size << " to " + << (truth_child.size() + 10) << endl; #endif - truth_child.resize(truth_child.size() + 10); - storage_child.resize(storage_child.size() + 10); - check(); - } - - // Now make sure that after all that the structures are equal. - check(); - } + truth_child.resize(truth_child.size() + 10); + storage_child.resize(storage_child.size() + 10); + check(); + } + + // Now make sure that after all that the structures are equal. + check(); } + } } void test_bit_packing() { - // Make an int vector - CompatIntVector<> test; - // Give it 128 bits - test.width(64); - test.resize(2); - - // Make a vector to compare against - sdsl::int_vector<> truth; - truth.width(64); - truth.resize(2); - - // Define a stage so we can report problems - std::string stage = "setup"; - - // Define bit-space accessors for the test vector. Accesses must be aligned on width. 
- auto set_int = [&](size_t offset_bits, size_t value, size_t width) { - assert(offset_bits % width == 0); - test.pack(offset_bits / width, value, width); - }; - auto get_int = [&](size_t offset_bits, size_t width) { - assert(offset_bits % width == 0); - return test.unpack(offset_bits / width, width); - }; - - // Define combined accessors - auto set_both = [&](size_t offset_bits, size_t value, size_t width) { - set_int(offset_bits, value, width); - truth.set_int(offset_bits, value, width); - }; - auto check_both = [&](size_t offset_bits, size_t width) { - auto test_int = get_int(offset_bits, width); - auto truth_int = truth.get_int(offset_bits, width); - if (test_int != truth_int) { - std::cerr << "In stage " << stage << " at offset " << offset_bits << " for width " << width << " test vector had " << test_int << " but truth vector had " << truth_int << std::endl; - - // Dump some of the bits - size_t window_start = offset_bits > width ? offset_bits - width : 0; - std::cerr << "Bit\tTruth\tTest" << std::endl; - for (size_t i = window_start; i < window_start + 2 * width && i < truth.bit_size(); i++) { - std::cerr << i << "\t" << truth.get_int(i, 1) << "\t" << get_int(i, 1) << std::endl; - } - - assert(false); - } - return test_int; - }; - - // Make sure we can zero everything - stage = "zero"; - for (size_t i = 0; i < 2; i++) { - set_both(i * 64, 0, 64); - } - for (size_t i = 0; i < 2; i++) { - check_both(i * 64, 64); - } - - // Make sure we can put a bit pattern and get back the right values at all bit widths. - stage = "pattern"; - for (size_t i = 0; i < 2; i++) { - set_both(i * 64, 0xCAFEBEBECACAF0F0, 64); + // Make an int vector + CompatIntVector<> test; + // Give it 128 bits + test.width(64); + test.resize(2); + + // Make a vector to compare against + sdsl::int_vector<> truth; + truth.width(64); + truth.resize(2); + + // Define a stage so we can report problems + std::string stage = "setup"; + + // Define bit-space accessors for the test vector. 
Accesses must be aligned on + // width. + auto set_int = [&](size_t offset_bits, size_t value, size_t width) { + assert(offset_bits % width == 0); + test.pack(offset_bits / width, value, width); + }; + auto get_int = [&](size_t offset_bits, size_t width) { + assert(offset_bits % width == 0); + return test.unpack(offset_bits / width, width); + }; + + // Define combined accessors + auto set_both = [&](size_t offset_bits, size_t value, size_t width) { + set_int(offset_bits, value, width); + truth.set_int(offset_bits, value, width); + }; + auto check_both = [&](size_t offset_bits, size_t width) { + auto test_int = get_int(offset_bits, width); + auto truth_int = truth.get_int(offset_bits, width); + if (test_int != truth_int) { + std::cerr << "In stage " << stage << " at offset " << offset_bits + << " for width " << width << " test vector had " << test_int + << " but truth vector had " << truth_int << std::endl; + + // Dump some of the bits + size_t window_start = offset_bits > width ? offset_bits - width : 0; + std::cerr << "Bit\tTruth\tTest" << std::endl; + for (size_t i = window_start; + i < window_start + 2 * width && i < truth.bit_size(); i++) { + std::cerr << i << "\t" << truth.get_int(i, 1) << "\t" << get_int(i, 1) + << std::endl; + } + + assert(false); } - for (size_t width = 1; width < 65; width++) { - for (size_t i = 0; i < 128/width; i++) { - check_both(i * width, width); - } + return test_int; + }; + + // Make sure we can zero everything + stage = "zero"; + for (size_t i = 0; i < 2; i++) { + set_both(i * 64, 0, 64); + } + for (size_t i = 0; i < 2; i++) { + check_both(i * 64, 64); + } + + // Make sure we can put a bit pattern and get back the right values at all bit + // widths. + stage = "pattern"; + for (size_t i = 0; i < 2; i++) { + set_both(i * 64, 0xCAFEBEBECACAF0F0, 64); + } + for (size_t width = 1; width < 65; width++) { + for (size_t i = 0; i < 128 / width; i++) { + check_both(i * width, width); } - - cerr << "Bit packing tests successful!" 
<< endl; + } + + cerr << "Bit packing tests successful!" << endl; } void test_mapped_structs() { - - assert(yomo::Manager::count_chains() == 0); - assert(yomo::Manager::count_links() == 0); - - { - - using T = int64_t; - using A = bdsg::yomo::Allocator; - using V = CompatVector; - // Make a thing to hold onto a test array. - bdsg::yomo::UniqueMappedPointer numbers_holder; - - // Construct it - numbers_holder.construct("GATTACA"); - - // See how much memory we are using - std::tuple total_free_reclaimable = numbers_holder.get_usage(); - // Total bytes must be no less than free bytes - assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); - // Free bytes must be no less than reclaimable bytes - assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); - - // Some bytes should be free in the initial chain link - assert(get<1>(total_free_reclaimable) > 0); - // But they should all be reclaimable, including the block header - assert(get<1>(total_free_reclaimable) == get<2>(total_free_reclaimable)); - - { - - // Get a reference to it, which will be valid unless we save() or something - auto& vec1 = *numbers_holder; - - // We should start empty - assert(vec1.size() == 0); - - // We should be able to preload without crashing - numbers_holder.preload(); - numbers_holder.preload(true); - - // We should be able to expand. 
- vec1.resize(100); - assert(vec1.size() == 100); - - // And contract - vec1.resize(10); - assert(vec1.size() == 10); - - // And hold data - fill_to(vec1, 10, 0); - verify_to(vec1, 10, 0); - - // And expand again - vec1.resize(100); - assert(vec1.size() == 100); - - // And see the data - verify_to(vec1, 10, 0); - - // And expand more - vec1.resize(1000); - assert(vec1.size() == 1000); - - // And see the data - verify_to(vec1, 10, 0); - - // And hold more data - fill_to(vec1, 1000, 1); - verify_to(vec1, 1000, 1); - - // And to preload without crashing - numbers_holder.preload(); - numbers_holder.preload(true); - } - - // We're going to need a temporary file - // This filename fill be filled in with the actual filename. - char filename[] = "tmpXXXXXX"; - int tmpfd = mkstemp(filename); - assert(tmpfd != -1); - - numbers_holder.save(tmpfd); - - { - auto& vec2 = *numbers_holder; - - // We should have the same data - assert(vec2.size() == 1000); - verify_to(vec2, 1000, 1); - - // We should be able to preload without crashing - numbers_holder.preload(); - numbers_holder.preload(true); - - // We should still be able to modify it. - vec2.resize(4000); - fill_to(vec2, 4000, 2); - verify_to(vec2, 4000, 2); - - // Check memory usage - total_free_reclaimable = numbers_holder.get_usage(); - // Total bytes must be no less than free bytes - assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); - // Free bytes must be no less than reclaimable bytes - assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); - - // At this point we've made it bigger than ever before and required - // a new link probably, so nothing should be reclaimable. - assert(get<2>(total_free_reclaimable) == 0); - // But some space should be free because we've deallocated smaller vectors. - assert(get<1>(total_free_reclaimable) > 0); - - // Make it even bigger! 
- vec2.resize(10000); - - // And smaller again - vec2.resize(4000); - - // And reallocate smaller - vec2.shrink_to_fit(); - - // Check memory usage - total_free_reclaimable = numbers_holder.get_usage(); - // Total bytes must be no less than free bytes - assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); - // Free bytes must be no less than reclaimable bytes - assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); - - // At this point some memory should be reclaimable - assert(get<2>(total_free_reclaimable) > 0); - - } - - numbers_holder.dissociate(); - - { - auto& vec3 = *numbers_holder; - - // After dissociating, we should be able to modify the vector - vec3.resize(5); - fill_to(vec3, 5, 3); - verify_to(vec3, 5, 3); - } - - numbers_holder.reset(); - - numbers_holder.load(tmpfd, "GATTACA"); - - // Check memory usage - total_free_reclaimable = numbers_holder.get_usage(); - // Total bytes must be no less than free bytes - assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); - // Free bytes must be no less than reclaimable bytes - assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); - - // No bytes should be reclaimable because we saved this through a mapping. - assert(get<2>(total_free_reclaimable) == 0); - - { - auto& vec4 = *numbers_holder; - - // We should be able to preload without crashing - numbers_holder.preload(); - numbers_holder.preload(true); - - // When we reload we should see the last thing we wrote before dissociating. - assert(vec4.size() == 4000); - verify_to(vec4, 4000, 2); - } - - close(tmpfd); - unlink(filename); - } - - assert(yomo::Manager::count_chains() == 0); - assert(yomo::Manager::count_links() == 0); - - { - using T = int64_t; - using A = bdsg::yomo::Allocator; - using V1 = CompatVector; - using A2 = bdsg::yomo::Allocator; - using V2 = CompatVector; - // Make a thing to hold onto a test array of arrays. 
- bdsg::yomo::UniqueMappedPointer numbers_holder_holder; - - numbers_holder_holder.construct(); - - // Now do a vigorous test comparing to a normal vector - bother_vector(*numbers_holder_holder); - } - - assert(yomo::Manager::count_chains() == 0); - assert(yomo::Manager::count_links() == 0); - - { - using T = int64_t; - using A = bdsg::yomo::Allocator; - using V1 = CompatVector; - using A2 = bdsg::yomo::Allocator; - using V2 = CompatVector; - - // Just make the root object on the stack and make sure chain-based - // allocators and pointers fall back to the heap properly. - V2 numbers; - - // Now do a vigorous test comparing to a normal vector - bother_vector(numbers); - } - - assert(yomo::Manager::count_chains() == 0); - assert(yomo::Manager::count_links() == 0); - - { - // Make sure our bit-packing vector works - CompatIntVector<> vec; - vec.width(3); - - for (size_t i = 0; i < 1000; i++) { - vec.resize(i + 1); - vec.at(i) = i % 8; - if (vec.at(i) != i % 8) { - throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + std::to_string(i) + " but got " + std::to_string(vec.at(i))); - } - } - - for (size_t i = 0; i < 1000; i++) { - if (vec.at(i) != i % 8) { - throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + std::to_string(i) + " but got " + std::to_string(vec.at(i))); - } - } - - vec.resize(500); - for (size_t i = 0; i < 500; i++) { - if (vec.at(i) != i % 8) { - throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + std::to_string(i) + " but got " + std::to_string(vec.at(i))); - } - } - - vec.repack(4, 500); - for (size_t i = 0; i < 500; i++) { - if (vec.at(i) != i % 8) { - throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + std::to_string(i) + " but got " + std::to_string(vec.at(i))); - } - } - } - - assert(yomo::Manager::count_chains() == 0); - assert(yomo::Manager::count_links() == 0); - - { - // Make sure our bit-packing vector can self-test - - // Make a vector - 
bdsg::yomo::UniqueMappedPointer vec; - vec.construct(); - vec->width(60); - vec->resize(1000); - fill_to(*vec, 1000, 1); - verify_to(*vec, 1000, 1); - - // We should pass heap verification - vec.check_heap_integrity(); - - // Save it out - char filename[] = "tmpXXXXXX"; - int tmpfd = mkstemp(filename); - assert(tmpfd != -1); - vec.save(tmpfd); - vec.reset(); - - // Drop part of the file - auto file_size = lseek(tmpfd, 0, SEEK_END); - assert(ftruncate(tmpfd, file_size/2) == 0); - - // Reload - vec.load(tmpfd, ""); - - try { - // We shouldn't pass heap verification. - vec.check_heap_integrity(); - assert(false); - } catch (std::runtime_error& e) { - // This is the exception we expect to get. - } - - vec.reset(); - - close(tmpfd); - unlink(filename); - } - - assert(yomo::Manager::count_chains() == 0); - assert(yomo::Manager::count_links() == 0); - - cerr << "Mapped Structs tests successful!" << endl; -} - -void test_int_vector() { - - // Make a thing to hold onto a test int vector. - bdsg::yomo::UniqueMappedPointer iv; - - // Have a function we can call to check its size. - auto save_and_check_size = [&](size_t expected_size) { - // Save it out, creating or clobbering - int fd = open("test.dat", O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); - iv.save(fd); - close(fd); - iv.dissociate(); - - // Make sure that the file has the correct size - struct stat file_stats; - stat("test.dat", &file_stats); - cerr << "Observed file size of " << file_stats.st_size << " bytes" << endl; - assert(file_stats.st_size == expected_size); - - // Load it again - bdsg::yomo::UniqueMappedPointer iv2; - fd = open("test.dat", O_RDWR); - iv2.load(fd, "ints"); - close(fd); - - // Make sure the re-loaded object has the correct usage. 
- std::tuple total_free_reclaimable = iv2.get_usage(); - size_t post_load_total_bytes = std::get<0>(total_free_reclaimable); - cerr << "Observed post-load size of " << post_load_total_bytes << " bytes" << endl; - assert(post_load_total_bytes == expected_size); - }; - - - // Construct it - iv.construct("ints"); - - // Give it a width - iv->width(20); - - // Make it big - size_t iv_size = 1024 * 1024 * 10; - for (size_t i = 1; i < iv_size; i *= 2) { - // Keep resizing it up and fragment the heap into many links. - iv->resize(i); - } - iv->resize(iv_size); - - for (size_t i = 0; i < iv_size; i++) { - // Fill it with a distinctive bit pattern - (*iv)[i] = 0xF0F0; - } - - // See how much memory we are using - std::tuple total_free_reclaimable = iv.get_usage(); - size_t required_bytes = std::get<0>(total_free_reclaimable) - std::get<2>(total_free_reclaimable); - cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " - << std::get<1>(total_free_reclaimable) << " bytes free, " - << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; - cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; - save_and_check_size(required_bytes); - - // Shrink it back down - iv->repack(16, iv_size); - total_free_reclaimable = iv.get_usage(); - required_bytes = std::get<0>(total_free_reclaimable) - std::get<2>(total_free_reclaimable); - cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " - << std::get<1>(total_free_reclaimable) << " bytes free, " - << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; - cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; - save_and_check_size(required_bytes); - - // Expand it even more - iv->repack(32, iv_size); - total_free_reclaimable = iv.get_usage(); - required_bytes = std::get<0>(total_free_reclaimable) 
- std::get<2>(total_free_reclaimable); - cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " - << std::get<1>(total_free_reclaimable) << " bytes free, " - << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; - cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; - save_and_check_size(required_bytes); - - // And again - iv->repack(40, iv_size); - total_free_reclaimable = iv.get_usage(); - required_bytes = std::get<0>(total_free_reclaimable) - std::get<2>(total_free_reclaimable); - cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " - << std::get<1>(total_free_reclaimable) << " bytes free, " - << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; - cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; - save_and_check_size(required_bytes); - - unlink("test.dat"); - cerr << "Int Vector tests successful!" 
<< endl; -} - -void test_serializable_handle_graphs() { - - vector> implementations; - - PackedGraph pg_out, pg_in; - implementations.emplace_back(&pg_out, &pg_in); - - HashGraph hg_out, hg_in; - implementations.emplace_back(&hg_out, &hg_in); - - MappedPackedGraph mpg_in, mpg_out; - implementations.emplace_back(&mpg_in, &mpg_out); - - for (pair implementation : implementations) { - - MutablePathMutableHandleGraph* build_graph = dynamic_cast(implementation.first); - PathHandleGraph* check_graph = dynamic_cast(implementation.second); - SerializableHandleGraph* serialize_graph = implementation.first; - SerializableHandleGraph* deserialize_graph = implementation.second; - - handle_t h1 = build_graph->create_handle("GATT"); - handle_t h2 = build_graph->create_handle("TTGA"); - handle_t h3 = build_graph->create_handle("T"); - handle_t h4 = build_graph->create_handle("CA"); - - build_graph->create_edge(h1, h2); - build_graph->create_edge(h1, build_graph->flip(h3)); - build_graph->create_edge(h2, h3); - build_graph->create_edge(build_graph->flip(h3), h4); - - path_handle_t p = build_graph->create_path_handle("path"); - build_graph->append_step(p, h1); - build_graph->append_step(p, h2); - build_graph->append_step(p, h4); - - stringstream strm; - - serialize_graph->serialize(strm); - strm.seekg(0); - deserialize_graph->deserialize(strm); - - assert(build_graph->get_node_count() == check_graph->get_node_count()); - assert(build_graph->get_edge_count() == check_graph->get_edge_count()); - assert(build_graph->get_path_count() == check_graph->get_path_count()); - - for (handle_t h : {h1, h2, h3, h4}) { - assert(check_graph->has_node(build_graph->get_id(h))); - assert(check_graph->get_sequence(check_graph->get_handle(build_graph->get_id(h))) == build_graph->get_sequence(h)); - } - - assert(check_graph->get_step_count(check_graph->get_path_handle(build_graph->get_path_name(p))) == build_graph->get_step_count(p)); - } - - cerr << "SerializableHandleGraph tests successful!" 
<< endl; -} + assert(yomo::Manager::count_chains() == 0); + assert(yomo::Manager::count_links() == 0); -void test_deletable_handle_graphs() { - - // first batch of tests - { - vector implementations; - - // Add implementations - - PackedGraph pg; - implementations.push_back(&pg); - - HashGraph hg; - implementations.push_back(&hg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - // And test them - - for (DeletableHandleGraph* implementation : implementations) { - - DeletableHandleGraph& graph = *implementation; - - assert(graph.get_node_count() == 0); - - handle_t h = graph.create_handle("ATG", 2); - - // DeletableHandleGraph has correct structure after creating a node - { - assert(graph.get_sequence(h) == "ATG"); - assert(graph.get_sequence(graph.flip(h)) == "CAT"); - assert(graph.get_base(h, 1) == 'T'); - assert(graph.get_base(graph.flip(h), 2) == 'T'); - assert(graph.get_subsequence(h, 1, 3) == "TG"); - assert(graph.get_subsequence(graph.flip(h), 0, 2) == "CA"); - assert(graph.get_length(h) == 3); - assert(graph.has_node(graph.get_id(h))); - assert(!graph.has_node(graph.get_id(h) + 1)); - - assert(graph.get_handle(graph.get_id(h)) == h); - assert(!graph.get_is_reverse(h)); - assert(graph.get_is_reverse(graph.flip(h))); - - assert(graph.get_node_count() == 1); - assert(graph.min_node_id() == graph.get_id(h)); - assert(graph.max_node_id() == graph.get_id(h)); - assert(graph.get_total_length() == 3); - assert(graph.get_edge_count() == 0); - - graph.follow_edges(h, true, [](const handle_t& prev) { - assert(false); - return true; - }); - graph.follow_edges(h, false, [](const handle_t& next) { - assert(false); - return true; - }); - } - - handle_t h2 = graph.create_handle("CT", 1); - - // DeletableHandleGraph has correct structure after creating a node at the beginning of ID space - { - - assert(graph.get_sequence(h2) == "CT"); - assert(graph.get_sequence(graph.flip(h2)) == "AG"); - assert(graph.get_base(h2, 1) == 'T'); - 
assert(graph.get_base(graph.flip(h2), 0) == 'A'); - assert(graph.get_subsequence(h2, 1, 10) == "T"); - assert(graph.get_subsequence(graph.flip(h2), 0, 2) == "AG"); - assert(graph.get_length(h2) == 2); - assert(graph.has_node(graph.get_id(h2))); - assert(!graph.has_node(max(graph.get_id(h), graph.get_id(h2)) + 1)); - - assert(graph.get_handle(graph.get_id(h2)) == h2); - - assert(graph.get_node_count() == 2); - assert(graph.min_node_id() == graph.get_id(h2)); - assert(graph.max_node_id() == graph.get_id(h)); - assert(graph.get_total_length() == 5); - assert(graph.get_edge_count() == 0); - - graph.follow_edges(h2, true, [](const handle_t& prev) { - assert(false); - return true; - }); - graph.follow_edges(h2, false, [](const handle_t& next) { - assert(false); - return true; - }); - } - - // creating and accessing a node at the end of ID space - - handle_t h3 = graph.create_handle("GAC", 4); - - // DeletableHandleGraph has correct structure after creating a node at the end of ID space - { - assert(graph.get_sequence(h3) == "GAC"); - assert(graph.get_sequence(graph.flip(h3)) == "GTC"); - assert(graph.get_base(h3, 1) == 'A'); - assert(graph.get_base(graph.flip(h3), 0) == 'G'); - assert(graph.get_subsequence(h3, 1, 1) == "A"); - assert(graph.get_subsequence(graph.flip(h3), 0, 5) == "GTC"); - assert(graph.get_length(h3) == 3); - - assert(graph.get_handle(graph.get_id(h3)) == h3); - - assert(graph.get_node_count() == 3); - assert(graph.min_node_id() == graph.get_id(h2)); - assert(graph.max_node_id() == graph.get_id(h3)); - assert(graph.get_total_length() == 8); - assert(graph.get_edge_count() == 0); - - graph.follow_edges(h3, true, [](const handle_t& prev) { - assert(false); - return true; - }); - graph.follow_edges(h3, false, [](const handle_t& next) { - assert(false); - return true; - }); - } - - - // creating and accessing in the middle of ID space - - handle_t h4 = graph.create_handle("T", 3); - - // DeletableHandleGraph has correct structure after creating a node in the 
middle of ID space - { - assert(graph.get_sequence(h4) == "T"); - assert(graph.get_sequence(graph.flip(h4)) == "A"); - assert(graph.get_length(h4) == 1); - - assert(graph.get_handle(graph.get_id(h4)) == h4); - - assert(graph.get_node_count() == 4); - assert(graph.min_node_id() == graph.get_id(h2)); - assert(graph.max_node_id() == graph.get_id(h3)); - assert(graph.get_total_length() == 9); - assert(graph.get_edge_count() == 0); - - graph.follow_edges(h4, true, [](const handle_t& prev) { - assert(false); - return true; - }); - graph.follow_edges(h4, false, [](const handle_t& next) { - assert(false); - return true; - }); - } - - graph.create_edge(h, h2); - - bool found1 = false, found2 = false, found3 = false, found4 = false; - int count1 = 0, count2 = 0, count3 = 0, count4 = 0; - - // DeletableHandleGraph has correct structure after creating an edge - { - assert(graph.get_edge_count() == 1); - - graph.follow_edges(h, false, [&](const handle_t& next) { - if (next == h2) { - found1 = true; - } - count1++; - return true; - }); - graph.follow_edges(h2, true, [&](const handle_t& prev) { - if (prev == h) { - found2 = true; - } - count2++; - return true; - }); - graph.follow_edges(graph.flip(h), true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found3 = true; - } - count3++; - return true; - }); - graph.follow_edges(graph.flip(h2), false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found4 = true; - } - count4++; - return true; - }); - assert(count1 == 1); - assert(count2 == 1); - assert(count3 == 1); - assert(count4 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - - count1 = count2 = count3 = count4 = 0; - found1 = found2 = found3 = found4 = false; - } - - graph.create_edge(h, graph.flip(h3)); - - bool found5 = false, found6 = false, found7 = false, found8 = false; - int count5 = 0, count6 = 0; - - // DeletableHandleGraph has correct structure after creating an edge with a traversal - { - 
assert(graph.get_edge_count() == 2); - - graph.follow_edges(h, false, [&](const handle_t& next) { - if (next == h2) { - found1 = true; - } - else if (next == graph.flip(h3)) { - found2 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h), true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found3 = true; - } - else if (prev == h3) { - found4 = true; - } - count2++; - return true; - }); - graph.follow_edges(h2, true, [&](const handle_t& prev) { - if (prev == h) { - found5 = true; - } - count3++; - return true; - }); - graph.follow_edges(graph.flip(h2), false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found6 = true; - } - count4++; - return true; - }); - graph.follow_edges(graph.flip(h3), true, [&](const handle_t& prev) { - if (prev == h) { - found7 = true; - } - count5++; - return true; - }); - graph.follow_edges(h3, false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found8 = true; - } - count6++; - return true; - }); - assert(count1 == 2); - assert(count2 == 2); - assert(count3 == 1); - assert(count4 == 1); - assert(count5 == 1); - assert(count6 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(found7); - assert(found8); - - count1 = count2 = count3 = count4 = count5 = count6 = 0; - found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = false; - } - - graph.create_edge(h4, graph.flip(h4)); - - // DeletableHandleGraph has correct structure after creating a reversing self-loop - { - assert(graph.get_edge_count() == 3); - - graph.follow_edges(h4, false, [&](const handle_t& next) { - if (next == graph.flip(h4)) { - found1 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h4), true, [&](const handle_t& prev) { - if (prev == h4) { - found2 = true; - } - count2++; - return true; - }); - assert(count1 == 1); - assert(count2 == 1); - assert(found1); - assert(found2); - - count1 = 
count2 = 0; - found1 = found2 = false; - } - - graph.create_edge(h, graph.flip(h4)); - graph.create_edge(graph.flip(h3), h4); - - assert(graph.get_edge_count() == 5); - - graph.destroy_edge(h, graph.flip(h4)); - graph.destroy_edge(graph.flip(h3), h4); - - assert(graph.get_edge_count() == 3); - - // DeletableHandleGraph has correct structure after creating and deleting edges - { - graph.follow_edges(h, false, [&](const handle_t& next) { - if (next == h2) { - found1 = true; - } - else if (next == graph.flip(h3)) { - found2 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h), true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found3 = true; - } - else if (prev == h3) { - found4 = true; - } - count2++; - return true; - }); - graph.follow_edges(h2, true, [&](const handle_t& prev) { - if (prev == h) { - found5 = true; - } - count3++; - return true; - }); - graph.follow_edges(graph.flip(h2), false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found6 = true; - } - count4++; - return true; - }); - graph.follow_edges(graph.flip(h3), true, [&](const handle_t& prev) { - if (prev == h) { - found7 = true; - } - count5++; - return true; - }); - graph.follow_edges(h3, false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found8 = true; - } - count6++; - return true; - }); - assert(count1 == 2); - assert(count2 == 2); - assert(count3 == 1); - assert(count4 == 1); - assert(count5 == 1); - assert(count6 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(found7); - assert(found8); - - count1 = count2 = count3 = count4 = count5 = count6 = 0; - found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = false; - - graph.follow_edges(h4, false, [&](const handle_t& next) { - if (next == graph.flip(h4)) { - found1 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h4), true, [&](const handle_t& prev) { - if 
(prev == h4) { - found2 = true; - } - count2++; - return true; - }); - assert(count1 == 1); - assert(count2 == 1); - assert(found1); - assert(found2); - - count1 = count2 = 0; - found1 = found2 = false; - } - - handle_t h5 = graph.create_handle("GGACC"); - - // make some edges to ensure that deleting is difficult - graph.create_edge(h, h5); - graph.create_edge(h5, h); - graph.create_edge(graph.flip(h5), h2); - graph.create_edge(h3, graph.flip(h5)); - graph.create_edge(h3, h5); - graph.create_edge(h5, h4); - - graph.destroy_handle(h5); - - // DeletableHandleGraph has correct structure after creating and deleting a node - { - - graph.follow_edges(h, false, [&](const handle_t& next) { - if (next == h2) { - found1 = true; - } - else if (next == graph.flip(h3)) { - found2 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h), true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found3 = true; - } - else if (prev == h3) { - found4 = true; - } - count2++; - return true; - }); - graph.follow_edges(h2, true, [&](const handle_t& prev) { - if (prev == h) { - found5 = true; - } - count3++; - return true; - }); - graph.follow_edges(graph.flip(h2), false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found6 = true; - } - count4++; - return true; - }); - graph.follow_edges(graph.flip(h3), true, [&](const handle_t& prev) { - if (prev == h) { - found7 = true; - } - count5++; - return true; - }); - graph.follow_edges(h3, false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found8 = true; - } - count6++; - return true; - }); - assert(count1 == 2); - assert(count2 == 2); - assert(count3 == 1); - assert(count4 == 1); - assert(count5 == 1); - assert(count6 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(found7); - assert(found8); - - count1 = count2 = count3 = count4 = count5 = count6 = 0; - found1 = found2 = found3 = found4 = found5 = found6 
= found7 = found8 = false; - - graph.follow_edges(h4, false, [&](const handle_t& next) { - if (next == graph.flip(h4)) { - found1 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h4), true, [&](const handle_t& prev) { - if (prev == h4) { - found2 = true; - } - count2++; - return true; - }); - assert(count1 == 1); - assert(count2 == 1); - assert(found1); - assert(found2); - - count1 = count2 = 0; - found1 = found2 = false; - } - - // DeletableHandleGraph has correct structure after swapping nodes - { - - graph.follow_edges(h, false, [&](const handle_t& next) { - if (next == h2) { - found1 = true; - } - else if (next == graph.flip(h3)) { - found2 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h), true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found3 = true; - } - else if (prev == h3) { - found4 = true; - } - count2++; - return true; - }); - graph.follow_edges(h2, true, [&](const handle_t& prev) { - if (prev == h) { - found5 = true; - } - count3++; - return true; - }); - graph.follow_edges(graph.flip(h2), false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found6 = true; - } - count4++; - return true; - }); - graph.follow_edges(graph.flip(h3), true, [&](const handle_t& prev) { - if (prev == h) { - found7 = true; - } - count5++; - return true; - }); - graph.follow_edges(h3, false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found8 = true; - } - count6++; - return true; - }); - assert(count1 == 2); - assert(count2 == 2); - assert(count3 == 1); - assert(count4 == 1); - assert(count5 == 1); - assert(count6 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(found7); - assert(found8); - - count1 = count2 = count3 = count4 = count5 = count6 = 0; - found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = false; - - graph.follow_edges(h4, false, [&](const handle_t& next) { - if 
(next == graph.flip(h4)) { - found1 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h4), true, [&](const handle_t& prev) { - if (prev == h4) { - found2 = true; - } - count2++; - return true; - }); - assert(count1 == 1); - assert(count2 == 1); - assert(found1); - assert(found2); - - count1 = count2 = 0; - found1 = found2 = false; - } - - // DeletableHandleGraph visits all nodes with for_each_handle - { - graph.for_each_handle([&](const handle_t& handle) { - if (handle == h) { - found1 = true; - } - else if (handle == h2) { - found2 = true; - } - else if (handle == h3) { - found3 = true; - } - else if (handle == h4) { - found4 = true; - } - else { - assert(false); - } - return true; - }); - - assert(found1); - assert(found2); - assert(found3); - assert(found4); - - found1 = found2 = found3 = found4 = false; - } - - // to make sure the sequence reverse complemented correctly - int i = 0; - auto check_rev_comp = [&](const std::string& seq1, const std::string& seq2) { - i++; - assert(seq1.size() == seq2.size()); - auto it = seq1.begin(); - auto rit = seq2.rbegin(); - for (; it != seq1.end(); it++) { - if (*it == 'A') { - assert(*rit == 'T'); - } - else if (*it == 'C') { - assert(*rit == 'G'); - } - else if (*it == 'G') { - assert(*rit == 'C'); - } - else if (*it == 'T') { - assert(*rit == 'A'); - } - else if (*it == 'N') { - assert(*rit == 'N'); - } - else { - assert(false); - } - - rit++; - } - }; - - - int count7 = 0, count8 = 0; - - // DeletableHandleGraph correctly reverses a node - { - - string seq1 = graph.get_sequence(h); - h = graph.apply_orientation(graph.flip(h)); - - // check the sequence - string rev_seq1 = graph.get_sequence(h); - check_rev_comp(seq1, rev_seq1); - - // check that the edges are what we expect - - graph.follow_edges(h, false, [&](const handle_t& next) { - count1++; - return true; - }); - graph.follow_edges(h, true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found1 = true; - } - else if (prev == 
h3) { - found2 = true; - } - count2++; - return true; - }); - graph.follow_edges(graph.flip(h), true, [&](const handle_t& next) { - count3++; - return true; - }); - graph.follow_edges(graph.flip(h), false, [&](const handle_t& prev) { - if (prev == h2) { - found3 = true; - } - else if (prev == graph.flip(h3)) { - found4 = true; - } - count4++; - return true; - }); - graph.follow_edges(h2, true, [&](const handle_t& prev) { - if (prev == graph.flip(h)) { - found5 = true; - } - count5++; - return true; - }); - graph.follow_edges(graph.flip(h2), false, [&](const handle_t& next) { - if (next == h) { - found6 = true; - } - count6++; - return true; - }); - graph.follow_edges(graph.flip(h3), true, [&](const handle_t& prev) { - if (prev == graph.flip(h)) { - found7 = true; - } - count7++; - return true; - }); - graph.follow_edges(h3, false, [&](const handle_t& next) { - if (next == h) { - found8 = true; - } - count8++; - return true; - }); - assert(count1 == 0); - assert(count2 == 2); - assert(count3 == 0); - assert(count4 == 2); - assert(count5 == 1); - assert(count6 == 1); - assert(count7 == 1); - assert(count8 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(found7); - assert(found8); - - count1 = count2 = count3 = count4 = count5 = count6 = count7 = count8 = 0; - found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = false; - - - // and now switch it back to the same orientation and repeat the topology checks - - h = graph.apply_orientation(graph.flip(h)); - - graph.follow_edges(h, false, [&](const handle_t& next) { - if (next == h2) { - found1 = true; - } - else if (next == graph.flip(h3)) { - found2 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h), true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found3 = true; - } - else if (prev == h3) { - found4 = true; - } - count2++; - return true; - }); - graph.follow_edges(h2, true, 
[&](const handle_t& prev) { - if (prev == h) { - found5 = true; - } - count3++; - return true; - }); - graph.follow_edges(graph.flip(h2), false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found6 = true; - } - count4++; - return true; - }); - graph.follow_edges(graph.flip(h3), true, [&](const handle_t& prev) { - if (prev == h) { - found7 = true; - } - count5++; - return true; - }); - graph.follow_edges(h3, false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found8 = true; - } - count6++; - return true; - }); - assert(count1 == 2); - assert(count2 == 2); - assert(count3 == 1); - assert(count4 == 1); - assert(count5 == 1); - assert(count6 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(found7); - assert(found8); - - count1 = count2 = count3 = count4 = count5 = count6 = 0; - found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = false; - - graph.follow_edges(h4, false, [&](const handle_t& next) { - if (next == graph.flip(h4)) { - found1 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h4), true, [&](const handle_t& prev) { - if (prev == h4) { - found2 = true; - } - count2++; - return true; - }); - assert(count1 == 1); - assert(count2 == 1); - assert(found1); - assert(found2); - - count1 = count2 = 0; - found1 = found2 = false; - } - - vector parts = graph.divide_handle(h, vector{1, 2}); - - int count9 = 0, count10 = 0, count11 = 0, count12 = 0; - bool found9 = false, found10 = false, found11 = false, found12 = false, found13 = false, found14 = false; - - // DeletableHandleGraph can correctly divide a node - { - - assert(parts.size() == 3); - - assert(graph.get_sequence(parts[0]) == "A"); - assert(graph.get_length(parts[0]) == 1); - assert(graph.get_sequence(parts[1]) == "T"); - assert(graph.get_length(parts[1]) == 1); - assert(graph.get_sequence(parts[2]) == "G"); - assert(graph.get_length(parts[2]) == 1); - - - 
graph.follow_edges(parts[0], false, [&](const handle_t& next) { - if (next == parts[1]) { - found1 = true; - } - count1++; - return true; - }); - graph.follow_edges(parts[0], true, [&](const handle_t& prev) { - count2++; - return true; - }); - graph.follow_edges(graph.flip(parts[0]), true, [&](const handle_t& prev) { - if (prev == graph.flip(parts[1])) { - found2 = true; - } - count3++; - return true; - }); - graph.follow_edges(graph.flip(parts[0]), false, [&](const handle_t& next) { - count4++; - return true; - }); - - graph.follow_edges(parts[1], false, [&](const handle_t& next) { - if (next == parts[2]) { - found3 = true; - } - count5++; - return true; - }); - graph.follow_edges(parts[1], true, [&](const handle_t& prev) { - if (prev == parts[0]) { - found4 = true; - } - count6++; - return true; - }); - graph.follow_edges(graph.flip(parts[1]), true, [&](const handle_t& prev) { - if (prev == graph.flip(parts[2])) { - found5 = true; - } - count7++; - return true; - }); - graph.follow_edges(graph.flip(parts[1]), false, [&](const handle_t& next) { - if (next == graph.flip(parts[0])) { - found6 = true; - } - count8++; - return true; - }); - - graph.follow_edges(parts[2], false, [&](const handle_t& next) { - if (next == h2) { - found7 = true; - } - else if (next == graph.flip(h3)) { - found8 = true; - } - count9++; - return true; - }); - graph.follow_edges(parts[2], true, [&](const handle_t& prev) { - if (prev == parts[1]) { - found9 = true; - } - count10++; - return true; - }); - graph.follow_edges(graph.flip(parts[2]), true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found10 = true; - } - else if (prev == h3) { - found11 = true; - } - count11++; - return true; - }); - graph.follow_edges(graph.flip(parts[2]), false, [&](const handle_t& next) { - if (next == graph.flip(parts[1])) { - found12 = true; - } - count12++; - return true; - }); - graph.follow_edges(graph.flip(h3), true, [&](const handle_t& prev) { - if (prev == parts[2]) { - found13 = true; 
- } - return true; - }); - graph.follow_edges(h2, true, [&](const handle_t& prev) { - if (prev == parts[2]) { - found14 = true; - } - return true; - }); - - assert(count1 == 1); - assert(count2 == 0); - assert(count3 == 1); - assert(count4 == 0); - assert(count5 == 1); - assert(count6 == 1); - assert(count7 == 1); - assert(count8 == 1); - assert(count9 == 2); - assert(count10 == 1); - assert(count11 == 2); - assert(count12 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(found7); - assert(found8); - assert(found9); - assert(found10); - assert(found11); - assert(found12); - assert(found13); - assert(found14); - - count1 = count2 = count3 = count4 = count5 = count6 = count7 = count8 = count9 = count10 = count11 = count12 = 0; - found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = found9 = found10 = found11 = found12 = false; - } - - vector rev_parts = graph.divide_handle(graph.flip(h3), vector{1}); - - // DeletableHandleGraph can correctly divide a node on the reverse strand - { - - assert(graph.get_sequence(rev_parts[0]) == "G"); - assert(graph.get_length(rev_parts[0]) == 1); - assert(graph.get_is_reverse(rev_parts[0])); - assert(graph.get_sequence(rev_parts[1]) == "TC"); - assert(graph.get_length(rev_parts[1]) == 2); - assert(graph.get_is_reverse(rev_parts[1])); - - graph.follow_edges(rev_parts[0], false, [&](const handle_t& next) { - if (next == rev_parts[1]) { - found1 = true; - } - count1++; - return true; - }); - graph.follow_edges(rev_parts[1], true, [&](const handle_t& prev) { - if (prev == rev_parts[0]) { - found2 = true; - } - count2++; - return true; - }); - graph.follow_edges(graph.flip(rev_parts[1]), false, [&](const handle_t& next) { - if (next == graph.flip(rev_parts[0])) { - found3 = true; - } - count3++; - return true; - }); - graph.follow_edges(graph.flip(rev_parts[0]), true, [&](const handle_t& prev) { - if (prev == graph.flip(rev_parts[1])) { - found4 = 
true; - } - count4++; - return true; - }); - graph.follow_edges(rev_parts[0], true, [&](const handle_t& prev) { - if (prev == parts[2]) { - found5 = true; - } - count5++; - return true; - }); - graph.follow_edges(rev_parts[1], false, [&](const handle_t& next) { - count6++; - return true; - }); - - assert(count1 == 1); - assert(count2 == 1); - assert(count3 == 1); - assert(count4 == 1); - assert(count5 == 1); - assert(count6 == 0); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - } - - auto h6 = graph.create_handle("ACGT"); - auto h7 = graph.create_handle("GCGG"); - auto h8 = graph.create_handle("TTCA"); - - graph.create_edge(h6, h7); - graph.create_edge(h7, h8); - - h7 = graph.truncate_handle(h7, true, 1); - assert(graph.get_sequence(h7) == "CGG"); - assert(graph.get_degree(h7, true) == 0); - assert(graph.get_degree(h7, false) == 1); - assert(graph.get_degree(h6, false) == 0); - assert(graph.get_degree(h8, true) == 1); - - h7 = graph.truncate_handle(h7, false, 2); - assert(graph.get_sequence(h7) == "CG"); - assert(graph.get_degree(h7, true) == 0); - assert(graph.get_degree(h7, false) == 0); - assert(graph.get_degree(h6, false) == 0); - assert(graph.get_degree(h8, true) == 0); - - h6 = graph.change_sequence(h6, "AAAT"); - h7 = graph.change_sequence(h7, "G"); - assert(graph.get_sequence(h6) == "AAAT"); - assert(graph.get_sequence(graph.flip(h6)) == "ATTT"); - assert(graph.get_sequence(h7) == "G"); - assert(graph.get_sequence(graph.flip(h7)) == "C"); - } - } + { - // second batch of test involving self loops - { - vector implementations; - - PackedGraph pg; - implementations.push_back(&pg); - - HashGraph hg; - implementations.push_back(&hg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - for (DeletableHandleGraph* implementation : implementations) { - - DeletableHandleGraph& graph = *implementation; - - // initialize the graph - - handle_t h1 = graph.create_handle("A"); - handle_t h2 = 
graph.create_handle("C"); - - graph.create_edge(h1, h2); - graph.create_edge(graph.flip(h1), h2); - - // test for the right initial topology - bool found1 = false, found2 = false, found3 = false, found4 = false, found5 = false, found6 = false; - int count1 = 0, count2 = 0, count3 = 0, count4 = 0; - - graph.follow_edges(h1, false, [&](const handle_t& other) { - if (other == h2) { - found1 = true; - } - count1++; - }); - graph.follow_edges(h1, true, [&](const handle_t& other) { - if (other == graph.flip(h2)) { - found2 = true; - } - count2++; - }); - graph.follow_edges(h2, false, [&](const handle_t& other) { - count3++; - }); - graph.follow_edges(h2, true, [&](const handle_t& other) { - if (other == h1) { - found3 = true; - } - else if (other == graph.flip(h1)) { - found4 = true; - } - count4++; - }); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(count1 == 1); - assert(count2 == 1); - assert(count3 == 0); - assert(count4 == 2); - found1 = found2 = found3 = found4 = found5 = found6 = false; - count1 = count2 = count3 = count4 = 0; - - // flip a node and check if the orientation is correct - h1 = graph.apply_orientation(graph.flip(h1)); - - graph.follow_edges(h1, false, [&](const handle_t& other) { - if (other == h2) { - found1 = true; - } - count1++; - }); - graph.follow_edges(h1, true, [&](const handle_t& other) { - if (other == graph.flip(h2)) { - found2 = true; - } - count2++; - }); - graph.follow_edges(h2, false, [&](const handle_t& other) { - count3++; - }); - graph.follow_edges(h2, true, [&](const handle_t& other) { - if (other == h1) { - found3 = true; - } - else if (other == graph.flip(h1)) { - found4 = true; - } - count4++; - }); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(count1 == 1); - assert(count2 == 1); - assert(count3 == 0); - assert(count4 == 2); - found1 = found2 = found3 = found4 = found5 = found6 = false; - count1 = count2 = count3 = count4 = 0; - - // create a new edge - - 
graph.create_edge(h1, graph.flip(h2)); - - // check the topology - - graph.follow_edges(h1, false, [&](const handle_t& other) { - if (other == h2) { - found1 = true; - } - else if (other == graph.flip(h2)) { - found2 = true; - } - count1++; - }); - graph.follow_edges(h1, true, [&](const handle_t& other) { - if (other == graph.flip(h2)) { - found3 = true; - } - count2++; - }); - graph.follow_edges(h2, false, [&](const handle_t& other) { - if (other == graph.flip(h1)) { - found4 = true; - } - count3++; - }); - graph.follow_edges(h2, true, [&](const handle_t& other) { - if (other == h1) { - found5 = true; - } - else if (other == graph.flip(h1)) { - found6 = true; - } - count4++; - }); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(count1 == 2); - assert(count2 == 1); - assert(count3 == 1); - assert(count4 == 2); - found1 = found2 = found3 = found4 = found5 = found6 = false; - count1 = count2 = count3 = count4 = 0; - - // now another node and check to make sure that the edges are updated appropriately - - h2 = graph.apply_orientation(graph.flip(h2)); - - graph.follow_edges(h1, false, [&](const handle_t& other) { - if (other == h2) { - found1 = true; - } - else if (other == graph.flip(h2)) { - found2 = true; - } - count1++; - }); - graph.follow_edges(h1, true, [&](const handle_t& other) { - if (other == h2) { - found3 = true; - } - count2++; - }); - graph.follow_edges(h2, false, [&](const handle_t& other) { - if (other == h1) { - found4 = true; - } - else if (other == graph.flip(h1)) { - found5 = true; - } - count3++; - }); - graph.follow_edges(h2, true, [&](const handle_t& other) { - if (other == h1) { - found6 = true; - } - count4++; - }); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(count1 == 2); - assert(count2 == 1); - assert(count3 == 2); - assert(count4 == 1); - } - } + using T = int64_t; + using A = bdsg::yomo::Allocator; 
+ using V = CompatVector; + // Make a thing to hold onto a test array. + bdsg::yomo::UniqueMappedPointer numbers_holder; + + // Construct it + numbers_holder.construct("GATTACA"); + + // See how much memory we are using + std::tuple total_free_reclaimable = + numbers_holder.get_usage(); + // Total bytes must be no less than free bytes + assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); + // Free bytes must be no less than reclaimable bytes + assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); + + // Some bytes should be free in the initial chain link + assert(get<1>(total_free_reclaimable) > 0); + // But they should all be reclaimable, including the block header + assert(get<1>(total_free_reclaimable) == get<2>(total_free_reclaimable)); - // another batch of tests involving divide handle and reversing - // self edges { - vector implementations; - HashGraph hg; - implementations.push_back(&hg); + // Get a reference to it, which will be valid unless we save() or + // something + auto &vec1 = *numbers_holder; - PackedGraph pg; - implementations.push_back(&pg); + // We should start empty + assert(vec1.size() == 0); - MappedPackedGraph mpg; - implementations.push_back(&mpg); + // We should be able to preload without crashing + numbers_holder.preload(); + numbers_holder.preload(true); - for (DeletableHandleGraph* implementation : implementations) { - DeletableHandleGraph& graph = *implementation; + // We should be able to expand. 
+ vec1.resize(100); + assert(vec1.size() == 100); - handle_t h1 = graph.create_handle("ATGAA"); - handle_t h2 = graph.create_handle("ATGAA"); + // And contract + vec1.resize(10); + assert(vec1.size() == 10); - graph.create_edge(h1, graph.flip(h1)); - graph.create_edge(graph.flip(h2), h2); + // And hold data + fill_to(vec1, 10, 0); + verify_to(vec1, 10, 0); - auto parts1 = graph.divide_handle(h1, {2, 4}); - auto parts2 = graph.divide_handle(h2, {2, 4}); + // And expand again + vec1.resize(100); + assert(vec1.size() == 100); - assert(parts1.size() == 3); - assert(parts2.size() == 3); + // And see the data + verify_to(vec1, 10, 0); - assert(graph.has_edge(parts1[0], parts1[1])); - assert(graph.has_edge(parts1[1], parts1[2])); - assert(graph.has_edge(parts1[2], graph.flip(parts1[2]))); + // And expand more + vec1.resize(1000); + assert(vec1.size() == 1000); - assert(graph.has_edge(parts2[0], parts2[1])); - assert(graph.has_edge(parts2[1], parts2[2])); - assert(graph.has_edge(graph.flip(parts2[0]), parts2[0])); + // And see the data + verify_to(vec1, 10, 0); - } - } + // And hold more data + fill_to(vec1, 1000, 1); + verify_to(vec1, 1000, 1); - // another batch of tests that deal with deleting after dividing - { - vector> implementations; - - // Add implementations - - PackedGraph pg, pg2; - implementations.push_back(make_pair(&pg, &pg2)); - - HashGraph hg, hg2; - implementations.push_back(make_pair(&hg, &hg2)); - - MappedPackedGraph mpg, mpg2; - implementations.push_back(make_pair(&mpg, &mpg2)); - - // And test them - for (int imp = 0; imp < implementations.size(); ++imp) { - - for (bool backwards : {false, true}) { - - MutablePathDeletableHandleGraph* g = backwards ? 
implementations[imp].first : implementations[imp].second; - - assert(g->get_node_count() == 0); - - handle_t handle1 = g->create_handle("CAAATAAGGCTTGGAAATTTTCTGGAGTTCTA"); - handle_t handle2 = g->create_handle("TTATATTCCAACTCTCTG"); - path_handle_t path_handle = g->create_path_handle("x"); - g->create_edge(handle1, handle2); - - if (backwards) { - handle1 = g->flip(handle1); - handle2 = g->flip(handle2); - g->append_step(path_handle, handle2); - g->append_step(path_handle, handle1); - } else { - g->append_step(path_handle, handle1); - g->append_step(path_handle, handle2); - } - - auto parts1 = g->divide_handle(handle1, vector({2, 7, 22, 31})); - auto parts2 = g->divide_handle(handle2, vector({1, 5, 10})); - - vector steps; - g->for_each_step_in_path(path_handle, [&](step_handle_t step_handle) { - steps.push_back(g->get_handle_of_step(step_handle)); - }); - - assert(steps.size() == 9); - int i = 0; - vector to_delete; - g->append_step(g->create_path_handle(to_string(i)), steps[i]); - ++i; - to_delete.push_back(steps[i++]); - g->append_step(g->create_path_handle(to_string(i)), steps[i]); - ++i; - to_delete.push_back(steps[i++]); - to_delete.push_back(steps[i++]); - to_delete.push_back(steps[i++]); - g->append_step(g->create_path_handle(to_string(i)), steps[i]); - ++i; - to_delete.push_back(steps[i++]); - g->append_step(g->create_path_handle(to_string(i)), steps[i]); - ++i; - - g->destroy_path(path_handle); - - for (auto handle : to_delete) { - g->destroy_handle(handle); - } - - g->for_each_path_handle([&](const path_handle_t& p) { - g->for_each_step_in_path(p, [&](const step_handle_t& s) { - auto h = g->get_handle_of_step(s); - }); - }); - - assert(g->get_node_count() == 4); - assert(g->get_path_count() == 4); - } - } + // And to preload without crashing + numbers_holder.preload(); + numbers_holder.preload(true); } - // another batch of tests that deal with deleting down to an empty graph - { - vector implementations; - - // Add implementations - - PackedGraph pg; - 
implementations.push_back(&pg); - - HashGraph hg; - implementations.push_back(&hg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - // And test them - for (int imp = 0; imp < implementations.size(); ++imp) { - - MutablePathDeletableHandleGraph* g = implementations[imp]; - - // the graph that i discovered the bug this tests for - vector>> graph_spec{ - {1, "C", {19}}, - {2, "A", {4, 5}}, - {3, "G", {4, 5}}, - {4, "T", {6, 16, 18}}, - {5, "C", {6, 16, 18}}, - {6, "TTG", {7, 8}}, - {7, "A", {9}}, - {8, "G", {9}}, - {9, "AAATT", {16}}, - {10, "A", {12}}, - {11, "T", {12}}, - {12, "ATAT", {13, 14}}, - {13, "A", {15}}, - {14, "T", {15}}, - {15, "C", {20}}, - {16, "TTCTGG", {17, 18}}, - {17, "AGT", {18}}, - {18, "TCTAT", {10, 11}}, - {19, "AAATAAG", {2, 3}}, - {20, "CAACTCTCTG", {}}, - }; - - for (auto rec : graph_spec) { - g->create_handle(get<1>(rec), get<0>(rec)); - } - for (auto rec : graph_spec) { - for (auto n : get<2>(rec)) { - g->create_edge(g->get_handle(get<0>(rec)), g->get_handle(n)); - } - } - - // a series of deletes that elicits the behavior - vector> delete_edges{ - {g->get_handle(10, 1), g->get_handle(18, 1)}, - {g->get_handle(3, 0), g->get_handle(5, 0)}, - {g->get_handle(4, 0), g->get_handle(6, 0)}, - {g->get_handle(6, 0), g->get_handle(7, 0)}, - {g->get_handle(2, 0), g->get_handle(5, 0)}, - {g->get_handle(7, 0), g->get_handle(9, 0)}, - {g->get_handle(16, 0), g->get_handle(17, 0)}, - {g->get_handle(12, 0), g->get_handle(14, 0)}, - {g->get_handle(9, 0), g->get_handle(16, 0)}, - {g->get_handle(11, 1), g->get_handle(18, 1)}, - {g->get_handle(6, 0), g->get_handle(8, 0)}, - {g->get_handle(12, 0), g->get_handle(13, 0)}, - {g->get_handle(5, 0), g->get_handle(16, 0)}, - {g->get_handle(4, 0), g->get_handle(16, 0)}, - {g->get_handle(16, 0), g->get_handle(18, 0)}, - {g->get_handle(5, 0), g->get_handle(6, 0)}, - {g->get_handle(3, 0), g->get_handle(4, 0)}, - {g->get_handle(8, 0), g->get_handle(9, 0)}, - {g->get_handle(2, 0), g->get_handle(4, 0)} - }; 
- for (auto edge : delete_edges) { - g->destroy_edge(edge.first, edge.second); - } - g->destroy_handle(g->get_handle(16, 0)); - g->destroy_handle(g->get_handle(13, 0)); - g->destroy_handle(g->get_handle(15, 0)); - g->destroy_handle(g->get_handle(20, 0)); - g->destroy_handle(g->get_handle(14, 0)); - g->destroy_handle(g->get_handle(10, 0)); - g->destroy_handle(g->get_handle(12, 0)); - g->destroy_handle(g->get_handle(11, 0)); - g->destroy_handle(g->get_handle(9, 0)); - g->destroy_handle(g->get_handle(4, 0)); - g->destroy_handle(g->get_handle(7, 0)); - g->destroy_handle(g->get_handle(18, 0)); - g->destroy_handle(g->get_handle(5, 0)); - g->destroy_handle(g->get_handle(1, 0)); - g->destroy_handle(g->get_handle(8, 0)); - g->destroy_handle(g->get_handle(19, 0)); - g->destroy_handle(g->get_handle(3, 0)); - g->destroy_handle(g->get_handle(6, 0)); - g->destroy_handle(g->get_handle(17, 0)); - g->destroy_handle(g->get_handle(2, 0)); - - g->create_handle("GATTACA", 4); - assert(g->get_node_count() == 1); - } - } - - // Edge counts stay accurate after deleting nodes + // We're going to need a temporary file + // This filename fill be filled in with the actual filename. 
+ char filename[] = "tmpXXXXXX"; + int tmpfd = mkstemp(filename); + assert(tmpfd != -1); + + numbers_holder.save(tmpfd); + { - vector implementations; - - // Add implementations - - PackedGraph pg; - implementations.push_back(&pg); - - HashGraph hg; - implementations.push_back(&hg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - // note: not valid in graph with reversing self edges - auto count_edges = [&](const HandleGraph& g) { - int cnt = 0; - g.for_each_handle([&](const handle_t& h) { - for (bool r : {true, false}) { - g.follow_edges(h, r, [&](const handle_t& n) { - ++cnt; - }); - } - }); - assert(cnt % 2 == 0); - return cnt / 2; - }; - - // And test them - for (int imp = 0; imp < implementations.size(); ++imp) { - - MutablePathDeletableHandleGraph* graph = implementations[imp]; - - handle_t h1 = graph->create_handle("A"); - handle_t h2 = graph->create_handle("AAA"); - handle_t h3 = graph->create_handle("CC"); - handle_t h4 = graph->create_handle("G"); - handle_t h5 = graph->create_handle("T"); - handle_t h6 = graph->create_handle("T"); - handle_t h7 = graph->create_handle("TT"); - handle_t h8 = graph->create_handle("T"); - handle_t h9 = graph->create_handle("TTT"); - handle_t h10 = graph->create_handle("C"); - handle_t h11 = graph->create_handle("CC"); - handle_t h12 = graph->create_handle("A"); - handle_t h13 = graph->create_handle("AA"); - - graph->create_edge(h1, h2); - graph->create_edge(h2, h3); - graph->create_edge(h2, h4); - graph->create_edge(h3, h4); - graph->create_edge(h3, h5); - graph->create_edge(h5, h6); - graph->create_edge(h6, h7); - graph->create_edge(h7, h8); - graph->create_edge(h8, h9); - graph->create_edge(h9, h10); - graph->create_edge(h9, h12); - graph->create_edge(h10, h11); - graph->create_edge(h11, h12); - graph->create_edge(h12, h13); - graph->create_edge(h5, h7); - graph->create_edge(h5, h11); - graph->create_edge(h7, h13); - graph->create_edge(h8, h12); - - graph->destroy_handle(h1); - 
assert(graph->get_edge_count() == count_edges(*graph)); - graph->destroy_handle(h6); - assert(graph->get_edge_count() == count_edges(*graph)); - graph->destroy_handle(h9); - assert(graph->get_edge_count() == count_edges(*graph)); - graph->destroy_handle(h10); - assert(graph->get_edge_count() == count_edges(*graph)); - } + auto &vec2 = *numbers_holder; + + // We should have the same data + assert(vec2.size() == 1000); + verify_to(vec2, 1000, 1); + + // We should be able to preload without crashing + numbers_holder.preload(); + numbers_holder.preload(true); + + // We should still be able to modify it. + vec2.resize(4000); + fill_to(vec2, 4000, 2); + verify_to(vec2, 4000, 2); + + // Check memory usage + total_free_reclaimable = numbers_holder.get_usage(); + // Total bytes must be no less than free bytes + assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); + // Free bytes must be no less than reclaimable bytes + assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); + + // At this point we've made it bigger than ever before and required + // a new link probably, so nothing should be reclaimable. + assert(get<2>(total_free_reclaimable) == 0); + // But some space should be free because we've deallocated smaller + // vectors. + assert(get<1>(total_free_reclaimable) > 0); + + // Make it even bigger! 
+ vec2.resize(10000); + + // And smaller again + vec2.resize(4000); + + // And reallocate smaller + vec2.shrink_to_fit(); + + // Check memory usage + total_free_reclaimable = numbers_holder.get_usage(); + // Total bytes must be no less than free bytes + assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); + // Free bytes must be no less than reclaimable bytes + assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); + + // At this point some memory should be reclaimable + assert(get<2>(total_free_reclaimable) > 0); } - - // batch deletion of paths works as expected + + numbers_holder.dissociate(); + { - vector implementations; - - // Add implementations - - PackedGraph pg; - implementations.push_back(&pg); - - HashGraph hg; - implementations.push_back(&hg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - for (int imp = 0; imp < implementations.size(); ++imp) { - - MutablePathDeletableHandleGraph& graph = *implementations[imp]; - - auto h1 = graph.create_handle("A"); - auto h2 = graph.create_handle("A"); - auto h3 = graph.create_handle("A"); - - graph.create_edge(h1, h2); - graph.create_edge(h2, h3); - - auto p1 = graph.create_path_handle("1"); - auto p2 = graph.create_path_handle("2"); - auto p3 = graph.create_path_handle("3"); - auto p4 = graph.create_path_handle("4"); - auto p5 = graph.create_path_handle("5"); - - for (const auto& p : {p1, p2, p3, p4, p5}) { - for (auto h : {h1, h2, h3}) { - graph.append_step(p, h); - } - } - - graph.destroy_paths({p1, p3, p4}); - - set paths_seen; - set paths_expected{p2, p5}; - graph.for_each_path_handle([&](const path_handle_t& path) { - assert(!paths_seen.count(path)); - paths_seen.insert(path); - std::vector handles; - std::vector handles_expected{h1, h2, h3}; - for (auto h : graph.scan_path(path)) { - handles.push_back(h); - } - assert(handles == handles_expected); - }); - - assert(paths_seen == paths_expected); - - graph.for_each_handle([&](const handle_t& h) { - set 
paths; - graph.for_each_step_on_handle(h, [&](const step_handle_t& step) { - auto p = graph.get_path_handle_of_step(step); - assert(!paths.count(p)); - paths.insert(p); - }); - assert(paths_seen == paths_expected); - }); - } + auto &vec3 = *numbers_holder; + + // After dissociating, we should be able to modify the vector + vec3.resize(5); + fill_to(vec3, 5, 3); + verify_to(vec3, 5, 3); } - - cerr << "DeletableHandleGraph tests successful!" << endl; -} -void test_mutable_path_handle_graphs() { - - vector implementations; - - PackedGraph pg; - implementations.push_back(&pg); - - HashGraph hg; - implementations.push_back(&hg); + numbers_holder.reset(); - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - for (MutablePathDeletableHandleGraph* implementation : implementations) { - - auto check_path = [&](MutablePathDeletableHandleGraph& graph, const path_handle_t& p, const vector& steps) { - assert(graph.get_step_count(p) == steps.size()); - - // Make sure steps connect back to the path - step_handle_t begin_step = graph.path_begin(p); - step_handle_t end_step = graph.path_end(p); - assert(graph.get_path_handle_of_step(begin_step) == p); - assert(graph.get_path_handle_of_step(end_step) == p); - - step_handle_t step = graph.path_begin(p); - for (int i = 0; i < steps.size(); i++) { - auto here = graph.get_handle_of_step(step); - assert(graph.get_path_handle_of_step(step) == p); - assert(graph.get_handle_of_step(step) == steps[i]); - - if (graph.get_is_circular(p)) { - assert(graph.has_next_step(step)); - assert(graph.has_previous_step(step)); - } - else { - assert(graph.has_next_step(step) == i + 1 < steps.size()); - assert(graph.has_previous_step(step) == i > 0); - } - - step = graph.get_next_step(step); - } - - if (graph.get_is_circular(p) && !graph.is_empty(p)) { - assert(step == graph.path_begin(p)); - } - else { - assert(step == graph.path_end(p)); - } - - step = graph.path_back(p); - - for (int i = steps.size() - 1; i >= 0; i--) { - - 
assert(graph.get_path_handle_of_step(step) == p); - assert(graph.get_handle_of_step(step) == steps[i]); - - if (graph.get_is_circular(p)) { - assert(graph.has_next_step(step)); - assert(graph.has_previous_step(step)); - } - else { - assert(graph.has_next_step(step) == i + 1 < steps.size()); - assert(graph.has_previous_step(step) == i > 0); - } - - step = graph.get_previous_step(step); - } - - if (graph.get_is_circular(p) && !graph.is_empty(p)) { - assert(step == graph.path_back(p)); - } - else { - assert(step == graph.path_front_end(p)); - } - }; + numbers_holder.load(tmpfd, "GATTACA"); - auto check_flips = [&](MutablePathDeletableHandleGraph& graph, const path_handle_t& p, const vector& steps) { + // Check memory usage + total_free_reclaimable = numbers_holder.get_usage(); + // Total bytes must be no less than free bytes + assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); + // Free bytes must be no less than reclaimable bytes + assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); - auto flipped = steps; - for (size_t i = 0; i < steps.size(); i++) { - graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); - flipped[i] = graph.flip(flipped[i]); - check_path(graph, p, flipped); + // No bytes should be reclaimable because we saved this through a mapping. + assert(get<2>(total_free_reclaimable) == 0); - graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); - flipped[i] = graph.flip(flipped[i]); - check_path(graph, p, flipped); - } - }; + { + auto &vec4 = *numbers_holder; - MutablePathDeletableHandleGraph& graph = *implementation; + // We should be able to preload without crashing + numbers_holder.preload(); + numbers_holder.preload(true); - handle_t h1 = graph.create_handle("AC"); - handle_t h2 = graph.create_handle("CAGTGA"); - handle_t h3 = graph.create_handle("GT"); + // When we reload we should see the last thing we wrote before + // dissociating. 
+ assert(vec4.size() == 4000); + verify_to(vec4, 4000, 2); + } - graph.create_edge(h1, h2); - graph.create_edge(h2, h3); - graph.create_edge(h1, graph.flip(h2)); - graph.create_edge(graph.flip(h2), h3); + close(tmpfd); + unlink(filename); + } - assert(!graph.has_path("1")); - assert(graph.get_path_count() == 0); + assert(yomo::Manager::count_chains() == 0); + assert(yomo::Manager::count_links() == 0); - path_handle_t p1 = graph.create_path_handle("1"); + { + using T = int64_t; + using A = bdsg::yomo::Allocator; + using V1 = CompatVector; + using A2 = bdsg::yomo::Allocator; + using V2 = CompatVector; + // Make a thing to hold onto a test array of arrays. + bdsg::yomo::UniqueMappedPointer numbers_holder_holder; + + numbers_holder_holder.construct(); + + // Now do a vigorous test comparing to a normal vector + bother_vector(*numbers_holder_holder); + } - assert(graph.has_path("1")); - assert(graph.get_path_count() == 1); - assert(graph.get_path_handle("1") == p1); - assert(graph.get_path_name(p1) == "1"); - assert(graph.get_step_count(p1) == 0); - assert(graph.is_empty(p1)); + assert(yomo::Manager::count_chains() == 0); + assert(yomo::Manager::count_links() == 0); - graph.append_step(p1, h1); + { + using T = int64_t; + using A = bdsg::yomo::Allocator; + using V1 = CompatVector; + using A2 = bdsg::yomo::Allocator; + using V2 = CompatVector; + + // Just make the root object on the stack and make sure chain-based + // allocators and pointers fall back to the heap properly. 
+ V2 numbers; + + // Now do a vigorous test comparing to a normal vector + bother_vector(numbers); + } - assert(graph.get_step_count(p1) == 1); - assert(!graph.is_empty(p1)); + assert(yomo::Manager::count_chains() == 0); + assert(yomo::Manager::count_links() == 0); - graph.append_step(p1, h2); - graph.append_step(p1, h3); + { + // Make sure our bit-packing vector works + CompatIntVector<> vec; + vec.width(3); + + for (size_t i = 0; i < 1000; i++) { + vec.resize(i + 1); + vec.at(i) = i % 8; + if (vec.at(i) != i % 8) { + throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + + std::to_string(i) + " but got " + + std::to_string(vec.at(i))); + } + } - assert(graph.get_step_count(p1) == 3); + for (size_t i = 0; i < 1000; i++) { + if (vec.at(i) != i % 8) { + throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + + std::to_string(i) + " but got " + + std::to_string(vec.at(i))); + } + } - // graph can traverse a path - check_path(graph, p1, {h1, h2, h3}); + vec.resize(500); + for (size_t i = 0; i < 500; i++) { + if (vec.at(i) != i % 8) { + throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + + std::to_string(i) + " but got " + + std::to_string(vec.at(i))); + } + } - // graph preserves paths when reversing nodes - check_flips(graph, p1, {h1, h2, h3}); + vec.repack(4, 500); + for (size_t i = 0; i < 500; i++) { + if (vec.at(i) != i % 8) { + throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + + std::to_string(i) + " but got " + + std::to_string(vec.at(i))); + } + } + } - // make a circular path - path_handle_t p2 = graph.create_path_handle("2", true); - assert(graph.get_path_count() == 2); + assert(yomo::Manager::count_chains() == 0); + assert(yomo::Manager::count_links() == 0); - graph.append_step(p2, h1); - graph.append_step(p2, graph.flip(h2)); - graph.append_step(p2, h3); + { + // Make sure our bit-packing vector can self-test - check_path(graph, p2, {h1, graph.flip(h2), h3}); + // Make a vector + 
bdsg::yomo::UniqueMappedPointer vec; + vec.construct(); + vec->width(60); + vec->resize(1000); + fill_to(*vec, 1000, 1); + verify_to(*vec, 1000, 1); - // graph can query steps of a node on paths + // We should pass heap verification + vec.check_heap_integrity(); - bool found1 = false, found2 = false; - vector steps = graph.steps_of_handle(h1); - for (auto& step : steps) { - if (graph.get_path_handle_of_step(step) == p1 && - graph.get_handle_of_step(step) == h1) { - found1 = true; - } - else if (graph.get_path_handle_of_step(step) == p2 && - graph.get_handle_of_step(step) == h1) { - found2 = true; - } - else { - assert(false); - } - } - assert(found1); - assert(found2); - found1 = found2 = false; + // Save it out + char filename[] = "tmpXXXXXX"; + int tmpfd = mkstemp(filename); + assert(tmpfd != -1); + vec.save(tmpfd); + vec.reset(); + + // Drop part of the file + auto file_size = lseek(tmpfd, 0, SEEK_END); + assert(ftruncate(tmpfd, file_size / 2) == 0); + + // Reload + vec.load(tmpfd, ""); + + try { + // We shouldn't pass heap verification. + vec.check_heap_integrity(); + assert(false); + } catch (std::runtime_error &e) { + // This is the exception we expect to get. 
+ } - steps = graph.steps_of_handle(h1, true); - for (auto& step : steps) { - if (graph.get_path_handle_of_step(step) == p1 && - graph.get_handle_of_step(step) == h1) { - found1 = true; - } - else if (graph.get_path_handle_of_step(step) == p2 && - graph.get_handle_of_step(step) == h1) { - found2 = true; - } - else { - assert(false); - } - } - assert(found1); - assert(found2); - found1 = found2 = false; + vec.reset(); - steps = graph.steps_of_handle(graph.flip(h1), true); - for (auto& step : steps) { - assert(false); - } + close(tmpfd); + unlink(filename); + } - steps = graph.steps_of_handle(h2, true); - for (auto& step : steps) { - if (graph.get_path_handle_of_step(step) == p1 && - graph.get_handle_of_step(step) == h2) { - found1 = true; - } - else { - assert(false); - } - } - steps = graph.steps_of_handle(graph.flip(h2), true); - for (auto& step : steps) { - if (graph.get_path_handle_of_step(step) == p2 && - graph.get_handle_of_step(step) == graph.flip(h2)) { - found2 = true; - } - else { - assert(false); - } - } - assert(found1); - assert(found2); - found1 = found2 = false; + assert(yomo::Manager::count_chains() == 0); + assert(yomo::Manager::count_links() == 0); - vector segments = graph.divide_handle(h2, {size_t(2), size_t(4)}); + cerr << "Mapped Structs tests successful!" << endl; +} - // graph preserves paths when dividing nodes +void test_int_vector() { - check_path(graph, p1, {h1, segments[0], segments[1], segments[2], h3}); - check_path(graph, p2, {h1, graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0]), h3}); + // Make a thing to hold onto a test int vector. + bdsg::yomo::UniqueMappedPointer iv; + + // Have a function we can call to check its size. 
+ auto save_and_check_size = [&](size_t expected_size) { + // Save it out, creating or clobbering + int fd = open("test.dat", O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); + iv.save(fd); + close(fd); + iv.dissociate(); + + // Make sure that the file has the correct size + struct stat file_stats; + stat("test.dat", &file_stats); + cerr << "Observed file size of " << file_stats.st_size << " bytes" << endl; + assert(file_stats.st_size == expected_size); + + // Load it again + bdsg::yomo::UniqueMappedPointer iv2; + fd = open("test.dat", O_RDWR); + iv2.load(fd, "ints"); + close(fd); + + // Make sure the re-loaded object has the correct usage. + std::tuple total_free_reclaimable = iv2.get_usage(); + size_t post_load_total_bytes = std::get<0>(total_free_reclaimable); + cerr << "Observed post-load size of " << post_load_total_bytes << " bytes" + << endl; + assert(post_load_total_bytes == expected_size); + }; - path_handle_t p3 = graph.create_path_handle("3"); - graph.append_step(p3, h1); - graph.append_step(p3, segments[0]); + // Construct it + iv.construct("ints"); - assert(graph.has_path("3")); - assert(graph.get_path_count() == 3); + // Give it a width + iv->width(20); - // graph can toggle circularity + // Make it big + size_t iv_size = 1024 * 1024 * 10; + for (size_t i = 1; i < iv_size; i *= 2) { + // Keep resizing it up and fragment the heap into many links. 
+ iv->resize(i); + } + iv->resize(iv_size); - graph.for_each_path_handle([&](const path_handle_t& p) { + for (size_t i = 0; i < iv_size; i++) { + // Fill it with a distinctive bit pattern + (*iv)[i] = 0xF0F0; + } - vector steps; + // See how much memory we are using + std::tuple total_free_reclaimable = iv.get_usage(); + size_t required_bytes = + std::get<0>(total_free_reclaimable) - std::get<2>(total_free_reclaimable); + cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " + << std::get<1>(total_free_reclaimable) << " bytes free, " + << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; + cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() + << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; + save_and_check_size(required_bytes); + + // Shrink it back down + iv->repack(16, iv_size); + total_free_reclaimable = iv.get_usage(); + required_bytes = + std::get<0>(total_free_reclaimable) - std::get<2>(total_free_reclaimable); + cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " + << std::get<1>(total_free_reclaimable) << " bytes free, " + << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; + cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() + << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; + save_and_check_size(required_bytes); + + // Expand it even more + iv->repack(32, iv_size); + total_free_reclaimable = iv.get_usage(); + required_bytes = + std::get<0>(total_free_reclaimable) - std::get<2>(total_free_reclaimable); + cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " + << std::get<1>(total_free_reclaimable) << " bytes free, " + << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; + cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() + << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; + 
save_and_check_size(required_bytes); + + // And again + iv->repack(40, iv_size); + total_free_reclaimable = iv.get_usage(); + required_bytes = + std::get<0>(total_free_reclaimable) - std::get<2>(total_free_reclaimable); + cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " + << std::get<1>(total_free_reclaimable) << " bytes free, " + << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; + cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() + << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; + save_and_check_size(required_bytes); + + unlink("test.dat"); + cerr << "Int Vector tests successful!" << endl; +} - for (handle_t h : graph.scan_path(p)) { - steps.push_back(h); - } +void test_serializable_handle_graphs() { - bool starting_circularity = graph.get_is_circular(p); + vector> + implementations; - // make every transition occur - for (bool circularity : {true, true, false, false, true}) { - graph.set_circularity(p, circularity); - assert(graph.get_is_circular(p) == circularity); - check_path(graph, p, steps); - } + PackedGraph pg_out, pg_in; + implementations.emplace_back(&pg_out, &pg_in); - graph.set_circularity(p, starting_circularity); - }); + HashGraph hg_out, hg_in; + implementations.emplace_back(&hg_out, &hg_in); - // graph can destroy paths + MappedPackedGraph mpg_in, mpg_out; + implementations.emplace_back(&mpg_in, &mpg_out); - graph.destroy_path(p3); + for (pair + implementation : implementations) { - assert(!graph.has_path("3")); - assert(graph.get_path_count() == 2); + MutablePathMutableHandleGraph *build_graph = + dynamic_cast(implementation.first); + PathHandleGraph *check_graph = + dynamic_cast(implementation.second); + SerializableHandleGraph *serialize_graph = implementation.first; + SerializableHandleGraph *deserialize_graph = implementation.second; - bool found3 = false; + handle_t h1 = build_graph->create_handle("GATT"); + handle_t h2 = 
build_graph->create_handle("TTGA"); + handle_t h3 = build_graph->create_handle("T"); + handle_t h4 = build_graph->create_handle("CA"); - graph.for_each_path_handle([&](const path_handle_t& p) { - if (graph.get_path_name(p) == "1") { - found1 = true; - } - else if (graph.get_path_name(p) == "2") { - found2 = true; - } - else if (graph.get_path_name(p) == "3") { - found3 = true; - } - else { - assert(false); - } - }); + build_graph->create_edge(h1, h2); + build_graph->create_edge(h1, build_graph->flip(h3)); + build_graph->create_edge(h2, h3); + build_graph->create_edge(build_graph->flip(h3), h4); - assert(found1); - assert(found2); - assert(!found3); - - // check flips to see if membership records are still functional - check_flips(graph, p1, {h1, segments[0], segments[1], segments[2], h3}); - check_flips(graph, p2, {h1, graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0]), h3}); - - graph.destroy_path(p1); - - assert(!graph.has_path("1")); - assert(graph.get_path_count() == 1); - - found1 = found2 = found3 = false; - - graph.for_each_path_handle([&](const path_handle_t& p) { - if (graph.get_path_name(p) == "1") { - found1 = true; - } - else if (graph.get_path_name(p) == "2") { - found2 = true; - } - else if (graph.get_path_name(p) == "3") { - found3 = true; - } - else { - assert(false); - } - }); + path_handle_t p = build_graph->create_path_handle("path"); + build_graph->append_step(p, h1); + build_graph->append_step(p, h2); + build_graph->append_step(p, h4); - assert(!found1); - assert(found2); - assert(!found3); - - // check flips to see if membership records are still functional - check_flips(graph, p2, {h1, graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0]), h3}); - - // make a path to rewrite - path_handle_t p4 = graph.create_path_handle("4"); - graph.prepend_step(p4, h3); - graph.prepend_step(p4, segments[2]); - graph.prepend_step(p4, segments[1]); - graph.prepend_step(p4, segments[0]); - graph.prepend_step(p4, h1); 
- - check_flips(graph, p4, {h1, segments[0], segments[1], segments[2], h3}); - - auto check_rewritten_segment = [&](const pair& new_segment, - const vector& steps) { - int i = 0; - for (auto step = new_segment.first; step != new_segment.second; step = graph.get_next_step(step)) { - assert(graph.get_handle_of_step(step) == steps[i]); - i++; - } - assert(i == steps.size()); - }; + stringstream strm; - // rewrite the middle portion of a path + serialize_graph->serialize(strm); + strm.seekg(0); + deserialize_graph->deserialize(strm); - step_handle_t s1 = graph.get_next_step(graph.path_begin(p4)); - step_handle_t s2 = graph.get_next_step(graph.get_next_step(graph.get_next_step(s1))); + assert(build_graph->get_node_count() == check_graph->get_node_count()); + assert(build_graph->get_edge_count() == check_graph->get_edge_count()); + assert(build_graph->get_path_count() == check_graph->get_path_count()); - auto new_segment = graph.rewrite_segment(s1, s2, {graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0])}); + for (handle_t h : {h1, h2, h3, h4}) { + assert(check_graph->has_node(build_graph->get_id(h))); + assert(check_graph->get_sequence(check_graph->get_handle( + build_graph->get_id(h))) == build_graph->get_sequence(h)); + } - check_flips(graph, p4, {h1, graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0]), h3}); - check_rewritten_segment(new_segment, {graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0])}); + assert(check_graph->get_step_count( + check_graph->get_path_handle(build_graph->get_path_name(p))) == + build_graph->get_step_count(p)); + } - // rewrite around the end of a circular path to delete + cerr << "SerializableHandleGraph tests successful!" 
<< endl; +} - graph.create_edge(h3, h1); - graph.create_edge(segments[2], segments[0]); - graph.set_circularity(p4, true); +void test_deletable_handle_graphs() { - s1 = graph.get_previous_step(graph.path_begin(p4)); - s2 = graph.get_next_step(graph.path_begin(p4)); - assert(s2 != graph.path_end(p4)); + // first batch of tests + { + vector implementations; - new_segment = graph.rewrite_segment(s1, s2, vector()); - // The end we get should be the same as the end we sent, since it is exclusive - assert(new_segment.second == s2); + // Add implementations - check_flips(graph, p4, {graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0])}); - check_rewritten_segment(new_segment, vector()); + PackedGraph pg; + implementations.push_back(&pg); - // add into an empty slot + HashGraph hg; + implementations.push_back(&hg); - new_segment = graph.rewrite_segment(new_segment.first, new_segment.second, {graph.flip(h1), graph.flip(h3)}); + MappedPackedGraph mpg; + implementations.push_back(&mpg); - check_flips(graph, p4, {graph.flip(h1), graph.flip(h3), graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0])}); - check_rewritten_segment(new_segment, {graph.flip(h1), graph.flip(h3)}); + // And test them - } - - { - vector> implementations; - - // Add implementations - - HashGraph hg, hg2; - implementations.push_back(make_pair(&hg, &hg2)); - - PackedGraph pg, pg2; - implementations.push_back(make_pair(&pg, &pg2)); - - MappedPackedGraph mpg, mpg2; - implementations.push_back(make_pair(&mpg, &mpg2)); - - // And test them - for (int imp = 0; imp < implementations.size(); ++imp) { - for (bool backwards : {false, true}) { - - MutablePathMutableHandleGraph* g = backwards ? 
implementations[imp].first : implementations[imp].second; - - assert(g->get_node_count() == 0); - - handle_t handle = g->create_handle("TTATATTCCAACTCTCTG"); - if (backwards) { - handle = g->flip(handle); - } - path_handle_t path_handle = g->create_path_handle("Path"); - g->append_step(path_handle, handle); - string seq = g->get_sequence(handle); - vector true_parts = { seq.substr(0, 1), seq.substr(1, 4), seq.substr(5, 5), seq.substr(10) }; - - // Should get (C,AGAG,AGTTG,GAATATAA) (forward) - // Should get (T,TATA,TTCCA,ACTCTCTG) (reverse) - auto parts = g->divide_handle(handle, {1, 5, 10}); - assert(parts.size() == true_parts.size()); - for (int i = 0; i < parts.size(); ++i) { - assert(g->get_sequence(parts[i]) == true_parts[i]); - assert(g->get_is_reverse(parts[i]) == backwards); - } - - vector steps; - g->for_each_step_in_path(path_handle, [&](step_handle_t step_handle) { - steps.push_back(g->get_handle_of_step(step_handle)); - }); - assert(steps.size() == true_parts.size()); - for (int i = 0; i < parts.size(); ++i) { - assert(g->get_sequence(steps[i]) == true_parts[i]); - assert(g->get_is_reverse(steps[i]) == backwards); - } - } - } + for (DeletableHandleGraph *implementation : implementations) { - } - - cerr << "MutablePathDeletableHandleGraph tests successful!" 
<< endl; -} + DeletableHandleGraph &graph = *implementation; -template -void test_packed_vector() { - enum vec_op_t {SET = 0, GET = 1, APPEND = 2, POP = 3, SERIALIZE = 4}; - - random_device rd; - default_random_engine prng(rd()); - uniform_int_distribution op_distr(0, 4); - - int num_runs = 1000; - int num_ops = 200; - int gets_per_op = 5; - int sets_per_op = 5; - int appends_per_op = 3; - int pops_per_op = 1; - - for (size_t i = 0; i < num_runs; i++) { - - uint64_t next_val = 0; - - vector std_vec; - PackedVectorImpl dyn_vec; - - for (size_t j = 0; j < num_ops; j++) { - - vec_op_t op = (vec_op_t) op_distr(prng); - switch (op) { - case SET: - if (!std_vec.empty()) { - for (size_t k = 0; k < sets_per_op; k++) { - size_t idx = prng() % dyn_vec.size(); - std_vec[idx] = next_val; - dyn_vec.set(idx, next_val); - next_val++; - } - } - - break; - - case GET: - if (!std_vec.empty()) { - for (size_t k = 0; k < gets_per_op; k++) { - size_t idx = prng() % dyn_vec.size(); - assert(std_vec[idx] == dyn_vec.get(idx)); - next_val++; - } - } - - break; - - case APPEND: - for (size_t k = 0; k < appends_per_op; k++) { - std_vec.push_back(next_val); - dyn_vec.push_back(next_val); - next_val++; - } - - break; - - case POP: - if (!std_vec.empty()) { - for (size_t k = 0; k < pops_per_op; k++) { - std_vec.pop_back(); - dyn_vec.pop_back(); - } - } - - break; - - case SERIALIZE: - { - stringstream strm; - - dyn_vec.serialize(strm); - strm.seekg(0); - PackedVectorImpl copy_vec(strm); - - assert(copy_vec.size() == dyn_vec.size()); - for (size_t i = 0; i < copy_vec.size(); i++) { - assert(copy_vec.get(i) == dyn_vec.get(i)); - } - break; - } - - default: - break; - } - - assert(std_vec.empty() == dyn_vec.empty()); - assert(std_vec.size() == dyn_vec.size()); - } - } - cerr << "PackedVector (" << typeid(PackedVectorImpl).name() << ") tests successful!" 
<< endl; -} + assert(graph.get_node_count() == 0); -/** - * Generic iterator test function that works with any vector-like container - * (PackedVector, PagedVector, RobustPagedVector, PackedDeque). - * - * Tests ForwardIterator, BidirectionalIterator, RandomAccessIterator, and - * iterator order comparison, but not OutputIterator. - */ -template -void test_iterators() { - // ForwardIterator tests + handle_t h = graph.create_handle("ATG", 2); - // Empty iteration - { - VectorLike vec; - assert(vec.begin() == vec.end()); + // DeletableHandleGraph has correct structure after creating a node + { + assert(graph.get_sequence(h) == "ATG"); + assert(graph.get_sequence(graph.flip(h)) == "CAT"); + assert(graph.get_base(h, 1) == 'T'); + assert(graph.get_base(graph.flip(h), 2) == 'T'); + assert(graph.get_subsequence(h, 1, 3) == "TG"); + assert(graph.get_subsequence(graph.flip(h), 0, 2) == "CA"); + assert(graph.get_length(h) == 3); + assert(graph.has_node(graph.get_id(h))); + assert(!graph.has_node(graph.get_id(h) + 1)); - size_t count = 0; - for (auto it = vec.begin(); it != vec.end(); ++it) { - count++; - } - assert(count == 0); - } + assert(graph.get_handle(graph.get_id(h)) == h); + assert(!graph.get_is_reverse(h)); + assert(graph.get_is_reverse(graph.flip(h))); - // Single element - { - VectorLike vec; - vec.push_back(42); + assert(graph.get_node_count() == 1); + assert(graph.min_node_id() == graph.get_id(h)); + assert(graph.max_node_id() == graph.get_id(h)); + assert(graph.get_total_length() == 3); + assert(graph.get_edge_count() == 0); - assert(vec.begin() != vec.end()); + graph.follow_edges(h, true, [](const handle_t &prev) { + assert(false); + return true; + }); + graph.follow_edges(h, false, [](const handle_t &next) { + assert(false); + return true; + }); + } - auto it = vec.begin(); - assert(*it == 42); - ++it; - assert(it == vec.end()); - } + handle_t h2 = graph.create_handle("CT", 1); + + // DeletableHandleGraph has correct structure after creating a node at the + 
// beginning of ID space + { + + assert(graph.get_sequence(h2) == "CT"); + assert(graph.get_sequence(graph.flip(h2)) == "AG"); + assert(graph.get_base(h2, 1) == 'T'); + assert(graph.get_base(graph.flip(h2), 0) == 'A'); + assert(graph.get_subsequence(h2, 1, 10) == "T"); + assert(graph.get_subsequence(graph.flip(h2), 0, 2) == "AG"); + assert(graph.get_length(h2) == 2); + assert(graph.has_node(graph.get_id(h2))); + assert(!graph.has_node(max(graph.get_id(h), graph.get_id(h2)) + 1)); + + assert(graph.get_handle(graph.get_id(h2)) == h2); + + assert(graph.get_node_count() == 2); + assert(graph.min_node_id() == graph.get_id(h2)); + assert(graph.max_node_id() == graph.get_id(h)); + assert(graph.get_total_length() == 5); + assert(graph.get_edge_count() == 0); + + graph.follow_edges(h2, true, [](const handle_t &prev) { + assert(false); + return true; + }); + graph.follow_edges(h2, false, [](const handle_t &next) { + assert(false); + return true; + }); + } - // Multiple elements - basic iteration - { - VectorLike vec; - vector expected = {10, 20, 30, 40, 50}; + // creating and accessing a node at the end of ID space - for (auto val : expected) { - vec.push_back(val); - } + handle_t h3 = graph.create_handle("GAC", 4); - // Iterate and compare - size_t idx = 0; - for (auto it = vec.begin(); it != vec.end(); ++it) { - assert(idx < expected.size()); - assert(*it == expected[idx]); - idx++; - } - assert(idx == expected.size()); - } + // DeletableHandleGraph has correct structure after creating a node at the + // end of ID space + { + assert(graph.get_sequence(h3) == "GAC"); + assert(graph.get_sequence(graph.flip(h3)) == "GTC"); + assert(graph.get_base(h3, 1) == 'A'); + assert(graph.get_base(graph.flip(h3), 0) == 'G'); + assert(graph.get_subsequence(h3, 1, 1) == "A"); + assert(graph.get_subsequence(graph.flip(h3), 0, 5) == "GTC"); + assert(graph.get_length(h3) == 3); - // Range-based for loop - { - VectorLike vec; - vector expected = {100, 200, 300, 400, 500, 600, 700, 800}; + 
assert(graph.get_handle(graph.get_id(h3)) == h3); - for (auto val : expected) { - vec.push_back(val); - } - - size_t idx = 0; - for (auto val : vec) { - assert(idx < expected.size()); - assert(val == expected[idx]); - idx++; - } - assert(idx == expected.size()); - } - - // Iterator equality and inequality - { - VectorLike vec; - vec.push_back(1); - vec.push_back(2); - vec.push_back(3); - - auto it1 = vec.begin(); - auto it2 = vec.begin(); - assert(it1 == it2); - - ++it2; - assert(it1 != it2); - - ++it1; - assert(it1 == it2); - } - - // std::distance compatibility - { - VectorLike vec; - for (size_t i = 0; i < 15; i++) { - vec.push_back(i); - } - - auto dist = std::distance(vec.begin(), vec.end()); - assert((size_t)dist == vec.size()); - assert((size_t)dist == 15); - } - - // std::find compatibility - { - VectorLike vec; - vec.push_back(10); - vec.push_back(20); - vec.push_back(30); - vec.push_back(40); - vec.push_back(50); - - auto it = std::find(vec.begin(), vec.end(), 30); - assert(it != vec.end()); - assert(*it == 30); - - auto it2 = std::find(vec.begin(), vec.end(), 999); - assert(it2 == vec.end()); - } - - // Const iterator - { - VectorLike vec; - vec.push_back(5); - vec.push_back(15); - vec.push_back(25); + assert(graph.get_node_count() == 3); + assert(graph.min_node_id() == graph.get_id(h2)); + assert(graph.max_node_id() == graph.get_id(h3)); + assert(graph.get_total_length() == 8); + assert(graph.get_edge_count() == 0); - const VectorLike& const_vec = vec; + graph.follow_edges(h3, true, [](const handle_t &prev) { + assert(false); + return true; + }); + graph.follow_edges(h3, false, [](const handle_t &next) { + assert(false); + return true; + }); + } - size_t count = 0; - for (auto it = const_vec.begin(); it != const_vec.end(); ++it) { - count++; - } - assert(count == 3); - - auto it = const_vec.begin(); - assert(*it == 5); - ++it; - assert(*it == 15); - ++it; - assert(*it == 25); - } + // creating and accessing in the middle of ID space - // Large container 
with various patterns - { - VectorLike vec; - random_device rd; - default_random_engine prng(rd()); - uniform_int_distribution val_distr(0, 10000); - - vector expected; - size_t num_elements = 200; - - for (size_t i = 0; i < num_elements; i++) { - uint64_t val = val_distr(prng); - expected.push_back(val); - vec.push_back(val); - } + handle_t h4 = graph.create_handle("T", 3); - size_t idx = 0; - for (auto val : vec) { - assert(val == expected[idx]); - idx++; - } - assert(idx == expected.size()); - } + // DeletableHandleGraph has correct structure after creating a node in the + // middle of ID space + { + assert(graph.get_sequence(h4) == "T"); + assert(graph.get_sequence(graph.flip(h4)) == "A"); + assert(graph.get_length(h4) == 1); - // Iteration after modification - { - VectorLike vec; - vec.push_back(1); - vec.push_back(2); - vec.push_back(3); - - // First iteration - size_t count = 0; - for (auto it = vec.begin(); it != vec.end(); ++it) { - count++; - } - assert(count == 3); - - // Modify - vec.push_back(4); - vec.set(0, 100); - - // Second iteration - vector expected = {100, 2, 3, 4}; - size_t idx = 0; - for (auto val : vec) { - assert(val == expected[idx]); - idx++; - } - assert(idx == 4); - } + assert(graph.get_handle(graph.get_id(h4)) == h4); - // Iterator copy construction - { - VectorLike vec; - vec.push_back(10); - vec.push_back(20); + assert(graph.get_node_count() == 4); + assert(graph.min_node_id() == graph.get_id(h2)); + assert(graph.max_node_id() == graph.get_id(h3)); + assert(graph.get_total_length() == 9); + assert(graph.get_edge_count() == 0); - auto it1 = vec.begin(); - auto it2(it1); // Copy constructor + graph.follow_edges(h4, true, [](const handle_t &prev) { + assert(false); + return true; + }); + graph.follow_edges(h4, false, [](const handle_t &next) { + assert(false); + return true; + }); + } - assert(it1 == it2); - assert(*it1 == *it2); - assert(*it1 == 10); - } + graph.create_edge(h, h2); - // Iterator assignment - { - VectorLike vec; - 
vec.push_back(10); - vec.push_back(20); - vec.push_back(30); + bool found1 = false, found2 = false, found3 = false, found4 = false; + int count1 = 0, count2 = 0, count3 = 0, count4 = 0; - auto it1 = vec.begin(); - auto it2 = vec.begin(); - ++it2; + // DeletableHandleGraph has correct structure after creating an edge + { + assert(graph.get_edge_count() == 1); - assert(*it1 == 10); - assert(*it2 == 20); + graph.follow_edges(h, false, [&](const handle_t &next) { + if (next == h2) { + found1 = true; + } + count1++; + return true; + }); + graph.follow_edges(h2, true, [&](const handle_t &prev) { + if (prev == h) { + found2 = true; + } + count2++; + return true; + }); + graph.follow_edges(graph.flip(h), true, [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found3 = true; + } + count3++; + return true; + }); + graph.follow_edges(graph.flip(h2), false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found4 = true; + } + count4++; + return true; + }); + assert(count1 == 1); + assert(count2 == 1); + assert(count3 == 1); + assert(count4 == 1); + assert(found1); + assert(found2); + assert(found3); + assert(found4); - it1 = it2; // Assignment - assert(it1 == it2); - assert(*it1 == 20); - } + count1 = count2 = count3 = count4 = 0; + found1 = found2 = found3 = found4 = false; + } - // BidirectionalIterator tests. 
- { - VectorLike vec; - vec.push_back(10); - vec.push_back(20); - vec.push_back(30); - - auto it1 = vec.begin(); - auto it2 = it1; - ++it2; - auto also_decremented = --it2; - - assert(it2 == it1); - assert(also_decremented == it1); - - it2++; - auto not_decremented = it2--; - - assert(it2 == it1); - assert(not_decremented != it1); - assert(*not_decremented == 20); - - auto it3 = vec.end(); - it3--; - assert(it3 != vec.end()); - assert(*it3 == 30); - } + graph.create_edge(h, graph.flip(h3)); - // RandomAccessIterator tests - { - VectorLike vec; - vec.push_back(10); - vec.push_back(20); - vec.push_back(30); - - auto it1 = vec.begin(); - auto it2 = it1; - - it1 += 1; - assert(*it1 == 20); - - it1 += 2; - assert(it1 == vec.end()); - - it1 -= 1; - auto it3 = it2 + 2; - assert(it1 == it3); - assert(*it1 == 30); - assert(it2 == vec.begin()); - - auto it4 = it1 - 2; - assert(*it4 == 10); - - assert(*it1 == vec.begin()[2]); - assert(*it4 == vec.begin()[0]); - assert(it4[2] == *it1); - assert(it1[-2] == *it4); - - assert(it1 + -2 == it4); - assert(it4 - -2 == it1); - - it1 += -2; - assert(it1 == it4); - - it1 -= -1; - it4++; - assert(it1 == it4); - } + bool found5 = false, found6 = false, found7 = false, found8 = false; + int count5 = 0, count6 = 0; - // Iterator comparison tests - { - VectorLike vec; - vec.push_back(10); - vec.push_back(20); - vec.push_back(30); - - auto it1 = vec.begin(); - auto it2 = it1; - - assert(it1 >= it2); - assert(it1 <= it2); - assert(!(it1 < it2)); - assert(!(it1 > it2)); - it1++; - - assert(it1 >= it2); - assert(!(it1 <= it2)); - assert(!(it2 >= it1)); - assert(it2 <= it1); - assert(!(it1 < it2)); - assert(it1 > it2); - assert(it2 < it1); - assert(!(it2 > it1)); - } + // DeletableHandleGraph has correct structure after creating an edge with + // a traversal + { + assert(graph.get_edge_count() == 2); - // Iterator distance tests - { - VectorLike vec; - vec.push_back(10); - vec.push_back(20); - vec.push_back(30); + graph.follow_edges(h, false, 
[&](const handle_t &next) { + if (next == h2) { + found1 = true; + } else if (next == graph.flip(h3)) { + found2 = true; + } + count1++; + return true; + }); + graph.follow_edges(graph.flip(h), true, [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found3 = true; + } else if (prev == h3) { + found4 = true; + } + count2++; + return true; + }); + graph.follow_edges(h2, true, [&](const handle_t &prev) { + if (prev == h) { + found5 = true; + } + count3++; + return true; + }); + graph.follow_edges(graph.flip(h2), false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found6 = true; + } + count4++; + return true; + }); + graph.follow_edges(graph.flip(h3), true, [&](const handle_t &prev) { + if (prev == h) { + found7 = true; + } + count5++; + return true; + }); + graph.follow_edges(h3, false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found8 = true; + } + count6++; + return true; + }); + assert(count1 == 2); + assert(count2 == 2); + assert(count3 == 1); + assert(count4 == 1); + assert(count5 == 1); + assert(count6 == 1); + assert(found1); + assert(found2); + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(found7); + assert(found8); + + count1 = count2 = count3 = count4 = count5 = count6 = 0; + found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = + false; + } - assert(vec.end() - vec.begin() == vec.size()); + graph.create_edge(h4, graph.flip(h4)); - auto it1 = vec.begin(); - auto it2 = it1; - - it1 += 1; - it2 += 2; + // DeletableHandleGraph has correct structure after creating a reversing + // self-loop + { + assert(graph.get_edge_count() == 3); - assert(it2 - it1 == 1); - assert(it1 - it2 == -1); + graph.follow_edges(h4, false, [&](const handle_t &next) { + if (next == graph.flip(h4)) { + found1 = true; + } + count1++; + return true; + }); + graph.follow_edges(graph.flip(h4), true, [&](const handle_t &prev) { + if (prev == h4) { + found2 = true; + } + count2++; + return true; + 
}); + assert(count1 == 1); + assert(count2 == 1); + assert(found1); + assert(found2); - it1--; - assert(it2 - it1 == 2); - assert(it1 - it2 == -2); - } + count1 = count2 = 0; + found1 = found2 = false; + } - cerr << "Iterator (" << typeid(typename VectorLike::iterator).name() << ") tests successful!" << endl; -} + graph.create_edge(h, graph.flip(h4)); + graph.create_edge(graph.flip(h3), h4); -template -void test_paged_vector() { - enum vec_op_t {SET = 0, GET = 1, APPEND = 2, POP = 3, SERIALIZE = 4}; - std::random_device rd; - std::default_random_engine prng(rd()); - std::uniform_int_distribution op_distr(0, 4); - std::uniform_int_distribution val_distr(0, 100); - - int num_runs = 200; - int num_ops = 200; - int gets_per_op = 5; - int sets_per_op = 5; - int appends_per_op = 3; - int pops_per_op = 1; - - for (size_t i = 0; i < num_runs; i++) { - - uint64_t next_val = val_distr(prng); - - std::vector std_vec; - PagedVectorImpl dyn_vec; - - for (size_t j = 0; j < num_ops; j++) { - - vec_op_t op = (vec_op_t) op_distr(prng); - switch (op) { - case SET: - if (!std_vec.empty()) { - for (size_t k = 0; k < sets_per_op; k++) { - size_t idx = prng() % dyn_vec.size(); - std_vec[idx] = next_val; - dyn_vec.set(idx, next_val); - next_val = val_distr(prng); - } - } - - break; - - case GET: - if (!std_vec.empty()) { - for (size_t k = 0; k < gets_per_op; k++) { - size_t idx = prng() % dyn_vec.size(); - assert(std_vec[idx] == dyn_vec.get(idx)); - next_val = val_distr(prng); - } - } - - break; - - case APPEND: - for (size_t k = 0; k < appends_per_op; k++) { - std_vec.push_back(next_val); - dyn_vec.push_back(next_val); - next_val = val_distr(prng); - } - - break; - - case POP: - if (!std_vec.empty()) { - for (size_t k = 0; k < pops_per_op; k++) { - std_vec.pop_back(); - dyn_vec.pop_back(); - } - } - - break; - - case SERIALIZE: - { - stringstream strm; - - dyn_vec.serialize(strm); - strm.seekg(0); - PagedVectorImpl copy_vec(strm); - - assert(copy_vec.size() == dyn_vec.size()); - for 
(size_t i = 0; i < copy_vec.size(); i++) { - assert(copy_vec.get(i) == dyn_vec.get(i)); - } - break; - } - - default: - break; - } - - assert(std_vec.empty() == dyn_vec.empty()); - assert(std_vec.size() == dyn_vec.size()); - } - } - cerr << "PagedVector (" << typeid(PagedVectorImpl).name() << ") tests successful!" << endl; -} + assert(graph.get_edge_count() == 5); -void test_packed_deque() { - enum deque_op_t {SET = 0, GET = 1, APPEND_LEFT = 2, POP_LEFT = 3, APPEND_RIGHT = 4, POP_RIGHT = 5, SERIALIZE = 6}; - std::random_device rd; - std::default_random_engine prng(rd()); - std::uniform_int_distribution op_distr(0, 6); - - int num_runs = 1000; - int num_ops = 200; - int gets_per_op = 5; - int sets_per_op = 5; - int appends_per_op = 3; - int pops_per_op = 1; - - for (size_t i = 0; i < num_runs; i++) { - - uint64_t next_val = 0; - - std::deque std_deq; - PackedDeque<> suc_deq; - - for (size_t j = 0; j < num_ops; j++) { - - deque_op_t op = (deque_op_t) op_distr(prng); - switch (op) { - case SET: - if (!std_deq.empty()) { - for (size_t k = 0; k < sets_per_op; k++) { - size_t idx = prng() % std_deq.size(); - std_deq[idx] = next_val; - suc_deq.set(idx, next_val); - next_val++; - } - } - - break; - - case GET: - if (!std_deq.empty()) { - for (size_t k = 0; k < gets_per_op; k++) { - size_t idx = prng() % std_deq.size(); - assert(std_deq[idx] == suc_deq.get(idx)); - next_val++; - } - } - - break; - - case APPEND_LEFT: - for (size_t k = 0; k < appends_per_op; k++) { - std_deq.push_front(next_val); - suc_deq.push_front(next_val); - next_val++; - } - - break; - - case POP_LEFT: - for (size_t k = 0; k < pops_per_op && !std_deq.empty(); k++) { - std_deq.pop_front(); - suc_deq.pop_front(); - } - - break; - - case APPEND_RIGHT: - for (size_t k = 0; k < appends_per_op; k++) { - std_deq.push_back(next_val); - suc_deq.push_back(next_val); - next_val++; - } - - break; - - case POP_RIGHT: - for (size_t k = 0; k < pops_per_op && !std_deq.empty(); k++) { - std_deq.pop_back(); - 
suc_deq.pop_back(); - } - - break; - - case SERIALIZE: - { - stringstream strm; - - suc_deq.serialize(strm); - strm.seekg(0); - PackedDeque<> copy_deq(strm); - - assert(copy_deq.size() == suc_deq.size()); - for (size_t i = 0; i < copy_deq.size(); i++) { - assert(copy_deq.get(i) == suc_deq.get(i)); - } - break; - } - - default: - break; - } - - assert(std_deq.empty() == suc_deq.empty()); - assert(std_deq.size() == suc_deq.size()); - } - } - cerr << "PackedDeque tests successful!" << endl; -} + graph.destroy_edge(h, graph.flip(h4)); + graph.destroy_edge(graph.flip(h3), h4); -void test_packed_set() { - enum set_op_t {INSERT = 0, REMOVE = 1, FIND = 2}; - - random_device rd; - default_random_engine prng(rd()); - uniform_int_distribution op_distr(0, 2); - - int num_runs = 1000; - int num_ops = 200; - int inserts_per_op = 2; - int prev_inserts_per_op = 1; - int removes_per_op = 1; - int finds_per_op = 5; - - for (size_t i = 0; i < num_runs; i++) { - uint64_t next_val = 0; - - unordered_set std_set; - PackedSet<> packed_set; - - for (size_t j = 0; j < num_ops; j++) { - set_op_t op = (set_op_t) op_distr(prng); - switch (op) { - case INSERT: - - for (size_t k = 0; k < inserts_per_op; ++k) { - packed_set.insert(next_val); - std_set.insert(next_val); - next_val++; - } - for (size_t k = 0; k < prev_inserts_per_op; ++k) { - uint64_t val = prng() % next_val; - packed_set.insert(val); - std_set.insert(val); - } - - break; - - case REMOVE: - if (next_val > 0) { - for (size_t k = 0; k < removes_per_op; ++k) { - uint64_t val = prng() % next_val; - packed_set.remove(val); - std_set.erase(val); - } - } - else { - packed_set.remove(0); - packed_set.remove(1); - packed_set.remove(2); - std_set.erase(0); - std_set.erase(1); - std_set.erase(2); - } - - break; - - case FIND: - if (next_val) { - for (size_t k = 0; k < finds_per_op; k++) { - uint64_t val = prng() % next_val; - assert(packed_set.find(val) == (bool) std_set.count(val)); - } - } - else { - assert(packed_set.find(0) == (bool) 
std_set.count(0)); - assert(packed_set.find(1) == (bool) std_set.count(1)); - assert(packed_set.find(2) == (bool) std_set.count(2)); - } - - break; - -// case SERIALIZE: -// { -// stringstream strm; -// -// dyn_vec.serialize(strm); -// strm.seekg(0); -// PackedVector<> copy_vec(strm); -// -// assert(copy_vec.size() == dyn_vec.size()); -// for (size_t i = 0; i < copy_vec.size(); i++) { -// assert(copy_vec.get(i) == dyn_vec.get(i)); -// } -// break; -// } - - default: - break; - } - - assert(std_set.empty() == packed_set.empty()); - assert(std_set.size() == packed_set.size()); - } - } - cerr << "PackedSet tests successful!" << endl; -} + assert(graph.get_edge_count() == 3); -void test_packed_graph() { - - auto check_path = [&](MutablePathDeletableHandleGraph& graph, const path_handle_t& p, const vector& steps) { - assert(graph.get_step_count(p) == steps.size()); - - step_handle_t step = graph.path_begin(p); - for (int i = 0; i < steps.size(); i++) { - - assert(graph.get_path_handle_of_step(step) == p); - assert(graph.get_handle_of_step(step) == steps[i]); - - if (graph.get_is_circular(p)) { - assert(graph.has_next_step(step)); - assert(graph.has_previous_step(step)); - } - else { - assert(graph.has_next_step(step) == i + 1 < steps.size()); - assert(graph.has_previous_step(step) == i > 0); - } - - step = graph.get_next_step(step); - } - - if (graph.get_is_circular(p) && !graph.is_empty(p)) { - assert(step == graph.path_begin(p)); - } - else { - assert(step == graph.path_end(p)); - } - - step = graph.path_back(p); - - for (int i = steps.size() - 1; i >= 0; i--) { - - assert(graph.get_path_handle_of_step(step) == p); - assert(graph.get_handle_of_step(step) == steps[i]); - - if (graph.get_is_circular(p)) { - assert(graph.has_next_step(step)); - assert(graph.has_previous_step(step)); - } - else { - assert(graph.has_next_step(step) == i + 1 < steps.size()); - assert(graph.has_previous_step(step) == i > 0); - } - - step = graph.get_previous_step(step); - } - - if 
(graph.get_is_circular(p) && !graph.is_empty(p)) { - assert(step == graph.path_back(p)); - } - else { - assert(step == graph.path_front_end(p)); - } - }; - - auto check_flips = [&](MutablePathDeletableHandleGraph& graph, const path_handle_t& p, const vector& steps) { - - auto flipped = steps; - for (size_t i = 0; i < steps.size(); i++) { - graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); - flipped[i] = graph.flip(flipped[i]); - check_path(graph, p, flipped); - - graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); - flipped[i] = graph.flip(flipped[i]); - check_path(graph, p, flipped); - } - }; - - // defragmentation - { - PackedGraph graph; - - handle_t h1 = graph.create_handle("ATGTAG"); - handle_t h2 = graph.create_handle("ACCCC"); - handle_t h3 = graph.create_handle("C"); - handle_t h4 = graph.create_handle("ATT"); - handle_t h5 = graph.create_handle("GGCA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h3); - graph.create_edge(h3, h5); - graph.create_edge(h3, h4); - graph.create_edge(h4, h5); - - path_handle_t p0 = graph.create_path_handle("0"); - path_handle_t p1 = graph.create_path_handle("1"); - path_handle_t p2 = graph.create_path_handle("2"); - - - graph.append_step(p0, h3); - graph.append_step(p0, h4); - graph.append_step(p0, h5); - - graph.append_step(p1, h1); - graph.append_step(p1, h3); - graph.append_step(p1, h5); - - graph.append_step(p2, h1); - graph.append_step(p2, h2); - graph.append_step(p2, h3); - graph.append_step(p2, h4); - graph.append_step(p2, h5); - - graph.destroy_path(p0); - graph.destroy_path(p2); - graph.destroy_handle(h2); - graph.destroy_handle(h4); - - assert(graph.get_sequence(h1) == "ATGTAG"); - assert(graph.get_sequence(h3) == "C"); - assert(graph.get_sequence(h5) == "GGCA"); - - bool found = false; - graph.follow_edges(h1, false, [&](const handle_t& next) { - if (next == h3) { - found = true; - } - else { - assert(false); - } - return true; + // DeletableHandleGraph 
has correct structure after creating and deleting + // edges + { + graph.follow_edges(h, false, [&](const handle_t &next) { + if (next == h2) { + found1 = true; + } else if (next == graph.flip(h3)) { + found2 = true; + } + count1++; + return true; }); - assert(found); - - found = false; - graph.follow_edges(h3, false, [&](const handle_t& next) { - if (next == h5) { - found = true; - } - else { - assert(false); - } - return true; + graph.follow_edges(graph.flip(h), true, [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found3 = true; + } else if (prev == h3) { + found4 = true; + } + count2++; + return true; }); - assert(found); - - check_flips(graph, p1, {h1, h3, h5}); - } - - // tightening vector allocations - { - PackedGraph graph; - handle_t h1 = graph.create_handle("ATGTAG"); - handle_t h2 = graph.create_handle("ACCCC"); - handle_t h3 = graph.create_handle("C"); - handle_t h4 = graph.create_handle("ATT"); - handle_t h5 = graph.create_handle("GGCA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h3); - graph.create_edge(h3, h5); - graph.create_edge(h3, h4); - graph.create_edge(h4, h5); - - path_handle_t p0 = graph.create_path_handle("0"); - path_handle_t p1 = graph.create_path_handle("1"); - path_handle_t p2 = graph.create_path_handle("2"); - - - graph.append_step(p0, h3); - graph.append_step(p0, h4); - graph.append_step(p0, h5); - - graph.append_step(p1, h1); - graph.append_step(p1, h3); - graph.append_step(p1, h5); - - graph.append_step(p2, h1); - graph.append_step(p2, h2); - graph.append_step(p2, h3); - graph.append_step(p2, h4); - graph.append_step(p2, h5); - - // delete some things, but not enough to trigger defragmentation - graph.destroy_path(p2); - graph.destroy_handle(h2); - // reallocate and compress down to the smaller size - graph.optimize(false); - - assert(graph.get_sequence(h1) == "ATGTAG"); - assert(graph.get_sequence(h3) == "C"); - assert(graph.get_sequence(h4) == "ATT"); - 
assert(graph.get_sequence(h5) == "GGCA"); - - int count = 0; - bool found1 = false, found2 = false; - graph.follow_edges(h1, false, [&](const handle_t& h) { - if (h == h3) { - found1 = true; - } - count++; + graph.follow_edges(h2, true, [&](const handle_t &prev) { + if (prev == h) { + found5 = true; + } + count3++; + return true; + }); + graph.follow_edges(graph.flip(h2), false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found6 = true; + } + count4++; + return true; + }); + graph.follow_edges(graph.flip(h3), true, [&](const handle_t &prev) { + if (prev == h) { + found7 = true; + } + count5++; + return true; }); + graph.follow_edges(h3, false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found8 = true; + } + count6++; + return true; + }); + assert(count1 == 2); + assert(count2 == 2); + assert(count3 == 1); + assert(count4 == 1); + assert(count5 == 1); + assert(count6 == 1); assert(found1); - assert(count == 1); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h1, true, [&](const handle_t& h) { - count++; + assert(found2); + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(found7); + assert(found8); + + count1 = count2 = count3 = count4 = count5 = count6 = 0; + found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = + false; + + graph.follow_edges(h4, false, [&](const handle_t &next) { + if (next == graph.flip(h4)) { + found1 = true; + } + count1++; + return true; }); - assert(count == 0); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h3, false, [&](const handle_t& h) { - if (h == h4) { - found1 = true; - } - if (h == h5) { - found2 = true; - } - count++; + graph.follow_edges(graph.flip(h4), true, [&](const handle_t &prev) { + if (prev == h4) { + found2 = true; + } + count2++; + return true; }); + assert(count1 == 1); + assert(count2 == 1); assert(found1); assert(found2); - assert(count == 2); - - count = 0; - found1 = false, found2 = false; - 
graph.follow_edges(h3, true, [&](const handle_t& h) { - if (h == h1) { - found1 = true; - } - count++; + + count1 = count2 = 0; + found1 = found2 = false; + } + + handle_t h5 = graph.create_handle("GGACC"); + + // make some edges to ensure that deleting is difficult + graph.create_edge(h, h5); + graph.create_edge(h5, h); + graph.create_edge(graph.flip(h5), h2); + graph.create_edge(h3, graph.flip(h5)); + graph.create_edge(h3, h5); + graph.create_edge(h5, h4); + + graph.destroy_handle(h5); + + // DeletableHandleGraph has correct structure after creating and deleting + // a node + { + + graph.follow_edges(h, false, [&](const handle_t &next) { + if (next == h2) { + found1 = true; + } else if (next == graph.flip(h3)) { + found2 = true; + } + count1++; + return true; + }); + graph.follow_edges(graph.flip(h), true, [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found3 = true; + } else if (prev == h3) { + found4 = true; + } + count2++; + return true; + }); + graph.follow_edges(h2, true, [&](const handle_t &prev) { + if (prev == h) { + found5 = true; + } + count3++; + return true; + }); + graph.follow_edges(graph.flip(h2), false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found6 = true; + } + count4++; + return true; + }); + graph.follow_edges(graph.flip(h3), true, [&](const handle_t &prev) { + if (prev == h) { + found7 = true; + } + count5++; + return true; }); + graph.follow_edges(h3, false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found8 = true; + } + count6++; + return true; + }); + assert(count1 == 2); + assert(count2 == 2); + assert(count3 == 1); + assert(count4 == 1); + assert(count5 == 1); + assert(count6 == 1); assert(found1); - assert(count == 1); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h4, false, [&](const handle_t& h) { - if (h == h5) { - found1 = true; - } - count++; + assert(found2); + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(found7); + 
assert(found8); + + count1 = count2 = count3 = count4 = count5 = count6 = 0; + found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = + false; + + graph.follow_edges(h4, false, [&](const handle_t &next) { + if (next == graph.flip(h4)) { + found1 = true; + } + count1++; + return true; }); + graph.follow_edges(graph.flip(h4), true, [&](const handle_t &prev) { + if (prev == h4) { + found2 = true; + } + count2++; + return true; + }); + assert(count1 == 1); + assert(count2 == 1); assert(found1); - assert(count == 1); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h4, true, [&](const handle_t& h) { - if (h == h3) { - found1 = true; - } - count++; + assert(found2); + + count1 = count2 = 0; + found1 = found2 = false; + } + + // DeletableHandleGraph has correct structure after swapping nodes + { + + graph.follow_edges(h, false, [&](const handle_t &next) { + if (next == h2) { + found1 = true; + } else if (next == graph.flip(h3)) { + found2 = true; + } + count1++; + return true; + }); + graph.follow_edges(graph.flip(h), true, [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found3 = true; + } else if (prev == h3) { + found4 = true; + } + count2++; + return true; + }); + graph.follow_edges(h2, true, [&](const handle_t &prev) { + if (prev == h) { + found5 = true; + } + count3++; + return true; }); + graph.follow_edges(graph.flip(h2), false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found6 = true; + } + count4++; + return true; + }); + graph.follow_edges(graph.flip(h3), true, [&](const handle_t &prev) { + if (prev == h) { + found7 = true; + } + count5++; + return true; + }); + graph.follow_edges(h3, false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found8 = true; + } + count6++; + return true; + }); + assert(count1 == 2); + assert(count2 == 2); + assert(count3 == 1); + assert(count4 == 1); + assert(count5 == 1); + assert(count6 == 1); assert(found1); - assert(count == 1); - - count = 0; - 
found1 = false, found2 = false; - graph.follow_edges(h5, false, [&](const handle_t& h) { - count++; + assert(found2); + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(found7); + assert(found8); + + count1 = count2 = count3 = count4 = count5 = count6 = 0; + found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = + false; + + graph.follow_edges(h4, false, [&](const handle_t &next) { + if (next == graph.flip(h4)) { + found1 = true; + } + count1++; + return true; }); - assert(count == 0); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h5, true, [&](const handle_t& h) { - if (h == h3) { - found1 = true; - } - else if (h == h4) { - found2 = true; - } - count++; + graph.follow_edges(graph.flip(h4), true, [&](const handle_t &prev) { + if (prev == h4) { + found2 = true; + } + count2++; + return true; }); + assert(count1 == 1); + assert(count2 == 1); assert(found1); assert(found2); - assert(count == 2); - - check_flips(graph, p0, {h3, h4, h5}); - check_flips(graph, p1, {h1, h3, h5}); - } - - // optimizing with id reassignment - { - PackedGraph graph; - handle_t h1 = graph.create_handle("ATGTAG"); - handle_t h2 = graph.create_handle("ACCCC"); - handle_t h3 = graph.create_handle("C"); - handle_t h4 = graph.create_handle("ATT"); - handle_t h5 = graph.create_handle("GGCA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h3); - graph.create_edge(h3, h5); - graph.create_edge(h3, h4); - graph.create_edge(h4, h5); - - path_handle_t p0 = graph.create_path_handle("0"); - path_handle_t p1 = graph.create_path_handle("1"); - path_handle_t p2 = graph.create_path_handle("2"); - - - graph.append_step(p0, h3); - graph.append_step(p0, h4); - graph.append_step(p0, h5); - - graph.append_step(p1, h1); - graph.append_step(p1, h3); - graph.append_step(p1, h5); - - graph.append_step(p2, h1); - graph.append_step(p2, h2); - graph.append_step(p2, h3); - graph.append_step(p2, h4); - 
graph.append_step(p2, h5); - - // delete some things, but not enough to trigger defragmentation - graph.destroy_path(p2); - graph.destroy_handle(h2); - // reallocate and compress down to the smaller size, reassigning IDs - graph.optimize(true); - set seen_ids; - - int count = 0; - bool found1 = false, found2 = false, found3 = false, found4 = false; - graph.for_each_handle([&](const handle_t& handle) { - if (graph.get_sequence(handle) == "ATGTAG") { - h1 = handle; - found1 = true; - } - else if (graph.get_sequence(handle) == "C") { - h3 = handle; - found2 = true; - } - else if (graph.get_sequence(handle) == "ATT") { - h4 = handle; - found3 = true; - } - else if (graph.get_sequence(handle) == "GGCA") { - h5 = handle; - found4 = true; - } - else { - assert(false); - } - count++; - - seen_ids.insert(graph.get_id(handle)); - - assert(graph.get_id(handle) >= 1); - assert(graph.get_id(handle) <= 4); + + count1 = count2 = 0; + found1 = found2 = false; + } + + // DeletableHandleGraph visits all nodes with for_each_handle + { + graph.for_each_handle([&](const handle_t &handle) { + if (handle == h) { + found1 = true; + } else if (handle == h2) { + found2 = true; + } else if (handle == h3) { + found3 = true; + } else if (handle == h4) { + found4 = true; + } else { + assert(false); + } + return true; }); - + assert(found1); assert(found2); assert(found3); assert(found4); - assert(count == 4); - assert(seen_ids.size() == 4); - - count = 0; + found1 = found2 = found3 = found4 = false; - - graph.follow_edges(h1, false, [&](const handle_t& h) { - if (h == h3) { - found1 = true; - } - count++; + } + + // to make sure the sequence reverse complemented correctly + int i = 0; + auto check_rev_comp = [&](const std::string &seq1, + const std::string &seq2) { + i++; + assert(seq1.size() == seq2.size()); + auto it = seq1.begin(); + auto rit = seq2.rbegin(); + for (; it != seq1.end(); it++) { + if (*it == 'A') { + assert(*rit == 'T'); + } else if (*it == 'C') { + assert(*rit == 'G'); + } 
else if (*it == 'G') { + assert(*rit == 'C'); + } else if (*it == 'T') { + assert(*rit == 'A'); + } else if (*it == 'N') { + assert(*rit == 'N'); + } else { + assert(false); + } + + rit++; + } + }; + + int count7 = 0, count8 = 0; + + // DeletableHandleGraph correctly reverses a node + { + + string seq1 = graph.get_sequence(h); + h = graph.apply_orientation(graph.flip(h)); + + // check the sequence + string rev_seq1 = graph.get_sequence(h); + check_rev_comp(seq1, rev_seq1); + + // check that the edges are what we expect + + graph.follow_edges(h, false, [&](const handle_t &next) { + count1++; + return true; }); - assert(found1); - assert(count == 1); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h1, true, [&](const handle_t& h) { - count++; + graph.follow_edges(h, true, [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found1 = true; + } else if (prev == h3) { + found2 = true; + } + count2++; + return true; + }); + graph.follow_edges(graph.flip(h), true, [&](const handle_t &next) { + count3++; + return true; }); - assert(count == 0); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h3, false, [&](const handle_t& h) { - if (h == h4) { - found1 = true; - } - if (h == h5) { - found2 = true; - } - count++; + graph.follow_edges(graph.flip(h), false, [&](const handle_t &prev) { + if (prev == h2) { + found3 = true; + } else if (prev == graph.flip(h3)) { + found4 = true; + } + count4++; + return true; }); + graph.follow_edges(h2, true, [&](const handle_t &prev) { + if (prev == graph.flip(h)) { + found5 = true; + } + count5++; + return true; + }); + graph.follow_edges(graph.flip(h2), false, [&](const handle_t &next) { + if (next == h) { + found6 = true; + } + count6++; + return true; + }); + graph.follow_edges(graph.flip(h3), true, [&](const handle_t &prev) { + if (prev == graph.flip(h)) { + found7 = true; + } + count7++; + return true; + }); + graph.follow_edges(h3, false, [&](const handle_t &next) { + if (next == h) { 
+ found8 = true; + } + count8++; + return true; + }); + assert(count1 == 0); + assert(count2 == 2); + assert(count3 == 0); + assert(count4 == 2); + assert(count5 == 1); + assert(count6 == 1); + assert(count7 == 1); + assert(count8 == 1); assert(found1); assert(found2); - assert(count == 2); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h3, true, [&](const handle_t& h) { - if (h == h1) { - found1 = true; - } - count++; + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(found7); + assert(found8); + + count1 = count2 = count3 = count4 = count5 = count6 = count7 = count8 = + 0; + found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = + false; + + // and now switch it back to the same orientation and repeat the + // topology checks + + h = graph.apply_orientation(graph.flip(h)); + + graph.follow_edges(h, false, [&](const handle_t &next) { + if (next == h2) { + found1 = true; + } else if (next == graph.flip(h3)) { + found2 = true; + } + count1++; + return true; + }); + graph.follow_edges(graph.flip(h), true, [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found3 = true; + } else if (prev == h3) { + found4 = true; + } + count2++; + return true; + }); + graph.follow_edges(h2, true, [&](const handle_t &prev) { + if (prev == h) { + found5 = true; + } + count3++; + return true; }); + graph.follow_edges(graph.flip(h2), false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found6 = true; + } + count4++; + return true; + }); + graph.follow_edges(graph.flip(h3), true, [&](const handle_t &prev) { + if (prev == h) { + found7 = true; + } + count5++; + return true; + }); + graph.follow_edges(h3, false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found8 = true; + } + count6++; + return true; + }); + assert(count1 == 2); + assert(count2 == 2); + assert(count3 == 1); + assert(count4 == 1); + assert(count5 == 1); + assert(count6 == 1); assert(found1); - assert(count == 1); 
- - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h4, false, [&](const handle_t& h) { - if (h == h5) { - found1 = true; - } - count++; + assert(found2); + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(found7); + assert(found8); + + count1 = count2 = count3 = count4 = count5 = count6 = 0; + found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = + false; + + graph.follow_edges(h4, false, [&](const handle_t &next) { + if (next == graph.flip(h4)) { + found1 = true; + } + count1++; + return true; }); + graph.follow_edges(graph.flip(h4), true, [&](const handle_t &prev) { + if (prev == h4) { + found2 = true; + } + count2++; + return true; + }); + assert(count1 == 1); + assert(count2 == 1); assert(found1); - assert(count == 1); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h4, true, [&](const handle_t& h) { - if (h == h3) { - found1 = true; - } - count++; + assert(found2); + + count1 = count2 = 0; + found1 = found2 = false; + } + + vector parts = graph.divide_handle(h, vector{1, 2}); + + int count9 = 0, count10 = 0, count11 = 0, count12 = 0; + bool found9 = false, found10 = false, found11 = false, found12 = false, + found13 = false, found14 = false; + + // DeletableHandleGraph can correctly divide a node + { + + assert(parts.size() == 3); + + assert(graph.get_sequence(parts[0]) == "A"); + assert(graph.get_length(parts[0]) == 1); + assert(graph.get_sequence(parts[1]) == "T"); + assert(graph.get_length(parts[1]) == 1); + assert(graph.get_sequence(parts[2]) == "G"); + assert(graph.get_length(parts[2]) == 1); + + graph.follow_edges(parts[0], false, [&](const handle_t &next) { + if (next == parts[1]) { + found1 = true; + } + count1++; + return true; + }); + graph.follow_edges(parts[0], true, [&](const handle_t &prev) { + count2++; + return true; + }); + graph.follow_edges(graph.flip(parts[0]), true, + [&](const handle_t &prev) { + if (prev == graph.flip(parts[1])) { + found2 = true; + 
} + count3++; + return true; + }); + graph.follow_edges(graph.flip(parts[0]), false, + [&](const handle_t &next) { + count4++; + return true; + }); + + graph.follow_edges(parts[1], false, [&](const handle_t &next) { + if (next == parts[2]) { + found3 = true; + } + count5++; + return true; + }); + graph.follow_edges(parts[1], true, [&](const handle_t &prev) { + if (prev == parts[0]) { + found4 = true; + } + count6++; + return true; + }); + graph.follow_edges(graph.flip(parts[1]), true, + [&](const handle_t &prev) { + if (prev == graph.flip(parts[2])) { + found5 = true; + } + count7++; + return true; + }); + graph.follow_edges(graph.flip(parts[1]), false, + [&](const handle_t &next) { + if (next == graph.flip(parts[0])) { + found6 = true; + } + count8++; + return true; + }); + + graph.follow_edges(parts[2], false, [&](const handle_t &next) { + if (next == h2) { + found7 = true; + } else if (next == graph.flip(h3)) { + found8 = true; + } + count9++; + return true; + }); + graph.follow_edges(parts[2], true, [&](const handle_t &prev) { + if (prev == parts[1]) { + found9 = true; + } + count10++; + return true; + }); + graph.follow_edges(graph.flip(parts[2]), true, + [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found10 = true; + } else if (prev == h3) { + found11 = true; + } + count11++; + return true; + }); + graph.follow_edges(graph.flip(parts[2]), false, + [&](const handle_t &next) { + if (next == graph.flip(parts[1])) { + found12 = true; + } + count12++; + return true; + }); + graph.follow_edges(graph.flip(h3), true, [&](const handle_t &prev) { + if (prev == parts[2]) { + found13 = true; + } + return true; + }); + graph.follow_edges(h2, true, [&](const handle_t &prev) { + if (prev == parts[2]) { + found14 = true; + } + return true; }); + + assert(count1 == 1); + assert(count2 == 0); + assert(count3 == 1); + assert(count4 == 0); + assert(count5 == 1); + assert(count6 == 1); + assert(count7 == 1); + assert(count8 == 1); + assert(count9 == 2); + 
assert(count10 == 1); + assert(count11 == 2); + assert(count12 == 1); assert(found1); - assert(count == 1); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h5, false, [&](const handle_t& h) { - count++; + assert(found2); + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(found7); + assert(found8); + assert(found9); + assert(found10); + assert(found11); + assert(found12); + assert(found13); + assert(found14); + + count1 = count2 = count3 = count4 = count5 = count6 = count7 = count8 = + count9 = count10 = count11 = count12 = 0; + found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = + found9 = found10 = found11 = found12 = false; + } + + vector rev_parts = + graph.divide_handle(graph.flip(h3), vector{1}); + + // DeletableHandleGraph can correctly divide a node on the reverse strand + { + + assert(graph.get_sequence(rev_parts[0]) == "G"); + assert(graph.get_length(rev_parts[0]) == 1); + assert(graph.get_is_reverse(rev_parts[0])); + assert(graph.get_sequence(rev_parts[1]) == "TC"); + assert(graph.get_length(rev_parts[1]) == 2); + assert(graph.get_is_reverse(rev_parts[1])); + + graph.follow_edges(rev_parts[0], false, [&](const handle_t &next) { + if (next == rev_parts[1]) { + found1 = true; + } + count1++; + return true; + }); + graph.follow_edges(rev_parts[1], true, [&](const handle_t &prev) { + if (prev == rev_parts[0]) { + found2 = true; + } + count2++; + return true; + }); + graph.follow_edges(graph.flip(rev_parts[1]), false, + [&](const handle_t &next) { + if (next == graph.flip(rev_parts[0])) { + found3 = true; + } + count3++; + return true; + }); + graph.follow_edges(graph.flip(rev_parts[0]), true, + [&](const handle_t &prev) { + if (prev == graph.flip(rev_parts[1])) { + found4 = true; + } + count4++; + return true; + }); + graph.follow_edges(rev_parts[0], true, [&](const handle_t &prev) { + if (prev == parts[2]) { + found5 = true; + } + count5++; + return true; }); - assert(count == 0); - - 
count = 0; - found1 = false, found2 = false; - graph.follow_edges(h5, true, [&](const handle_t& h) { - if (h == h3) { - found1 = true; - } - else if (h == h4) { - found2 = true; - } - count++; + graph.follow_edges(rev_parts[1], false, [&](const handle_t &next) { + count6++; + return true; }); + + assert(count1 == 1); + assert(count2 == 1); + assert(count3 == 1); + assert(count4 == 1); + assert(count5 == 1); + assert(count6 == 0); assert(found1); assert(found2); - assert(count == 2); - - check_flips(graph, p0, {h3, h4, h5}); - check_flips(graph, p1, {h1, h3, h5}); + assert(found3); + assert(found4); + assert(found5); + } + + auto h6 = graph.create_handle("ACGT"); + auto h7 = graph.create_handle("GCGG"); + auto h8 = graph.create_handle("TTCA"); + + graph.create_edge(h6, h7); + graph.create_edge(h7, h8); + + h7 = graph.truncate_handle(h7, true, 1); + assert(graph.get_sequence(h7) == "CGG"); + assert(graph.get_degree(h7, true) == 0); + assert(graph.get_degree(h7, false) == 1); + assert(graph.get_degree(h6, false) == 0); + assert(graph.get_degree(h8, true) == 1); + + h7 = graph.truncate_handle(h7, false, 2); + assert(graph.get_sequence(h7) == "CG"); + assert(graph.get_degree(h7, true) == 0); + assert(graph.get_degree(h7, false) == 0); + assert(graph.get_degree(h6, false) == 0); + assert(graph.get_degree(h8, true) == 0); + + h6 = graph.change_sequence(h6, "AAAT"); + h7 = graph.change_sequence(h7, "G"); + assert(graph.get_sequence(h6) == "AAAT"); + assert(graph.get_sequence(graph.flip(h6)) == "ATTT"); + assert(graph.get_sequence(h7) == "G"); + assert(graph.get_sequence(graph.flip(h7)) == "C"); + } + } + + // second batch of test involving self loops + { + vector implementations; + + PackedGraph pg; + implementations.push_back(&pg); + + HashGraph hg; + implementations.push_back(&hg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + for (DeletableHandleGraph *implementation : implementations) { + + DeletableHandleGraph &graph = *implementation; + + // 
initialize the graph + + handle_t h1 = graph.create_handle("A"); + handle_t h2 = graph.create_handle("C"); + + graph.create_edge(h1, h2); + graph.create_edge(graph.flip(h1), h2); + + // test for the right initial topology + bool found1 = false, found2 = false, found3 = false, found4 = false, + found5 = false, found6 = false; + int count1 = 0, count2 = 0, count3 = 0, count4 = 0; + + graph.follow_edges(h1, false, [&](const handle_t &other) { + if (other == h2) { + found1 = true; + } + count1++; + }); + graph.follow_edges(h1, true, [&](const handle_t &other) { + if (other == graph.flip(h2)) { + found2 = true; + } + count2++; + }); + graph.follow_edges(h2, false, [&](const handle_t &other) { count3++; }); + graph.follow_edges(h2, true, [&](const handle_t &other) { + if (other == h1) { + found3 = true; + } else if (other == graph.flip(h1)) { + found4 = true; + } + count4++; + }); + assert(found1); + assert(found2); + assert(found3); + assert(found4); + assert(count1 == 1); + assert(count2 == 1); + assert(count3 == 0); + assert(count4 == 2); + found1 = found2 = found3 = found4 = found5 = found6 = false; + count1 = count2 = count3 = count4 = 0; + + // flip a node and check if the orientation is correct + h1 = graph.apply_orientation(graph.flip(h1)); + + graph.follow_edges(h1, false, [&](const handle_t &other) { + if (other == h2) { + found1 = true; + } + count1++; + }); + graph.follow_edges(h1, true, [&](const handle_t &other) { + if (other == graph.flip(h2)) { + found2 = true; + } + count2++; + }); + graph.follow_edges(h2, false, [&](const handle_t &other) { count3++; }); + graph.follow_edges(h2, true, [&](const handle_t &other) { + if (other == h1) { + found3 = true; + } else if (other == graph.flip(h1)) { + found4 = true; + } + count4++; + }); + assert(found1); + assert(found2); + assert(found3); + assert(found4); + assert(count1 == 1); + assert(count2 == 1); + assert(count3 == 0); + assert(count4 == 2); + found1 = found2 = found3 = found4 = found5 = found6 = false; + 
count1 = count2 = count3 = count4 = 0; + + // create a new edge + + graph.create_edge(h1, graph.flip(h2)); + + // check the topology + + graph.follow_edges(h1, false, [&](const handle_t &other) { + if (other == h2) { + found1 = true; + } else if (other == graph.flip(h2)) { + found2 = true; + } + count1++; + }); + graph.follow_edges(h1, true, [&](const handle_t &other) { + if (other == graph.flip(h2)) { + found3 = true; + } + count2++; + }); + graph.follow_edges(h2, false, [&](const handle_t &other) { + if (other == graph.flip(h1)) { + found4 = true; + } + count3++; + }); + graph.follow_edges(h2, true, [&](const handle_t &other) { + if (other == h1) { + found5 = true; + } else if (other == graph.flip(h1)) { + found6 = true; + } + count4++; + }); + assert(found1); + assert(found2); + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(count1 == 2); + assert(count2 == 1); + assert(count3 == 1); + assert(count4 == 2); + found1 = found2 = found3 = found4 = found5 = found6 = false; + count1 = count2 = count3 = count4 = 0; + + // now another node and check to make sure that the edges are updated + // appropriately + + h2 = graph.apply_orientation(graph.flip(h2)); + + graph.follow_edges(h1, false, [&](const handle_t &other) { + if (other == h2) { + found1 = true; + } else if (other == graph.flip(h2)) { + found2 = true; + } + count1++; + }); + graph.follow_edges(h1, true, [&](const handle_t &other) { + if (other == h2) { + found3 = true; + } + count2++; + }); + graph.follow_edges(h2, false, [&](const handle_t &other) { + if (other == h1) { + found4 = true; + } else if (other == graph.flip(h1)) { + found5 = true; + } + count3++; + }); + graph.follow_edges(h2, true, [&](const handle_t &other) { + if (other == h1) { + found6 = true; + } + count4++; + }); + assert(found1); + assert(found2); + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(count1 == 2); + assert(count2 == 1); + assert(count3 == 2); + assert(count4 == 1); 
+ } + } + + // another batch of tests involving divide handle and reversing + // self edges + { + vector implementations; + + HashGraph hg; + implementations.push_back(&hg); + + PackedGraph pg; + implementations.push_back(&pg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + for (DeletableHandleGraph *implementation : implementations) { + DeletableHandleGraph &graph = *implementation; + + handle_t h1 = graph.create_handle("ATGAA"); + handle_t h2 = graph.create_handle("ATGAA"); + + graph.create_edge(h1, graph.flip(h1)); + graph.create_edge(graph.flip(h2), h2); + + auto parts1 = graph.divide_handle(h1, {2, 4}); + auto parts2 = graph.divide_handle(h2, {2, 4}); + + assert(parts1.size() == 3); + assert(parts2.size() == 3); + + assert(graph.has_edge(parts1[0], parts1[1])); + assert(graph.has_edge(parts1[1], parts1[2])); + assert(graph.has_edge(parts1[2], graph.flip(parts1[2]))); + + assert(graph.has_edge(parts2[0], parts2[1])); + assert(graph.has_edge(parts2[1], parts2[2])); + assert(graph.has_edge(graph.flip(parts2[0]), parts2[0])); + } + } + + // another batch of tests that deal with deleting after dividing + { + vector> + implementations; + + // Add implementations + + PackedGraph pg, pg2; + implementations.push_back(make_pair(&pg, &pg2)); + + HashGraph hg, hg2; + implementations.push_back(make_pair(&hg, &hg2)); + + MappedPackedGraph mpg, mpg2; + implementations.push_back(make_pair(&mpg, &mpg2)); + + // And test them + for (int imp = 0; imp < implementations.size(); ++imp) { + + for (bool backwards : {false, true}) { + + MutablePathDeletableHandleGraph *g = backwards + ? 
implementations[imp].first + : implementations[imp].second; + + assert(g->get_node_count() == 0); + + handle_t handle1 = g->create_handle("CAAATAAGGCTTGGAAATTTTCTGGAGTTCTA"); + handle_t handle2 = g->create_handle("TTATATTCCAACTCTCTG"); + path_handle_t path_handle = g->create_path_handle("x"); + g->create_edge(handle1, handle2); + + if (backwards) { + handle1 = g->flip(handle1); + handle2 = g->flip(handle2); + g->append_step(path_handle, handle2); + g->append_step(path_handle, handle1); + } else { + g->append_step(path_handle, handle1); + g->append_step(path_handle, handle2); + } + + auto parts1 = g->divide_handle(handle1, vector({2, 7, 22, 31})); + auto parts2 = g->divide_handle(handle2, vector({1, 5, 10})); + + vector steps; + g->for_each_step_in_path(path_handle, [&](step_handle_t step_handle) { + steps.push_back(g->get_handle_of_step(step_handle)); + }); + + assert(steps.size() == 9); + int i = 0; + vector to_delete; + g->append_step(g->create_path_handle(to_string(i)), steps[i]); + ++i; + to_delete.push_back(steps[i++]); + g->append_step(g->create_path_handle(to_string(i)), steps[i]); + ++i; + to_delete.push_back(steps[i++]); + to_delete.push_back(steps[i++]); + to_delete.push_back(steps[i++]); + g->append_step(g->create_path_handle(to_string(i)), steps[i]); + ++i; + to_delete.push_back(steps[i++]); + g->append_step(g->create_path_handle(to_string(i)), steps[i]); + ++i; + + g->destroy_path(path_handle); + + for (auto handle : to_delete) { + g->destroy_handle(handle); + } + + g->for_each_path_handle([&](const path_handle_t &p) { + g->for_each_step_in_path(p, [&](const step_handle_t &s) { + auto h = g->get_handle_of_step(s); + }); + }); + + assert(g->get_node_count() == 4); + assert(g->get_path_count() == 4); + } + } + } + + // another batch of tests that deal with deleting down to an empty graph + { + vector implementations; + + // Add implementations + + PackedGraph pg; + implementations.push_back(&pg); + + HashGraph hg; + implementations.push_back(&hg); + + 
MappedPackedGraph mpg; + implementations.push_back(&mpg); + + // And test them + for (int imp = 0; imp < implementations.size(); ++imp) { + + MutablePathDeletableHandleGraph *g = implementations[imp]; + + // the graph that i discovered the bug this tests for + vector>> graph_spec{ + {1, "C", {19}}, {2, "A", {4, 5}}, + {3, "G", {4, 5}}, {4, "T", {6, 16, 18}}, + {5, "C", {6, 16, 18}}, {6, "TTG", {7, 8}}, + {7, "A", {9}}, {8, "G", {9}}, + {9, "AAATT", {16}}, {10, "A", {12}}, + {11, "T", {12}}, {12, "ATAT", {13, 14}}, + {13, "A", {15}}, {14, "T", {15}}, + {15, "C", {20}}, {16, "TTCTGG", {17, 18}}, + {17, "AGT", {18}}, {18, "TCTAT", {10, 11}}, + {19, "AAATAAG", {2, 3}}, {20, "CAACTCTCTG", {}}, + }; + + for (auto rec : graph_spec) { + g->create_handle(get<1>(rec), get<0>(rec)); + } + for (auto rec : graph_spec) { + for (auto n : get<2>(rec)) { + g->create_edge(g->get_handle(get<0>(rec)), g->get_handle(n)); + } + } + + // a series of deletes that elicits the behavior + vector> delete_edges{ + {g->get_handle(10, 1), g->get_handle(18, 1)}, + {g->get_handle(3, 0), g->get_handle(5, 0)}, + {g->get_handle(4, 0), g->get_handle(6, 0)}, + {g->get_handle(6, 0), g->get_handle(7, 0)}, + {g->get_handle(2, 0), g->get_handle(5, 0)}, + {g->get_handle(7, 0), g->get_handle(9, 0)}, + {g->get_handle(16, 0), g->get_handle(17, 0)}, + {g->get_handle(12, 0), g->get_handle(14, 0)}, + {g->get_handle(9, 0), g->get_handle(16, 0)}, + {g->get_handle(11, 1), g->get_handle(18, 1)}, + {g->get_handle(6, 0), g->get_handle(8, 0)}, + {g->get_handle(12, 0), g->get_handle(13, 0)}, + {g->get_handle(5, 0), g->get_handle(16, 0)}, + {g->get_handle(4, 0), g->get_handle(16, 0)}, + {g->get_handle(16, 0), g->get_handle(18, 0)}, + {g->get_handle(5, 0), g->get_handle(6, 0)}, + {g->get_handle(3, 0), g->get_handle(4, 0)}, + {g->get_handle(8, 0), g->get_handle(9, 0)}, + {g->get_handle(2, 0), g->get_handle(4, 0)}}; + for (auto edge : delete_edges) { + g->destroy_edge(edge.first, edge.second); + } + 
g->destroy_handle(g->get_handle(16, 0)); + g->destroy_handle(g->get_handle(13, 0)); + g->destroy_handle(g->get_handle(15, 0)); + g->destroy_handle(g->get_handle(20, 0)); + g->destroy_handle(g->get_handle(14, 0)); + g->destroy_handle(g->get_handle(10, 0)); + g->destroy_handle(g->get_handle(12, 0)); + g->destroy_handle(g->get_handle(11, 0)); + g->destroy_handle(g->get_handle(9, 0)); + g->destroy_handle(g->get_handle(4, 0)); + g->destroy_handle(g->get_handle(7, 0)); + g->destroy_handle(g->get_handle(18, 0)); + g->destroy_handle(g->get_handle(5, 0)); + g->destroy_handle(g->get_handle(1, 0)); + g->destroy_handle(g->get_handle(8, 0)); + g->destroy_handle(g->get_handle(19, 0)); + g->destroy_handle(g->get_handle(3, 0)); + g->destroy_handle(g->get_handle(6, 0)); + g->destroy_handle(g->get_handle(17, 0)); + g->destroy_handle(g->get_handle(2, 0)); + + g->create_handle("GATTACA", 4); + assert(g->get_node_count() == 1); + } + } + + // Edge counts stay accurate after deleting nodes + { + vector implementations; + + // Add implementations + + PackedGraph pg; + implementations.push_back(&pg); + + HashGraph hg; + implementations.push_back(&hg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + // note: not valid in graph with reversing self edges + auto count_edges = [&](const HandleGraph &g) { + int cnt = 0; + g.for_each_handle([&](const handle_t &h) { + for (bool r : {true, false}) { + g.follow_edges(h, r, [&](const handle_t &n) { ++cnt; }); + } + }); + assert(cnt % 2 == 0); + return cnt / 2; + }; + + // And test them + for (int imp = 0; imp < implementations.size(); ++imp) { + + MutablePathDeletableHandleGraph *graph = implementations[imp]; + + handle_t h1 = graph->create_handle("A"); + handle_t h2 = graph->create_handle("AAA"); + handle_t h3 = graph->create_handle("CC"); + handle_t h4 = graph->create_handle("G"); + handle_t h5 = graph->create_handle("T"); + handle_t h6 = graph->create_handle("T"); + handle_t h7 = graph->create_handle("TT"); + handle_t h8 = 
graph->create_handle("T"); + handle_t h9 = graph->create_handle("TTT"); + handle_t h10 = graph->create_handle("C"); + handle_t h11 = graph->create_handle("CC"); + handle_t h12 = graph->create_handle("A"); + handle_t h13 = graph->create_handle("AA"); + + graph->create_edge(h1, h2); + graph->create_edge(h2, h3); + graph->create_edge(h2, h4); + graph->create_edge(h3, h4); + graph->create_edge(h3, h5); + graph->create_edge(h5, h6); + graph->create_edge(h6, h7); + graph->create_edge(h7, h8); + graph->create_edge(h8, h9); + graph->create_edge(h9, h10); + graph->create_edge(h9, h12); + graph->create_edge(h10, h11); + graph->create_edge(h11, h12); + graph->create_edge(h12, h13); + graph->create_edge(h5, h7); + graph->create_edge(h5, h11); + graph->create_edge(h7, h13); + graph->create_edge(h8, h12); + + graph->destroy_handle(h1); + assert(graph->get_edge_count() == count_edges(*graph)); + graph->destroy_handle(h6); + assert(graph->get_edge_count() == count_edges(*graph)); + graph->destroy_handle(h9); + assert(graph->get_edge_count() == count_edges(*graph)); + graph->destroy_handle(h10); + assert(graph->get_edge_count() == count_edges(*graph)); + } + } + + // batch deletion of paths works as expected + { + vector implementations; + + // Add implementations + + PackedGraph pg; + implementations.push_back(&pg); + + HashGraph hg; + implementations.push_back(&hg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + for (int imp = 0; imp < implementations.size(); ++imp) { + + MutablePathDeletableHandleGraph &graph = *implementations[imp]; + + auto h1 = graph.create_handle("A"); + auto h2 = graph.create_handle("A"); + auto h3 = graph.create_handle("A"); + + graph.create_edge(h1, h2); + graph.create_edge(h2, h3); + + auto p1 = graph.create_path_handle("1"); + auto p2 = graph.create_path_handle("2"); + auto p3 = graph.create_path_handle("3"); + auto p4 = graph.create_path_handle("4"); + auto p5 = graph.create_path_handle("5"); + + for (const auto &p : {p1, p2, p3, p4, 
p5}) { + for (auto h : {h1, h2, h3}) { + graph.append_step(p, h); + } + } + + graph.destroy_paths({p1, p3, p4}); + + set paths_seen; + set paths_expected{p2, p5}; + graph.for_each_path_handle([&](const path_handle_t &path) { + assert(!paths_seen.count(path)); + paths_seen.insert(path); + std::vector handles; + std::vector handles_expected{h1, h2, h3}; + for (auto h : graph.scan_path(path)) { + handles.push_back(h); + } + assert(handles == handles_expected); + }); + + assert(paths_seen == paths_expected); + + graph.for_each_handle([&](const handle_t &h) { + set paths; + graph.for_each_step_on_handle(h, [&](const step_handle_t &step) { + auto p = graph.get_path_handle_of_step(step); + assert(!paths.count(p)); + paths.insert(p); + }); + assert(paths_seen == paths_expected); + }); + } + } + + cerr << "DeletableHandleGraph tests successful!" << endl; +} + +void test_mutable_path_handle_graphs() { + + vector implementations; + + PackedGraph pg; + implementations.push_back(&pg); + + HashGraph hg; + implementations.push_back(&hg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + for (MutablePathDeletableHandleGraph *implementation : implementations) { + + auto check_path = [&](MutablePathDeletableHandleGraph &graph, + const path_handle_t &p, + const vector &steps) { + assert(graph.get_step_count(p) == steps.size()); + + // Make sure steps connect back to the path + step_handle_t begin_step = graph.path_begin(p); + step_handle_t end_step = graph.path_end(p); + assert(graph.get_path_handle_of_step(begin_step) == p); + assert(graph.get_path_handle_of_step(end_step) == p); + + step_handle_t step = graph.path_begin(p); + for (int i = 0; i < steps.size(); i++) { + auto here = graph.get_handle_of_step(step); + assert(graph.get_path_handle_of_step(step) == p); + assert(graph.get_handle_of_step(step) == steps[i]); + + if (graph.get_is_circular(p)) { + assert(graph.has_next_step(step)); + assert(graph.has_previous_step(step)); + } else { + 
assert(graph.has_next_step(step) == i + 1 < steps.size()); + assert(graph.has_previous_step(step) == i > 0); + } + + step = graph.get_next_step(step); + } + + if (graph.get_is_circular(p) && !graph.is_empty(p)) { + assert(step == graph.path_begin(p)); + } else { + assert(step == graph.path_end(p)); + } + + step = graph.path_back(p); + + for (int i = steps.size() - 1; i >= 0; i--) { + + assert(graph.get_path_handle_of_step(step) == p); + assert(graph.get_handle_of_step(step) == steps[i]); + + if (graph.get_is_circular(p)) { + assert(graph.has_next_step(step)); + assert(graph.has_previous_step(step)); + } else { + assert(graph.has_next_step(step) == i + 1 < steps.size()); + assert(graph.has_previous_step(step) == i > 0); + } + + step = graph.get_previous_step(step); + } + + if (graph.get_is_circular(p) && !graph.is_empty(p)) { + assert(step == graph.path_back(p)); + } else { + assert(step == graph.path_front_end(p)); + } + }; + + auto check_flips = [&](MutablePathDeletableHandleGraph &graph, + const path_handle_t &p, + const vector &steps) { + auto flipped = steps; + for (size_t i = 0; i < steps.size(); i++) { + graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); + flipped[i] = graph.flip(flipped[i]); + check_path(graph, p, flipped); + + graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); + flipped[i] = graph.flip(flipped[i]); + check_path(graph, p, flipped); + } + }; + + MutablePathDeletableHandleGraph &graph = *implementation; + + handle_t h1 = graph.create_handle("AC"); + handle_t h2 = graph.create_handle("CAGTGA"); + handle_t h3 = graph.create_handle("GT"); + + graph.create_edge(h1, h2); + graph.create_edge(h2, h3); + graph.create_edge(h1, graph.flip(h2)); + graph.create_edge(graph.flip(h2), h3); + + assert(!graph.has_path("1")); + assert(graph.get_path_count() == 0); + + path_handle_t p1 = graph.create_path_handle("1"); + + assert(graph.has_path("1")); + assert(graph.get_path_count() == 1); + assert(graph.get_path_handle("1") == p1); + 
assert(graph.get_path_name(p1) == "1"); + assert(graph.get_step_count(p1) == 0); + assert(graph.is_empty(p1)); + + graph.append_step(p1, h1); + + assert(graph.get_step_count(p1) == 1); + assert(!graph.is_empty(p1)); + + graph.append_step(p1, h2); + graph.append_step(p1, h3); + + assert(graph.get_step_count(p1) == 3); + + // graph can traverse a path + check_path(graph, p1, {h1, h2, h3}); + + // graph preserves paths when reversing nodes + check_flips(graph, p1, {h1, h2, h3}); + + // make a circular path + path_handle_t p2 = graph.create_path_handle("2", true); + assert(graph.get_path_count() == 2); + + graph.append_step(p2, h1); + graph.append_step(p2, graph.flip(h2)); + graph.append_step(p2, h3); + + check_path(graph, p2, {h1, graph.flip(h2), h3}); + + // graph can query steps of a node on paths + + bool found1 = false, found2 = false; + vector steps = graph.steps_of_handle(h1); + for (auto &step : steps) { + if (graph.get_path_handle_of_step(step) == p1 && + graph.get_handle_of_step(step) == h1) { + found1 = true; + } else if (graph.get_path_handle_of_step(step) == p2 && + graph.get_handle_of_step(step) == h1) { + found2 = true; + } else { + assert(false); + } + } + assert(found1); + assert(found2); + found1 = found2 = false; + + steps = graph.steps_of_handle(h1, true); + for (auto &step : steps) { + if (graph.get_path_handle_of_step(step) == p1 && + graph.get_handle_of_step(step) == h1) { + found1 = true; + } else if (graph.get_path_handle_of_step(step) == p2 && + graph.get_handle_of_step(step) == h1) { + found2 = true; + } else { + assert(false); + } + } + assert(found1); + assert(found2); + found1 = found2 = false; + + steps = graph.steps_of_handle(graph.flip(h1), true); + for (auto &step : steps) { + assert(false); + } + + steps = graph.steps_of_handle(h2, true); + for (auto &step : steps) { + if (graph.get_path_handle_of_step(step) == p1 && + graph.get_handle_of_step(step) == h2) { + found1 = true; + } else { + assert(false); + } + } + steps = 
graph.steps_of_handle(graph.flip(h2), true); + for (auto &step : steps) { + if (graph.get_path_handle_of_step(step) == p2 && + graph.get_handle_of_step(step) == graph.flip(h2)) { + found2 = true; + } else { + assert(false); + } + } + assert(found1); + assert(found2); + found1 = found2 = false; + + vector segments = graph.divide_handle(h2, {size_t(2), size_t(4)}); + + // graph preserves paths when dividing nodes + + check_path(graph, p1, {h1, segments[0], segments[1], segments[2], h3}); + check_path(graph, p2, + {h1, graph.flip(segments[2]), graph.flip(segments[1]), + graph.flip(segments[0]), h3}); + + path_handle_t p3 = graph.create_path_handle("3"); + graph.append_step(p3, h1); + graph.append_step(p3, segments[0]); + + assert(graph.has_path("3")); + assert(graph.get_path_count() == 3); + + // graph can toggle circularity + + graph.for_each_path_handle([&](const path_handle_t &p) { + vector steps; + + for (handle_t h : graph.scan_path(p)) { + steps.push_back(h); + } + + bool starting_circularity = graph.get_is_circular(p); + + // make every transition occur + for (bool circularity : {true, true, false, false, true}) { + graph.set_circularity(p, circularity); + assert(graph.get_is_circular(p) == circularity); + check_path(graph, p, steps); + } + + graph.set_circularity(p, starting_circularity); + }); + + // graph can destroy paths + + graph.destroy_path(p3); + + assert(!graph.has_path("3")); + assert(graph.get_path_count() == 2); + + bool found3 = false; + + graph.for_each_path_handle([&](const path_handle_t &p) { + if (graph.get_path_name(p) == "1") { + found1 = true; + } else if (graph.get_path_name(p) == "2") { + found2 = true; + } else if (graph.get_path_name(p) == "3") { + found3 = true; + } else { + assert(false); + } + }); + + assert(found1); + assert(found2); + assert(!found3); + + // check flips to see if membership records are still functional + check_flips(graph, p1, {h1, segments[0], segments[1], segments[2], h3}); + check_flips(graph, p2, + {h1, 
graph.flip(segments[2]), graph.flip(segments[1]), + graph.flip(segments[0]), h3}); + + graph.destroy_path(p1); + + assert(!graph.has_path("1")); + assert(graph.get_path_count() == 1); + + found1 = found2 = found3 = false; + + graph.for_each_path_handle([&](const path_handle_t &p) { + if (graph.get_path_name(p) == "1") { + found1 = true; + } else if (graph.get_path_name(p) == "2") { + found2 = true; + } else if (graph.get_path_name(p) == "3") { + found3 = true; + } else { + assert(false); + } + }); + + assert(!found1); + assert(found2); + assert(!found3); + + // check flips to see if membership records are still functional + check_flips(graph, p2, + {h1, graph.flip(segments[2]), graph.flip(segments[1]), + graph.flip(segments[0]), h3}); + + // make a path to rewrite + path_handle_t p4 = graph.create_path_handle("4"); + graph.prepend_step(p4, h3); + graph.prepend_step(p4, segments[2]); + graph.prepend_step(p4, segments[1]); + graph.prepend_step(p4, segments[0]); + graph.prepend_step(p4, h1); + + check_flips(graph, p4, {h1, segments[0], segments[1], segments[2], h3}); + + auto check_rewritten_segment = + [&](const pair &new_segment, + const vector &steps) { + int i = 0; + for (auto step = new_segment.first; step != new_segment.second; + step = graph.get_next_step(step)) { + assert(graph.get_handle_of_step(step) == steps[i]); + i++; + } + assert(i == steps.size()); + }; + + // rewrite the middle portion of a path + + step_handle_t s1 = graph.get_next_step(graph.path_begin(p4)); + step_handle_t s2 = + graph.get_next_step(graph.get_next_step(graph.get_next_step(s1))); + + auto new_segment = + graph.rewrite_segment(s1, s2, + {graph.flip(segments[2]), graph.flip(segments[1]), + graph.flip(segments[0])}); + + check_flips(graph, p4, + {h1, graph.flip(segments[2]), graph.flip(segments[1]), + graph.flip(segments[0]), h3}); + check_rewritten_segment(new_segment, + {graph.flip(segments[2]), graph.flip(segments[1]), + graph.flip(segments[0])}); + + // rewrite around the end of a 
circular path to delete + + graph.create_edge(h3, h1); + graph.create_edge(segments[2], segments[0]); + graph.set_circularity(p4, true); + + s1 = graph.get_previous_step(graph.path_begin(p4)); + s2 = graph.get_next_step(graph.path_begin(p4)); + assert(s2 != graph.path_end(p4)); + + new_segment = graph.rewrite_segment(s1, s2, vector()); + // The end we get should be the same as the end we sent, since it is + // exclusive + assert(new_segment.second == s2); + + check_flips(graph, p4, + {graph.flip(segments[2]), graph.flip(segments[1]), + graph.flip(segments[0])}); + check_rewritten_segment(new_segment, vector()); + + // add into an empty slot + + new_segment = graph.rewrite_segment(new_segment.first, new_segment.second, + {graph.flip(h1), graph.flip(h3)}); + + check_flips(graph, p4, + {graph.flip(h1), graph.flip(h3), graph.flip(segments[2]), + graph.flip(segments[1]), graph.flip(segments[0])}); + check_rewritten_segment(new_segment, {graph.flip(h1), graph.flip(h3)}); + } + + { + vector< + pair> + implementations; + + // Add implementations + + HashGraph hg, hg2; + implementations.push_back(make_pair(&hg, &hg2)); + + PackedGraph pg, pg2; + implementations.push_back(make_pair(&pg, &pg2)); + + MappedPackedGraph mpg, mpg2; + implementations.push_back(make_pair(&mpg, &mpg2)); + + // And test them + for (int imp = 0; imp < implementations.size(); ++imp) { + for (bool backwards : {false, true}) { + + MutablePathMutableHandleGraph *g = backwards + ? 
implementations[imp].first + : implementations[imp].second; + + assert(g->get_node_count() == 0); + + handle_t handle = g->create_handle("TTATATTCCAACTCTCTG"); + if (backwards) { + handle = g->flip(handle); + } + path_handle_t path_handle = g->create_path_handle("Path"); + g->append_step(path_handle, handle); + string seq = g->get_sequence(handle); + vector true_parts = {seq.substr(0, 1), seq.substr(1, 4), + seq.substr(5, 5), seq.substr(10)}; + + // Should get (C,AGAG,AGTTG,GAATATAA) (reverse) + // Should get (T,TATA,TTCCA,ACTCTCTG) (forward) + auto parts = g->divide_handle(handle, {1, 5, 10}); + assert(parts.size() == true_parts.size()); + for (int i = 0; i < parts.size(); ++i) { + assert(g->get_sequence(parts[i]) == true_parts[i]); + assert(g->get_is_reverse(parts[i]) == backwards); + } + + vector steps; + g->for_each_step_in_path(path_handle, [&](step_handle_t step_handle) { + steps.push_back(g->get_handle_of_step(step_handle)); + }); + assert(steps.size() == true_parts.size()); + for (int i = 0; i < parts.size(); ++i) { + assert(g->get_sequence(steps[i]) == true_parts[i]); + assert(g->get_is_reverse(steps[i]) == backwards); + } + } + } + } + + cerr << "MutablePathDeletableHandleGraph tests successful!" 
<< endl; +} + +template void test_packed_vector() { + enum vec_op_t { SET = 0, GET = 1, APPEND = 2, POP = 3, SERIALIZE = 4 }; + + random_device rd; + default_random_engine prng(rd()); + uniform_int_distribution op_distr(0, 4); + + int num_runs = 1000; + int num_ops = 200; + int gets_per_op = 5; + int sets_per_op = 5; + int appends_per_op = 3; + int pops_per_op = 1; + + for (size_t i = 0; i < num_runs; i++) { + + uint64_t next_val = 0; + + vector std_vec; + PackedVectorImpl dyn_vec; + + for (size_t j = 0; j < num_ops; j++) { + + vec_op_t op = (vec_op_t)op_distr(prng); + switch (op) { + case SET: + if (!std_vec.empty()) { + for (size_t k = 0; k < sets_per_op; k++) { + size_t idx = prng() % dyn_vec.size(); + std_vec[idx] = next_val; + dyn_vec.set(idx, next_val); + next_val++; + } + } + + break; + + case GET: + if (!std_vec.empty()) { + for (size_t k = 0; k < gets_per_op; k++) { + size_t idx = prng() % dyn_vec.size(); + assert(std_vec[idx] == dyn_vec.get(idx)); + next_val++; + } + } + + break; + + case APPEND: + for (size_t k = 0; k < appends_per_op; k++) { + std_vec.push_back(next_val); + dyn_vec.push_back(next_val); + next_val++; + } + + break; + + case POP: + if (!std_vec.empty()) { + for (size_t k = 0; k < pops_per_op; k++) { + std_vec.pop_back(); + dyn_vec.pop_back(); + } + } + + break; + + case SERIALIZE: { + stringstream strm; + + dyn_vec.serialize(strm); + strm.seekg(0); + PackedVectorImpl copy_vec(strm); + + assert(copy_vec.size() == dyn_vec.size()); + for (size_t i = 0; i < copy_vec.size(); i++) { + assert(copy_vec.get(i) == dyn_vec.get(i)); + } + break; + } + + default: + break; + } + + assert(std_vec.empty() == dyn_vec.empty()); + assert(std_vec.size() == dyn_vec.size()); + } + } + cerr << "PackedVector (" << typeid(PackedVectorImpl).name() + << ") tests successful!" << endl; +} + +/** + * Generic iterator test function that works with any vector-like container + * (PackedVector, PagedVector, RobustPagedVector, PackedDeque). 
+ * + * Tests ForwardIterator, BidirectionalIterator, RandomAccessIterator, and + * iterator order comparison, but not OutputIterator. + */ +template void test_iterators() { + // ForwardIterator tests + + // Empty iteration + { + VectorLike vec; + assert(vec.begin() == vec.end()); + + size_t count = 0; + for (auto it = vec.begin(); it != vec.end(); ++it) { + count++; + } + assert(count == 0); + } + + // Single element + { + VectorLike vec; + vec.push_back(42); + + assert(vec.begin() != vec.end()); + + auto it = vec.begin(); + assert(*it == 42); + ++it; + assert(it == vec.end()); + } + + // Multiple elements - basic iteration + { + VectorLike vec; + vector expected = {10, 20, 30, 40, 50}; + + for (auto val : expected) { + vec.push_back(val); + } + + // Iterate and compare + size_t idx = 0; + for (auto it = vec.begin(); it != vec.end(); ++it) { + assert(idx < expected.size()); + assert(*it == expected[idx]); + idx++; + } + assert(idx == expected.size()); + } + + // Range-based for loop + { + VectorLike vec; + vector expected = {100, 200, 300, 400, 500, 600, 700, 800}; + + for (auto val : expected) { + vec.push_back(val); + } + + size_t idx = 0; + for (auto val : vec) { + assert(idx < expected.size()); + assert(val == expected[idx]); + idx++; + } + assert(idx == expected.size()); + } + + // Iterator equality and inequality + { + VectorLike vec; + vec.push_back(1); + vec.push_back(2); + vec.push_back(3); + + auto it1 = vec.begin(); + auto it2 = vec.begin(); + assert(it1 == it2); + + ++it2; + assert(it1 != it2); + + ++it1; + assert(it1 == it2); + } + + // std::distance compatibility + { + VectorLike vec; + for (size_t i = 0; i < 15; i++) { + vec.push_back(i); + } + + auto dist = std::distance(vec.begin(), vec.end()); + assert((size_t)dist == vec.size()); + assert((size_t)dist == 15); + } + + // std::find compatibility + { + VectorLike vec; + vec.push_back(10); + vec.push_back(20); + vec.push_back(30); + vec.push_back(40); + vec.push_back(50); + + auto it = 
std::find(vec.begin(), vec.end(), 30); + assert(it != vec.end()); + assert(*it == 30); + + auto it2 = std::find(vec.begin(), vec.end(), 999); + assert(it2 == vec.end()); + } + + // Const iterator + { + VectorLike vec; + vec.push_back(5); + vec.push_back(15); + vec.push_back(25); + + const VectorLike &const_vec = vec; + + size_t count = 0; + for (auto it = const_vec.begin(); it != const_vec.end(); ++it) { + count++; + } + assert(count == 3); + + auto it = const_vec.begin(); + assert(*it == 5); + ++it; + assert(*it == 15); + ++it; + assert(*it == 25); + } + + // Large container with various patterns + { + VectorLike vec; + random_device rd; + default_random_engine prng(rd()); + uniform_int_distribution val_distr(0, 10000); + + vector expected; + size_t num_elements = 200; + + for (size_t i = 0; i < num_elements; i++) { + uint64_t val = val_distr(prng); + expected.push_back(val); + vec.push_back(val); + } + + size_t idx = 0; + for (auto val : vec) { + assert(val == expected[idx]); + idx++; + } + assert(idx == expected.size()); + } + + // Iteration after modification + { + VectorLike vec; + vec.push_back(1); + vec.push_back(2); + vec.push_back(3); + + // First iteration + size_t count = 0; + for (auto it = vec.begin(); it != vec.end(); ++it) { + count++; + } + assert(count == 3); + + // Modify + vec.push_back(4); + vec.set(0, 100); + + // Second iteration + vector expected = {100, 2, 3, 4}; + size_t idx = 0; + for (auto val : vec) { + assert(val == expected[idx]); + idx++; + } + assert(idx == 4); + } + + // Iterator copy construction + { + VectorLike vec; + vec.push_back(10); + vec.push_back(20); + + auto it1 = vec.begin(); + auto it2(it1); // Copy constructor + + assert(it1 == it2); + assert(*it1 == *it2); + assert(*it1 == 10); + } + + // Iterator assignment + { + VectorLike vec; + vec.push_back(10); + vec.push_back(20); + vec.push_back(30); + + auto it1 = vec.begin(); + auto it2 = vec.begin(); + ++it2; + + assert(*it1 == 10); + assert(*it2 == 20); + + it1 = it2; // 
Assignment + assert(it1 == it2); + assert(*it1 == 20); + } + + // BidirectionalIterator tests. + { + VectorLike vec; + vec.push_back(10); + vec.push_back(20); + vec.push_back(30); + + auto it1 = vec.begin(); + auto it2 = it1; + ++it2; + auto also_decremented = --it2; + + assert(it2 == it1); + assert(also_decremented == it1); + + it2++; + auto not_decremented = it2--; + + assert(it2 == it1); + assert(not_decremented != it1); + assert(*not_decremented == 20); + + auto it3 = vec.end(); + it3--; + assert(it3 != vec.end()); + assert(*it3 == 30); + } + + // RandomAccessIterator tests + { + VectorLike vec; + vec.push_back(10); + vec.push_back(20); + vec.push_back(30); + + auto it1 = vec.begin(); + auto it2 = it1; + + it1 += 1; + assert(*it1 == 20); + + it1 += 2; + assert(it1 == vec.end()); + + it1 -= 1; + auto it3 = it2 + 2; + assert(it1 == it3); + assert(*it1 == 30); + assert(it2 == vec.begin()); + + auto it4 = it1 - 2; + assert(*it4 == 10); + + assert(*it1 == vec.begin()[2]); + assert(*it4 == vec.begin()[0]); + assert(it4[2] == *it1); + assert(it1[-2] == *it4); + + assert(it1 + -2 == it4); + assert(it4 - -2 == it1); + + it1 += -2; + assert(it1 == it4); + + it1 -= -1; + it4++; + assert(it1 == it4); + } + + // Iterator comparison tests + { + VectorLike vec; + vec.push_back(10); + vec.push_back(20); + vec.push_back(30); + + auto it1 = vec.begin(); + auto it2 = it1; + + assert(it1 >= it2); + assert(it1 <= it2); + assert(!(it1 < it2)); + assert(!(it1 > it2)); + it1++; + + assert(it1 >= it2); + assert(!(it1 <= it2)); + assert(!(it2 >= it1)); + assert(it2 <= it1); + assert(!(it1 < it2)); + assert(it1 > it2); + assert(it2 < it1); + assert(!(it2 > it1)); + } + + // Iterator distance tests + { + VectorLike vec; + vec.push_back(10); + vec.push_back(20); + vec.push_back(30); + + assert(vec.end() - vec.begin() == vec.size()); + + auto it1 = vec.begin(); + auto it2 = it1; + + it1 += 1; + it2 += 2; + + assert(it2 - it1 == 1); + assert(it1 - it2 == -1); + + it1--; + assert(it2 - it1 == 
2); + assert(it1 - it2 == -2); + } + + cerr << "Iterator (" << typeid(typename VectorLike::iterator).name() + << ") tests successful!" << endl; +} + +template void test_paged_vector() { + enum vec_op_t { SET = 0, GET = 1, APPEND = 2, POP = 3, SERIALIZE = 4 }; + std::random_device rd; + std::default_random_engine prng(rd()); + std::uniform_int_distribution op_distr(0, 4); + std::uniform_int_distribution val_distr(0, 100); + + int num_runs = 200; + int num_ops = 200; + int gets_per_op = 5; + int sets_per_op = 5; + int appends_per_op = 3; + int pops_per_op = 1; + + for (size_t i = 0; i < num_runs; i++) { + + uint64_t next_val = val_distr(prng); + + std::vector std_vec; + PagedVectorImpl dyn_vec; + + for (size_t j = 0; j < num_ops; j++) { + + vec_op_t op = (vec_op_t)op_distr(prng); + switch (op) { + case SET: + if (!std_vec.empty()) { + for (size_t k = 0; k < sets_per_op; k++) { + size_t idx = prng() % dyn_vec.size(); + std_vec[idx] = next_val; + dyn_vec.set(idx, next_val); + next_val = val_distr(prng); + } + } + + break; + + case GET: + if (!std_vec.empty()) { + for (size_t k = 0; k < gets_per_op; k++) { + size_t idx = prng() % dyn_vec.size(); + assert(std_vec[idx] == dyn_vec.get(idx)); + next_val = val_distr(prng); + } + } + + break; + + case APPEND: + for (size_t k = 0; k < appends_per_op; k++) { + std_vec.push_back(next_val); + dyn_vec.push_back(next_val); + next_val = val_distr(prng); + } + + break; + + case POP: + if (!std_vec.empty()) { + for (size_t k = 0; k < pops_per_op; k++) { + std_vec.pop_back(); + dyn_vec.pop_back(); + } + } + + break; + + case SERIALIZE: { + stringstream strm; + + dyn_vec.serialize(strm); + strm.seekg(0); + PagedVectorImpl copy_vec(strm); + + assert(copy_vec.size() == dyn_vec.size()); + for (size_t i = 0; i < copy_vec.size(); i++) { + assert(copy_vec.get(i) == dyn_vec.get(i)); + } + break; + } + + default: + break; + } + + assert(std_vec.empty() == dyn_vec.empty()); + assert(std_vec.size() == dyn_vec.size()); + } + } + cerr << 
"PagedVector (" << typeid(PagedVectorImpl).name() + << ") tests successful!" << endl; +} + +void test_packed_deque() { + enum deque_op_t { + SET = 0, + GET = 1, + APPEND_LEFT = 2, + POP_LEFT = 3, + APPEND_RIGHT = 4, + POP_RIGHT = 5, + SERIALIZE = 6 + }; + std::random_device rd; + std::default_random_engine prng(rd()); + std::uniform_int_distribution op_distr(0, 6); + + int num_runs = 1000; + int num_ops = 200; + int gets_per_op = 5; + int sets_per_op = 5; + int appends_per_op = 3; + int pops_per_op = 1; + + for (size_t i = 0; i < num_runs; i++) { + + uint64_t next_val = 0; + + std::deque std_deq; + PackedDeque<> suc_deq; + + for (size_t j = 0; j < num_ops; j++) { + + deque_op_t op = (deque_op_t)op_distr(prng); + switch (op) { + case SET: + if (!std_deq.empty()) { + for (size_t k = 0; k < sets_per_op; k++) { + size_t idx = prng() % std_deq.size(); + std_deq[idx] = next_val; + suc_deq.set(idx, next_val); + next_val++; + } + } + + break; + + case GET: + if (!std_deq.empty()) { + for (size_t k = 0; k < gets_per_op; k++) { + size_t idx = prng() % std_deq.size(); + assert(std_deq[idx] == suc_deq.get(idx)); + next_val++; + } + } + + break; + + case APPEND_LEFT: + for (size_t k = 0; k < appends_per_op; k++) { + std_deq.push_front(next_val); + suc_deq.push_front(next_val); + next_val++; + } + + break; + + case POP_LEFT: + for (size_t k = 0; k < pops_per_op && !std_deq.empty(); k++) { + std_deq.pop_front(); + suc_deq.pop_front(); + } + + break; + + case APPEND_RIGHT: + for (size_t k = 0; k < appends_per_op; k++) { + std_deq.push_back(next_val); + suc_deq.push_back(next_val); + next_val++; + } + + break; + + case POP_RIGHT: + for (size_t k = 0; k < pops_per_op && !std_deq.empty(); k++) { + std_deq.pop_back(); + suc_deq.pop_back(); + } + + break; + + case SERIALIZE: { + stringstream strm; + + suc_deq.serialize(strm); + strm.seekg(0); + PackedDeque<> copy_deq(strm); + + assert(copy_deq.size() == suc_deq.size()); + for (size_t i = 0; i < copy_deq.size(); i++) { + 
assert(copy_deq.get(i) == suc_deq.get(i)); + } + break; + } + + default: + break; + } + + assert(std_deq.empty() == suc_deq.empty()); + assert(std_deq.size() == suc_deq.size()); + } + } + cerr << "PackedDeque tests successful!" << endl; +} + +void test_packed_set() { + enum set_op_t { INSERT = 0, REMOVE = 1, FIND = 2 }; + + random_device rd; + default_random_engine prng(rd()); + uniform_int_distribution op_distr(0, 2); + + int num_runs = 1000; + int num_ops = 200; + int inserts_per_op = 2; + int prev_inserts_per_op = 1; + int removes_per_op = 1; + int finds_per_op = 5; + + for (size_t i = 0; i < num_runs; i++) { + uint64_t next_val = 0; + + unordered_set std_set; + PackedSet<> packed_set; + + for (size_t j = 0; j < num_ops; j++) { + set_op_t op = (set_op_t)op_distr(prng); + switch (op) { + case INSERT: + + for (size_t k = 0; k < inserts_per_op; ++k) { + packed_set.insert(next_val); + std_set.insert(next_val); + next_val++; + } + for (size_t k = 0; k < prev_inserts_per_op; ++k) { + uint64_t val = prng() % next_val; + packed_set.insert(val); + std_set.insert(val); + } + + break; + + case REMOVE: + if (next_val > 0) { + for (size_t k = 0; k < removes_per_op; ++k) { + uint64_t val = prng() % next_val; + packed_set.remove(val); + std_set.erase(val); + } + } else { + packed_set.remove(0); + packed_set.remove(1); + packed_set.remove(2); + std_set.erase(0); + std_set.erase(1); + std_set.erase(2); + } + + break; + + case FIND: + if (next_val) { + for (size_t k = 0; k < finds_per_op; k++) { + uint64_t val = prng() % next_val; + assert(packed_set.find(val) == (bool)std_set.count(val)); + } + } else { + assert(packed_set.find(0) == (bool)std_set.count(0)); + assert(packed_set.find(1) == (bool)std_set.count(1)); + assert(packed_set.find(2) == (bool)std_set.count(2)); + } + + break; + + // case SERIALIZE: + // { + // stringstream strm; + // + // dyn_vec.serialize(strm); + // strm.seekg(0); + // PackedVector<> copy_vec(strm); + // + // assert(copy_vec.size() == dyn_vec.size()); + 
// for (size_t i = 0; i < copy_vec.size(); i++) { + // assert(copy_vec.get(i) == dyn_vec.get(i)); + // } + // break; + // } + + default: + break; + } + + assert(std_set.empty() == packed_set.empty()); + assert(std_set.size() == packed_set.size()); + } + } + cerr << "PackedSet tests successful!" << endl; +} + +void test_packed_graph() { + + auto check_path = [&](MutablePathDeletableHandleGraph &graph, + const path_handle_t &p, const vector &steps) { + assert(graph.get_step_count(p) == steps.size()); + + step_handle_t step = graph.path_begin(p); + for (int i = 0; i < steps.size(); i++) { + + assert(graph.get_path_handle_of_step(step) == p); + assert(graph.get_handle_of_step(step) == steps[i]); + + if (graph.get_is_circular(p)) { + assert(graph.has_next_step(step)); + assert(graph.has_previous_step(step)); + } else { + assert(graph.has_next_step(step) == i + 1 < steps.size()); + assert(graph.has_previous_step(step) == i > 0); + } + + step = graph.get_next_step(step); + } + + if (graph.get_is_circular(p) && !graph.is_empty(p)) { + assert(step == graph.path_begin(p)); + } else { + assert(step == graph.path_end(p)); + } + + step = graph.path_back(p); + + for (int i = steps.size() - 1; i >= 0; i--) { + + assert(graph.get_path_handle_of_step(step) == p); + assert(graph.get_handle_of_step(step) == steps[i]); + + if (graph.get_is_circular(p)) { + assert(graph.has_next_step(step)); + assert(graph.has_previous_step(step)); + } else { + assert(graph.has_next_step(step) == i + 1 < steps.size()); + assert(graph.has_previous_step(step) == i > 0); + } + + step = graph.get_previous_step(step); + } + + if (graph.get_is_circular(p) && !graph.is_empty(p)) { + assert(step == graph.path_back(p)); + } else { + assert(step == graph.path_front_end(p)); + } + }; + + auto check_flips = [&](MutablePathDeletableHandleGraph &graph, + const path_handle_t &p, + const vector &steps) { + auto flipped = steps; + for (size_t i = 0; i < steps.size(); i++) { + 
graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); + flipped[i] = graph.flip(flipped[i]); + check_path(graph, p, flipped); + + graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); + flipped[i] = graph.flip(flipped[i]); + check_path(graph, p, flipped); + } + }; + + // defragmentation + { + PackedGraph graph; + + handle_t h1 = graph.create_handle("ATGTAG"); + handle_t h2 = graph.create_handle("ACCCC"); + handle_t h3 = graph.create_handle("C"); + handle_t h4 = graph.create_handle("ATT"); + handle_t h5 = graph.create_handle("GGCA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h3); + graph.create_edge(h3, h5); + graph.create_edge(h3, h4); + graph.create_edge(h4, h5); + + path_handle_t p0 = graph.create_path_handle("0"); + path_handle_t p1 = graph.create_path_handle("1"); + path_handle_t p2 = graph.create_path_handle("2"); + + graph.append_step(p0, h3); + graph.append_step(p0, h4); + graph.append_step(p0, h5); + + graph.append_step(p1, h1); + graph.append_step(p1, h3); + graph.append_step(p1, h5); + + graph.append_step(p2, h1); + graph.append_step(p2, h2); + graph.append_step(p2, h3); + graph.append_step(p2, h4); + graph.append_step(p2, h5); + + graph.destroy_path(p0); + graph.destroy_path(p2); + graph.destroy_handle(h2); + graph.destroy_handle(h4); + + assert(graph.get_sequence(h1) == "ATGTAG"); + assert(graph.get_sequence(h3) == "C"); + assert(graph.get_sequence(h5) == "GGCA"); + + bool found = false; + graph.follow_edges(h1, false, [&](const handle_t &next) { + if (next == h3) { + found = true; + } else { + assert(false); + } + return true; + }); + assert(found); + + found = false; + graph.follow_edges(h3, false, [&](const handle_t &next) { + if (next == h5) { + found = true; + } else { + assert(false); + } + return true; + }); + assert(found); + + check_flips(graph, p1, {h1, h3, h5}); + } + + // tightening vector allocations + { + PackedGraph graph; + handle_t h1 = graph.create_handle("ATGTAG"); + 
handle_t h2 = graph.create_handle("ACCCC"); + handle_t h3 = graph.create_handle("C"); + handle_t h4 = graph.create_handle("ATT"); + handle_t h5 = graph.create_handle("GGCA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h3); + graph.create_edge(h3, h5); + graph.create_edge(h3, h4); + graph.create_edge(h4, h5); + + path_handle_t p0 = graph.create_path_handle("0"); + path_handle_t p1 = graph.create_path_handle("1"); + path_handle_t p2 = graph.create_path_handle("2"); + + graph.append_step(p0, h3); + graph.append_step(p0, h4); + graph.append_step(p0, h5); + + graph.append_step(p1, h1); + graph.append_step(p1, h3); + graph.append_step(p1, h5); + + graph.append_step(p2, h1); + graph.append_step(p2, h2); + graph.append_step(p2, h3); + graph.append_step(p2, h4); + graph.append_step(p2, h5); + + // delete some things, but not enough to trigger defragmentation + graph.destroy_path(p2); + graph.destroy_handle(h2); + // reallocate and compress down to the smaller size + graph.optimize(false); + + assert(graph.get_sequence(h1) == "ATGTAG"); + assert(graph.get_sequence(h3) == "C"); + assert(graph.get_sequence(h4) == "ATT"); + assert(graph.get_sequence(h5) == "GGCA"); + + int count = 0; + bool found1 = false, found2 = false; + graph.follow_edges(h1, false, [&](const handle_t &h) { + if (h == h3) { + found1 = true; + } + count++; + }); + assert(found1); + assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h1, true, [&](const handle_t &h) { count++; }); + assert(count == 0); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h3, false, [&](const handle_t &h) { + if (h == h4) { + found1 = true; + } + if (h == h5) { + found2 = true; + } + count++; + }); + assert(found1); + assert(found2); + assert(count == 2); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h3, true, [&](const handle_t &h) { + if (h == h1) { + found1 = true; + } + count++; + }); + assert(found1); + 
assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h4, false, [&](const handle_t &h) { + if (h == h5) { + found1 = true; + } + count++; + }); + assert(found1); + assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h4, true, [&](const handle_t &h) { + if (h == h3) { + found1 = true; + } + count++; + }); + assert(found1); + assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h5, false, [&](const handle_t &h) { count++; }); + assert(count == 0); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h5, true, [&](const handle_t &h) { + if (h == h3) { + found1 = true; + } else if (h == h4) { + found2 = true; + } + count++; + }); + assert(found1); + assert(found2); + assert(count == 2); + + check_flips(graph, p0, {h3, h4, h5}); + check_flips(graph, p1, {h1, h3, h5}); + } + + // optimizing with id reassignment + { + PackedGraph graph; + handle_t h1 = graph.create_handle("ATGTAG"); + handle_t h2 = graph.create_handle("ACCCC"); + handle_t h3 = graph.create_handle("C"); + handle_t h4 = graph.create_handle("ATT"); + handle_t h5 = graph.create_handle("GGCA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h3); + graph.create_edge(h3, h5); + graph.create_edge(h3, h4); + graph.create_edge(h4, h5); + + path_handle_t p0 = graph.create_path_handle("0"); + path_handle_t p1 = graph.create_path_handle("1"); + path_handle_t p2 = graph.create_path_handle("2"); + + graph.append_step(p0, h3); + graph.append_step(p0, h4); + graph.append_step(p0, h5); + + graph.append_step(p1, h1); + graph.append_step(p1, h3); + graph.append_step(p1, h5); + + graph.append_step(p2, h1); + graph.append_step(p2, h2); + graph.append_step(p2, h3); + graph.append_step(p2, h4); + graph.append_step(p2, h5); + + // delete some things, but not enough to trigger defragmentation + graph.destroy_path(p2); + graph.destroy_handle(h2); + // reallocate and 
compress down to the smaller size, reassigning IDs + graph.optimize(true); + set seen_ids; + + int count = 0; + bool found1 = false, found2 = false, found3 = false, found4 = false; + graph.for_each_handle([&](const handle_t &handle) { + if (graph.get_sequence(handle) == "ATGTAG") { + h1 = handle; + found1 = true; + } else if (graph.get_sequence(handle) == "C") { + h3 = handle; + found2 = true; + } else if (graph.get_sequence(handle) == "ATT") { + h4 = handle; + found3 = true; + } else if (graph.get_sequence(handle) == "GGCA") { + h5 = handle; + found4 = true; + } else { + assert(false); + } + count++; + + seen_ids.insert(graph.get_id(handle)); + + assert(graph.get_id(handle) >= 1); + assert(graph.get_id(handle) <= 4); + }); + + assert(found1); + assert(found2); + assert(found3); + assert(found4); + assert(count == 4); + assert(seen_ids.size() == 4); + + count = 0; + found1 = found2 = found3 = found4 = false; + + graph.follow_edges(h1, false, [&](const handle_t &h) { + if (h == h3) { + found1 = true; + } + count++; + }); + assert(found1); + assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h1, true, [&](const handle_t &h) { count++; }); + assert(count == 0); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h3, false, [&](const handle_t &h) { + if (h == h4) { + found1 = true; + } + if (h == h5) { + found2 = true; + } + count++; + }); + assert(found1); + assert(found2); + assert(count == 2); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h3, true, [&](const handle_t &h) { + if (h == h1) { + found1 = true; + } + count++; + }); + assert(found1); + assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h4, false, [&](const handle_t &h) { + if (h == h5) { + found1 = true; + } + count++; + }); + assert(found1); + assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h4, true, [&](const handle_t &h) { + if (h == h3) { + 
found1 = true; + } + count++; + }); + assert(found1); + assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h5, false, [&](const handle_t &h) { count++; }); + assert(count == 0); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h5, true, [&](const handle_t &h) { + if (h == h3) { + found1 = true; + } else if (h == h4) { + found2 = true; + } + count++; + }); + assert(found1); + assert(found2); + assert(count == 2); + + check_flips(graph, p0, {h3, h4, h5}); + check_flips(graph, p1, {h1, h3, h5}); + } + + cerr << "PackedGraph tests successful!" << endl; +} + +void test_multithreaded_overlay_construction() { + HashGraph graph; + + std::string node_content = "GATTACACATTAG"; + size_t node_count = 1000; + size_t true_path_length = node_count * node_content.size(); + size_t path_count = 10; + // We should coalesce 2 paths into each index. + size_t steps_per_index = node_count * 2; + + // Make a long linear graph + std::vector nodes; + for (size_t i = 0; i < node_count; i++) { + nodes.push_back(graph.create_handle(node_content)); + if (nodes.size() > 1) { + graph.create_edge(nodes[nodes.size() - 2], nodes[nodes.size() - 1]); + } + } + + // Make a bunch of paths and keep their names + std::vector paths; + for (size_t i = 0; i < path_count; i++) { + string path_name = "path" + std::to_string(i); + paths.push_back(path_name); + path_handle_t path_handle = graph.create_path_handle(path_name); + for (auto &visit : nodes) { + graph.append_step(path_handle, visit); + } + } + + // Back up the thread count we have been using. 
+ int backup_thread_count = omp_get_max_threads(); + for (int thread_count = 1; thread_count <= 4; thread_count++) { + // Try this number of threads + omp_set_num_threads(thread_count); + + // Make an overlay with this many threads for construction + PackedPositionOverlay overlay(&graph, {}, steps_per_index); + + // Make sure it is right + for (auto &path_name : paths) { + assert(overlay.has_path(path_name)); + path_handle_t path_handle = overlay.get_path_handle(path_name); + // Make sure they have the right name and length. + assert(overlay.get_path_name(path_handle) == path_name); + assert(overlay.get_path_length(path_handle) == true_path_length); + for (size_t i = 0; i < true_path_length; i++) { + // For each position + // Figure out what node and orientation it should have. + handle_t true_underlying_handle = nodes.at(i / node_content.size()); + // Find its step + step_handle_t seen_step = overlay.get_step_at_position(path_handle, i); + // Make sure it is on the right path + assert(overlay.get_path_handle_of_step(seen_step) == path_handle); + // Make sure it is the right node + handle_t observed_handle = overlay.get_handle_of_step(seen_step); + assert(overlay.get_underlying_handle(observed_handle) == + true_underlying_handle); + // Make sure the step is at the right place + size_t true_step_start = i - (i % node_content.size()); + assert(overlay.get_position_of_step(seen_step) == true_step_start); + } + } + } + // Go back to the default thread count. + omp_set_num_threads(backup_thread_count); + + cerr << "Multithreaded PackedPositionOverlay tests successful!" 
<< endl; +} + +void test_path_position_overlays() { + + vector implementations; + + HashGraph hg; + implementations.push_back(&hg); + + PackedGraph pg; + implementations.push_back(&pg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + for (MutablePathDeletableHandleGraph *implementation : implementations) { + + MutablePathDeletableHandleGraph &graph = *implementation; + + handle_t h1 = graph.create_handle("AAA"); + handle_t h2 = graph.create_handle("A"); + handle_t h3 = graph.create_handle("T"); + handle_t h4 = graph.create_handle("AAAAA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h4); + graph.create_edge(h3, h4); + + path_handle_t p1 = graph.create_path_handle("p1"); + step_handle_t s1 = graph.append_step(p1, h1); + step_handle_t s2 = graph.append_step(p1, h2); + step_handle_t s3 = graph.append_step(p1, h4); + + // static position overlays + { + vector overlays; + + PositionOverlay basic_overlay(&graph); + PackedPositionOverlay packed_overlay(&graph); + + overlays.push_back(&basic_overlay); + overlays.push_back(&packed_overlay); + + for (PathPositionHandleGraph *implementation : overlays) { + PathPositionHandleGraph &overlay = *implementation; + + assert(overlay.get_path_length(p1) == 9); + + assert(overlay.get_position_of_step(s1) == 0); + assert(overlay.get_position_of_step(s2) == 3); + assert(overlay.get_position_of_step(s3) == 4); + + assert(overlay.get_step_at_position(p1, 0) == s1); + assert(overlay.get_step_at_position(p1, 1) == s1); + assert(overlay.get_step_at_position(p1, 2) == s1); + assert(overlay.get_step_at_position(p1, 3) == s2); + assert(overlay.get_step_at_position(p1, 4) == s3); + assert(overlay.get_step_at_position(p1, 5) == s3); + assert(overlay.get_step_at_position(p1, 6) == s3); + assert(overlay.get_step_at_position(p1, 7) == s3); + assert(overlay.get_step_at_position(p1, 8) == s3); + assert(overlay.get_step_at_position(p1, 9) == overlay.path_end(p1)); + 
assert(overlay.get_step_at_position(p1, 10) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); + } } - - cerr << "PackedGraph tests successful!" << endl; -} -void test_multithreaded_overlay_construction() { - HashGraph graph; - - std::string node_content = "GATTACACATTAG"; - size_t node_count = 1000; - size_t true_path_length = node_count * node_content.size(); - size_t path_count = 10; - // We should coalesce 2 paths into each index. - size_t steps_per_index = node_count * 2; - - // Make a long linear graph - std::vector nodes; - for (size_t i = 0; i < node_count; i++) { - nodes.push_back(graph.create_handle(node_content)); - if (nodes.size() > 1) { - graph.create_edge(nodes[nodes.size() - 2], nodes[nodes.size() - 1]); - } - } - - // Make a bunch of paths and keep their names - std::vector paths; - for (size_t i = 0; i < path_count; i++) { - string path_name = "path" + std::to_string(i); - paths.push_back(path_name); - path_handle_t path_handle = graph.create_path_handle(path_name); - for (auto& visit : nodes) { - graph.append_step(path_handle, visit); - } - } - - // Back up the thread count we have been using. - int backup_thread_count = omp_get_max_threads(); - for (int thread_count = 1; thread_count <= 4; thread_count++) { - // Try this number of threads - omp_set_num_threads(thread_count); - - // Make an overlay with this many threads for construction - PackedPositionOverlay overlay(&graph, {}, steps_per_index); - - // Make sure it is right - for (auto& path_name : paths) { - assert(overlay.has_path(path_name)); - path_handle_t path_handle = overlay.get_path_handle(path_name); - // Make sure they have the right name and length. - assert(overlay.get_path_name(path_handle) == path_name); - assert(overlay.get_path_length(path_handle) == true_path_length); - for (size_t i = 0; i < true_path_length; i++) { - // For each position - // Figure out what node and orientation it should have. 
- handle_t true_underlying_handle = nodes.at(i / node_content.size()); - // Find its step - step_handle_t seen_step = overlay.get_step_at_position(path_handle, i); - // Make sure it is on the right path - assert(overlay.get_path_handle_of_step(seen_step) == path_handle); - // Make sure it is the right node - handle_t observed_handle = overlay.get_handle_of_step(seen_step); - assert(overlay.get_underlying_handle(observed_handle) == true_underlying_handle); - // Make sure the step is at the right place - size_t true_step_start = i - (i % node_content.size()); - assert(overlay.get_position_of_step(seen_step) == true_step_start); - } - } - + // mutable position overlay + { + MutablePositionOverlay overlay(&graph); + + handle_t h5 = overlay.create_handle("AAAA"); + + overlay.create_edge(h4, h5); + overlay.create_edge(h5, h5); + + step_handle_t s4 = overlay.append_step(p1, h5); + + assert(overlay.get_path_length(p1) == 13); + + assert(overlay.get_position_of_step(s4) == 9); + + assert(overlay.get_step_at_position(p1, 9) == s4); + assert(overlay.get_step_at_position(p1, 10) == s4); + assert(overlay.get_step_at_position(p1, 11) == s4); + assert(overlay.get_step_at_position(p1, 12) == s4); + assert(overlay.get_step_at_position(p1, 13) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 14) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); + + step_handle_t s5 = overlay.append_step(p1, h5); + + assert(overlay.get_path_length(p1) == 17); + + assert(overlay.get_position_of_step(s5) == 13); + + assert(overlay.get_step_at_position(p1, 13) == s5); + assert(overlay.get_step_at_position(p1, 14) == s5); + assert(overlay.get_step_at_position(p1, 15) == s5); + assert(overlay.get_step_at_position(p1, 16) == s5); + assert(overlay.get_step_at_position(p1, 17) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 18) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 1000) == 
overlay.path_end(p1)); + + path_handle_t p2 = overlay.create_path_handle("p2"); + + assert(overlay.get_path_length(p2) == 0); + + step_handle_t s6 = overlay.prepend_step(p2, h3); + + assert(overlay.get_path_length(p2) == 1); + + assert(overlay.get_position_of_step(s6) == 0); + + assert(overlay.get_step_at_position(p2, 0) == s6); + assert(overlay.get_step_at_position(p2, 1) == overlay.path_end(p2)); + assert(overlay.get_step_at_position(p2, 2) == overlay.path_end(p2)); + assert(overlay.get_step_at_position(p2, 1000) == overlay.path_end(p2)); + + step_handle_t s7 = overlay.prepend_step(p2, h1); + + assert(overlay.get_path_length(p2) == 4); + + assert(overlay.get_position_of_step(s7) == 0); + assert(overlay.get_position_of_step(s6) == 3); + + assert(overlay.get_step_at_position(p2, 0) == s7); + assert(overlay.get_step_at_position(p2, 1) == s7); + assert(overlay.get_step_at_position(p2, 2) == s7); + assert(overlay.get_step_at_position(p2, 3) == s6); + assert(overlay.get_step_at_position(p2, 4) == overlay.path_end(p2)); + assert(overlay.get_step_at_position(p2, 5) == overlay.path_end(p2)); + assert(overlay.get_step_at_position(p2, 1000) == overlay.path_end(p2)); + + handle_t h2_flip = overlay.apply_orientation(overlay.flip(h2)); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 3)) == + overlay.flip(h2_flip)); + + vector offs_1{1}; + auto parts_1 = overlay.divide_handle(overlay.flip(h1), offs_1); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 0)) == + overlay.flip(parts_1[1])); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 1)) == + overlay.flip(parts_1[1])); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 2)) == + overlay.flip(parts_1[0])); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 3)) == + overlay.flip(h2_flip)); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 0)) == + overlay.flip(parts_1[1])); + 
assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 1)) == + overlay.flip(parts_1[1])); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 2)) == + overlay.flip(parts_1[0])); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 3)) == + h3); + + vector offs_2{1, 3}; + auto parts_2 = overlay.divide_handle(h5, offs_2); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 9)) == + parts_2[0]); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 10)) == + parts_2[1]); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 11)) == + parts_2[1]); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 12)) == + parts_2[2]); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 13)) == + parts_2[0]); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 14)) == + parts_2[1]); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 15)) == + parts_2[1]); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 16)) == + parts_2[2]); + assert(overlay.get_step_at_position(p1, 17) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 18) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); } - // Go back to the default thread count. - omp_set_num_threads(backup_thread_count); - - cerr << "Multithreaded PackedPositionOverlay tests successful!" << endl; + } + cerr << "PathPositionOverlay tests successful!" 
<< endl; } -void test_path_position_overlays() { - - vector implementations; +void test_packed_reference_path_overlay() { - HashGraph hg; - implementations.push_back(&hg); + vector implementations; - PackedGraph pg; - implementations.push_back(&pg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - for (MutablePathDeletableHandleGraph* implementation : implementations) { - - MutablePathDeletableHandleGraph& graph = *implementation; - - handle_t h1 = graph.create_handle("AAA"); - handle_t h2 = graph.create_handle("A"); - handle_t h3 = graph.create_handle("T"); - handle_t h4 = graph.create_handle("AAAAA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h4); - graph.create_edge(h3, h4); - - path_handle_t p1 = graph.create_path_handle("p1"); - step_handle_t s1 = graph.append_step(p1, h1); - step_handle_t s2 = graph.append_step(p1, h2); - step_handle_t s3 = graph.append_step(p1, h4); - - // static position overlays - { - vector overlays; - - PositionOverlay basic_overlay(&graph); - PackedPositionOverlay packed_overlay(&graph); - - overlays.push_back(&basic_overlay); - overlays.push_back(&packed_overlay); - - for (PathPositionHandleGraph* implementation : overlays) { - PathPositionHandleGraph& overlay = *implementation; - - assert(overlay.get_path_length(p1) == 9); - - assert(overlay.get_position_of_step(s1) == 0); - assert(overlay.get_position_of_step(s2) == 3); - assert(overlay.get_position_of_step(s3) == 4); - - assert(overlay.get_step_at_position(p1, 0) == s1); - assert(overlay.get_step_at_position(p1, 1) == s1); - assert(overlay.get_step_at_position(p1, 2) == s1); - assert(overlay.get_step_at_position(p1, 3) == s2); - assert(overlay.get_step_at_position(p1, 4) == s3); - assert(overlay.get_step_at_position(p1, 5) == s3); - assert(overlay.get_step_at_position(p1, 6) == s3); - assert(overlay.get_step_at_position(p1, 7) == s3); - assert(overlay.get_step_at_position(p1, 8) == s3); - 
assert(overlay.get_step_at_position(p1, 9) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 10) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); - } + HashGraph hg; + implementations.push_back(&hg); + + PackedGraph pg; + implementations.push_back(&pg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + for (MutablePathDeletableHandleGraph *implementation : implementations) { + + MutablePathDeletableHandleGraph &graph = *implementation; + + handle_t h1 = graph.create_handle("AAA"); + handle_t h2 = graph.create_handle("A"); + handle_t h3 = graph.create_handle("T"); + handle_t h4 = graph.create_handle("AAAAA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h4); + graph.create_edge(h3, h4); + + path_handle_t p1 = graph.create_path_handle("p1"); + step_handle_t s1 = graph.append_step(p1, h1); + step_handle_t s2 = graph.append_step(p1, h2); + step_handle_t s3 = graph.append_step(p1, h4); + + path_handle_t p2 = graph.create_path_handle("p2"); + step_handle_t s2_1 = graph.append_step(p2, graph.flip(h4)); + step_handle_t s2_2 = graph.append_step(p2, graph.flip(h3)); + step_handle_t s2_3 = graph.append_step(p2, graph.flip(h1)); + + { + + PackedReferencePathOverlay overlay(&graph); + + assert(overlay.get_path_length(p1) == 9); + + assert(overlay.get_position_of_step(s1) == 0); + assert(overlay.get_position_of_step(s2) == 3); + assert(overlay.get_position_of_step(s3) == 4); + + assert(overlay.get_step_at_position(p1, 0) == s1); + assert(overlay.get_step_at_position(p1, 1) == s1); + assert(overlay.get_step_at_position(p1, 2) == s1); + assert(overlay.get_step_at_position(p1, 3) == s2); + assert(overlay.get_step_at_position(p1, 4) == s3); + assert(overlay.get_step_at_position(p1, 5) == s3); + assert(overlay.get_step_at_position(p1, 6) == s3); + assert(overlay.get_step_at_position(p1, 7) == s3); + assert(overlay.get_step_at_position(p1, 8) == s3); + 
assert(overlay.get_step_at_position(p1, 9) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 10) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); + + bool found1 = false; + bool found2 = false; + overlay.for_each_step_on_handle(h1, [&](const step_handle_t &s) { + if (s == s1) { + found1 = true; + } else if (s == s2_3) { + found2 = true; + } else { + assert(false); } - - - // mutable position overlay - { - MutablePositionOverlay overlay(&graph); - - handle_t h5 = overlay.create_handle("AAAA"); - - overlay.create_edge(h4, h5); - overlay.create_edge(h5, h5); - - step_handle_t s4 = overlay.append_step(p1, h5); - - assert(overlay.get_path_length(p1) == 13); - - assert(overlay.get_position_of_step(s4) == 9); - - assert(overlay.get_step_at_position(p1, 9) == s4); - assert(overlay.get_step_at_position(p1, 10) == s4); - assert(overlay.get_step_at_position(p1, 11) == s4); - assert(overlay.get_step_at_position(p1, 12) == s4); - assert(overlay.get_step_at_position(p1, 13) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 14) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); - - step_handle_t s5 = overlay.append_step(p1, h5); - - assert(overlay.get_path_length(p1) == 17); - - assert(overlay.get_position_of_step(s5) == 13); - - assert(overlay.get_step_at_position(p1, 13) == s5); - assert(overlay.get_step_at_position(p1, 14) == s5); - assert(overlay.get_step_at_position(p1, 15) == s5); - assert(overlay.get_step_at_position(p1, 16) == s5); - assert(overlay.get_step_at_position(p1, 17) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 18) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); - - path_handle_t p2 = overlay.create_path_handle("p2"); - - assert(overlay.get_path_length(p2) == 0); - - step_handle_t s6 = overlay.prepend_step(p2, h3); - - assert(overlay.get_path_length(p2) 
== 1); - - assert(overlay.get_position_of_step(s6) == 0); - - assert(overlay.get_step_at_position(p2, 0) == s6); - assert(overlay.get_step_at_position(p2, 1) == overlay.path_end(p2)); - assert(overlay.get_step_at_position(p2, 2) == overlay.path_end(p2)); - assert(overlay.get_step_at_position(p2, 1000) == overlay.path_end(p2)); - - step_handle_t s7 = overlay.prepend_step(p2, h1); - - assert(overlay.get_path_length(p2) == 4); - - assert(overlay.get_position_of_step(s7) == 0); - assert(overlay.get_position_of_step(s6) == 3); - - assert(overlay.get_step_at_position(p2, 0) == s7); - assert(overlay.get_step_at_position(p2, 1) == s7); - assert(overlay.get_step_at_position(p2, 2) == s7); - assert(overlay.get_step_at_position(p2, 3) == s6); - assert(overlay.get_step_at_position(p2, 4) == overlay.path_end(p2)); - assert(overlay.get_step_at_position(p2, 5) == overlay.path_end(p2)); - assert(overlay.get_step_at_position(p2, 1000) == overlay.path_end(p2)); - - handle_t h2_flip = overlay.apply_orientation(overlay.flip(h2)); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 3)) == overlay.flip(h2_flip)); - - vector offs_1{1}; - auto parts_1 = overlay.divide_handle(overlay.flip(h1), offs_1); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 0)) == overlay.flip(parts_1[1])); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 1)) == overlay.flip(parts_1[1])); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 2)) == overlay.flip(parts_1[0])); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 3)) == overlay.flip(h2_flip)); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 0)) == overlay.flip(parts_1[1])); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 1)) == overlay.flip(parts_1[1])); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 2)) == overlay.flip(parts_1[0])); - 
assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 3)) == h3); - - - vector offs_2{1, 3}; - auto parts_2 = overlay.divide_handle(h5, offs_2); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 9)) == parts_2[0]); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 10)) == parts_2[1]); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 11)) == parts_2[1]); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 12)) == parts_2[2]); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 13)) == parts_2[0]); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 14)) == parts_2[1]); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 15)) == parts_2[1]); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 16)) == parts_2[2]); - assert(overlay.get_step_at_position(p1, 17) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 18) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); + }); + assert(found1); + assert(found2); + found1 = false; + found2 = false; + + overlay.for_each_step_on_handle(h2, [&](const step_handle_t &s) { + if (s == s2) { + found1 = true; + } else { + assert(false); } + }); + assert(found1); + found1 = false; + + overlay.for_each_step_on_handle(h3, [&](const step_handle_t &s) { + if (s == s2_2) { + found1 = true; + } else { + assert(false); + } + }); + assert(found1); + found1 = false; + + overlay.for_each_step_on_handle(h4, [&](const step_handle_t &s) { + if (s == s3) { + found1 = true; + } else if (s == s2_1) { + found2 = true; + } else { + assert(false); + } + }); + assert(found1); + assert(found2); + found1 = false; + found2 = false; } - cerr << "PathPositionOverlay tests successful!" 
<< endl; -} -void test_packed_reference_path_overlay() { - - vector implementations; + { - HashGraph hg; - implementations.push_back(&hg); + // Make sure we can handle a lot of paths + for (size_t i = 0; i < 100; i++) { + path_handle_t pn = graph.create_path_handle("pn" + std::to_string(i)); + graph.append_step(pn, h1); + graph.append_step(pn, h2); + graph.append_step(pn, h4); + } - PackedGraph pg; - implementations.push_back(&pg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - for (MutablePathDeletableHandleGraph* implementation : implementations) { - - MutablePathDeletableHandleGraph& graph = *implementation; - - handle_t h1 = graph.create_handle("AAA"); - handle_t h2 = graph.create_handle("A"); - handle_t h3 = graph.create_handle("T"); - handle_t h4 = graph.create_handle("AAAAA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h4); - graph.create_edge(h3, h4); - - path_handle_t p1 = graph.create_path_handle("p1"); - step_handle_t s1 = graph.append_step(p1, h1); - step_handle_t s2 = graph.append_step(p1, h2); - step_handle_t s3 = graph.append_step(p1, h4); - - path_handle_t p2 = graph.create_path_handle("p2"); - step_handle_t s2_1 = graph.append_step(p2, graph.flip(h4)); - step_handle_t s2_2 = graph.append_step(p2, graph.flip(h3)); - step_handle_t s2_3 = graph.append_step(p2, graph.flip(h1)); - - { - - PackedReferencePathOverlay overlay(&graph); - - assert(overlay.get_path_length(p1) == 9); - - assert(overlay.get_position_of_step(s1) == 0); - assert(overlay.get_position_of_step(s2) == 3); - assert(overlay.get_position_of_step(s3) == 4); - - assert(overlay.get_step_at_position(p1, 0) == s1); - assert(overlay.get_step_at_position(p1, 1) == s1); - assert(overlay.get_step_at_position(p1, 2) == s1); - assert(overlay.get_step_at_position(p1, 3) == s2); - assert(overlay.get_step_at_position(p1, 4) == s3); - assert(overlay.get_step_at_position(p1, 5) == s3); - assert(overlay.get_step_at_position(p1, 6) == s3); - 
assert(overlay.get_step_at_position(p1, 7) == s3); - assert(overlay.get_step_at_position(p1, 8) == s3); - assert(overlay.get_step_at_position(p1, 9) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 10) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); - - bool found1 = false; - bool found2 = false; - overlay.for_each_step_on_handle(h1, [&](const step_handle_t& s) { - if (s == s1) { - found1 = true; - } else if (s == s2_3) { - found2 = true; - } else { - assert(false); - } - }); - assert(found1); - assert(found2); - found1 = false; - found2 = false; - - overlay.for_each_step_on_handle(h2, [&](const step_handle_t& s) { - if (s == s2) { - found1 = true; - } else { - assert(false); - } - }); - assert(found1); - found1 = false; - - overlay.for_each_step_on_handle(h3, [&](const step_handle_t& s) { - if (s == s2_2) { - found1 = true; - } else { - assert(false); - } - }); - assert(found1); - found1 = false; - - overlay.for_each_step_on_handle(h4, [&](const step_handle_t& s) { - if (s == s3) { - found1 = true; - } else if (s == s2_1) { - found2 = true; - } else { - assert(false); - } - }); - assert(found1); - assert(found2); - found1 = false; - found2 = false; - } - - { - - // Make sure we can handle a lot of paths - for (size_t i = 0; i < 100; i++) { - path_handle_t pn = graph.create_path_handle("pn" + std::to_string(i)); - graph.append_step(pn, h1); - graph.append_step(pn, h2); - graph.append_step(pn, h4); - } - - // Split the paths up agross many indexes for testing - PackedReferencePathOverlay overlay(&graph, {}, 10); - - std::unordered_set seen_paths; - overlay.for_each_step_on_handle(h1, [&](const step_handle_t& s) { - seen_paths.insert(overlay.get_path_name(overlay.get_path_handle_of_step(s))); - }); - // Should have the 2 original paths and the 100 new ones. 
- assert(seen_paths.size() == 102); - } + // Split the paths up agross many indexes for testing + PackedReferencePathOverlay overlay(&graph, {}, 10); + + std::unordered_set seen_paths; + overlay.for_each_step_on_handle(h1, [&](const step_handle_t &s) { + seen_paths.insert( + overlay.get_path_name(overlay.get_path_handle_of_step(s))); + }); + // Should have the 2 original paths and the 100 new ones. + assert(seen_paths.size() == 102); } - cerr << "PackedReferencePathOverlay tests successful!" << endl; + } + cerr << "PackedReferencePathOverlay tests successful!" << endl; } void test_reference_path_overlay() { - - vector implementations; - - HashGraph hg; - implementations.push_back(&hg); - - PackedGraph pg; - implementations.push_back(&pg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - for (MutablePathDeletableHandleGraph* implementation : implementations) { - - MutablePathDeletableHandleGraph& graph = *implementation; - - auto h1 = graph.create_handle("AAAA"); - auto h2 = graph.create_handle("AA"); - auto h3 = graph.create_handle("A"); - auto h4 = graph.create_handle("AAAAAA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h4); - graph.create_edge(h3, h4); - - auto p = graph.create_path_handle("p"); - auto s1 = graph.append_step(p, h1); - auto s2 = graph.append_step(p, h2); - auto s3 = graph.append_step(p, h4); - - { - ReferencePathOverlay ref_overlay(&graph); - - auto os1 = ref_overlay.path_begin(p); - auto os2 = ref_overlay.get_next_step(os1); - auto os3 = ref_overlay.get_next_step(os2); - - assert(ref_overlay.get_next_step(os3) == ref_overlay.path_end(p)); - assert(ref_overlay.get_previous_step(os1) == ref_overlay.path_front_end(p)); - - assert(ref_overlay.has_next_step(os1)); - assert(ref_overlay.has_next_step(os2)); - assert(!ref_overlay.has_next_step(os3)); - - assert(!ref_overlay.has_previous_step(os1)); - assert(ref_overlay.has_previous_step(os2)); - assert(ref_overlay.has_previous_step(os3)); - - 
assert(ref_overlay.get_next_step(os1) == os2); - assert(ref_overlay.get_next_step(os2) == os3); - assert(ref_overlay.get_next_step(os3) == ref_overlay.path_end(p)); - assert(ref_overlay.get_previous_step(os1) == ref_overlay.path_front_end(p)); - assert(ref_overlay.get_previous_step(os2) == os1); - assert(ref_overlay.get_previous_step(os3) == os2); - - assert(ref_overlay.get_step_count(p) == 3); - - assert(ref_overlay.get_path_length(p) == 12); - - assert(ref_overlay.get_position_of_step(os1) == 0); - assert(ref_overlay.get_position_of_step(os2) == 4); - assert(ref_overlay.get_position_of_step(os3) == 6); - - for (size_t i = 0; i < 25; ++i) { - if (i < 4) { - assert(ref_overlay.get_step_at_position(p, i) == os1); - } - else if (i < 6) { - assert(ref_overlay.get_step_at_position(p, i) == os2); - } - else if (i < 12) { - assert(ref_overlay.get_step_at_position(p, i) == os3); - } - else { - assert(ref_overlay.get_step_at_position(p, i) == ref_overlay.path_end(p)); - } - } - - int count = 0; - ref_overlay.for_each_step_on_handle(h1, [&](const step_handle_t& s) { - assert(s == os1); - ++count; - }); - assert(count == 1); - count = 0; - ref_overlay.for_each_step_on_handle(h2, [&](const step_handle_t& s) { - assert(s == os2); - ++count; - }); - assert(count == 1); - count = 0; - ref_overlay.for_each_step_on_handle(h3, [&](const step_handle_t& s) { - ++count; - }); - assert(count == 0); - count = 0; - ref_overlay.for_each_step_on_handle(h4, [&](const step_handle_t& s) { - assert(s == os3); - ++count; - }); - assert(count == 1); - } - - random_device rd; - default_random_engine prng(12261988);//(rd()); - - uniform_int_distribution node_len_distr(1, 5); - - vector paths(1, p); - - paths.push_back(graph.create_path_handle(std::to_string(paths.size()))); - paths.push_back(graph.create_path_handle(std::to_string(paths.size()))); - - uniform_int_distribution path_distr(0, paths.size() - 1); - - std::vector handles; - - // add enough nodes to stress test the parallel code - for 
(size_t i = 0; i < 200000; ++i) { - auto p = paths[path_distr(prng)]; - string seq(node_len_distr(prng), 'A'); - auto h = graph.create_handle(seq); - handles.push_back(h); - if (graph.get_step_count(p) != 0) { - graph.create_edge(graph.get_handle_of_step(graph.path_back(p)), h); - } - graph.append_step(p, h); + + vector implementations; + + HashGraph hg; + implementations.push_back(&hg); + + PackedGraph pg; + implementations.push_back(&pg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + for (MutablePathDeletableHandleGraph *implementation : implementations) { + + MutablePathDeletableHandleGraph &graph = *implementation; + + auto h1 = graph.create_handle("AAAA"); + auto h2 = graph.create_handle("AA"); + auto h3 = graph.create_handle("A"); + auto h4 = graph.create_handle("AAAAAA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h4); + graph.create_edge(h3, h4); + + auto p = graph.create_path_handle("p"); + auto s1 = graph.append_step(p, h1); + auto s2 = graph.append_step(p, h2); + auto s3 = graph.append_step(p, h4); + + { + ReferencePathOverlay ref_overlay(&graph); + + auto os1 = ref_overlay.path_begin(p); + auto os2 = ref_overlay.get_next_step(os1); + auto os3 = ref_overlay.get_next_step(os2); + + assert(ref_overlay.get_next_step(os3) == ref_overlay.path_end(p)); + assert(ref_overlay.get_previous_step(os1) == + ref_overlay.path_front_end(p)); + + assert(ref_overlay.has_next_step(os1)); + assert(ref_overlay.has_next_step(os2)); + assert(!ref_overlay.has_next_step(os3)); + + assert(!ref_overlay.has_previous_step(os1)); + assert(ref_overlay.has_previous_step(os2)); + assert(ref_overlay.has_previous_step(os3)); + + assert(ref_overlay.get_next_step(os1) == os2); + assert(ref_overlay.get_next_step(os2) == os3); + assert(ref_overlay.get_next_step(os3) == ref_overlay.path_end(p)); + assert(ref_overlay.get_previous_step(os1) == + ref_overlay.path_front_end(p)); + assert(ref_overlay.get_previous_step(os2) == os1); + 
assert(ref_overlay.get_previous_step(os3) == os2); + + assert(ref_overlay.get_step_count(p) == 3); + + assert(ref_overlay.get_path_length(p) == 12); + + assert(ref_overlay.get_position_of_step(os1) == 0); + assert(ref_overlay.get_position_of_step(os2) == 4); + assert(ref_overlay.get_position_of_step(os3) == 6); + + for (size_t i = 0; i < 25; ++i) { + if (i < 4) { + assert(ref_overlay.get_step_at_position(p, i) == os1); + } else if (i < 6) { + assert(ref_overlay.get_step_at_position(p, i) == os2); + } else if (i < 12) { + assert(ref_overlay.get_step_at_position(p, i) == os3); + } else { + assert(ref_overlay.get_step_at_position(p, i) == + ref_overlay.path_end(p)); } - - uniform_int_distribution handle_distr(0, handles.size() - 1); - - // add enough path steps that some nodes will have >= 3 path coverage - for (size_t i = 0; i < 100000; ++i) { - auto p = paths[path_distr(prng)]; - auto h1 = graph.get_handle_of_step(graph.path_back(p)); - auto h2 = handles[handle_distr(prng)]; - graph.create_edge(h1, h2); - graph.append_step(p, h2); + } + + int count = 0; + ref_overlay.for_each_step_on_handle(h1, [&](const step_handle_t &s) { + assert(s == os1); + ++count; + }); + assert(count == 1); + count = 0; + ref_overlay.for_each_step_on_handle(h2, [&](const step_handle_t &s) { + assert(s == os2); + ++count; + }); + assert(count == 1); + count = 0; + ref_overlay.for_each_step_on_handle( + h3, [&](const step_handle_t &s) { ++count; }); + assert(count == 0); + count = 0; + ref_overlay.for_each_step_on_handle(h4, [&](const step_handle_t &s) { + assert(s == os3); + ++count; + }); + assert(count == 1); + } + + random_device rd; + default_random_engine prng(12261988); //(rd()); + + uniform_int_distribution node_len_distr(1, 5); + + vector paths(1, p); + + paths.push_back(graph.create_path_handle(std::to_string(paths.size()))); + paths.push_back(graph.create_path_handle(std::to_string(paths.size()))); + + uniform_int_distribution path_distr(0, paths.size() - 1); + + std::vector 
handles; + + // add enough nodes to stress test the parallel code + for (size_t i = 0; i < 200000; ++i) { + auto p = paths[path_distr(prng)]; + string seq(node_len_distr(prng), 'A'); + auto h = graph.create_handle(seq); + handles.push_back(h); + if (graph.get_step_count(p) != 0) { + graph.create_edge(graph.get_handle_of_step(graph.path_back(p)), h); + } + graph.append_step(p, h); + } + + uniform_int_distribution handle_distr(0, handles.size() - 1); + + // add enough path steps that some nodes will have >= 3 path coverage + for (size_t i = 0; i < 100000; ++i) { + auto p = paths[path_distr(prng)]; + auto h1 = graph.get_handle_of_step(graph.path_back(p)); + auto h2 = handles[handle_distr(prng)]; + graph.create_edge(h1, h2); + graph.append_step(p, h2); + } + + { + ReferencePathOverlay ref_overlay(&graph); + + assert(ref_overlay.get_path_count() == paths.size()); + + std::unordered_map> steps_on_handle; + + ref_overlay.for_each_path_handle([&](const path_handle_t &path) { + size_t walked_len = 0; + for (auto s = ref_overlay.path_begin(path), + end = ref_overlay.path_end(path); + s != end; s = ref_overlay.get_next_step(s)) { + assert(ref_overlay.get_path_handle_of_step(s) == path); + assert(ref_overlay.get_position_of_step(s) == walked_len); + auto h = ref_overlay.get_handle_of_step(s); + size_t len = ref_overlay.get_length(h); + for (size_t i = 0; i < len; ++i) { + auto s2 = ref_overlay.get_step_at_position(path, walked_len + i); + assert(s2 == s); + } + steps_on_handle[h].push_back(s); + walked_len += len; } - - { - ReferencePathOverlay ref_overlay(&graph); - - assert(ref_overlay.get_path_count() == paths.size()); - - std::unordered_map> steps_on_handle; - - ref_overlay.for_each_path_handle([&](const path_handle_t& path) { - size_t walked_len = 0; - for (auto s = ref_overlay.path_begin(path), end = ref_overlay.path_end(path); s != end; s = ref_overlay.get_next_step(s)) { - assert(ref_overlay.get_path_handle_of_step(s) == path); - 
assert(ref_overlay.get_position_of_step(s) == walked_len); - auto h = ref_overlay.get_handle_of_step(s); - size_t len = ref_overlay.get_length(h); - for (size_t i = 0; i < len; ++i) { - auto s2 = ref_overlay.get_step_at_position(path, walked_len + i); - assert(s2 == s); - } - steps_on_handle[h].push_back(s); - walked_len += len; - } - assert(ref_overlay.get_path_length(path) == walked_len); - }); - - ref_overlay.for_each_handle([&](const handle_t& handle) { -// std::cerr << "check handles on " << ref_overlay.get_id(handle) << '\n'; - auto& direct = steps_on_handle[handle]; - std::sort(direct.begin(), direct.end()); - vector indexed; - ref_overlay.for_each_step_on_handle(handle, [&](const step_handle_t& step) { - indexed.push_back(step); - }); - std::sort(indexed.begin(), indexed.end()); - if (direct != indexed) { - std::cerr << "error on node " << ref_overlay.get_id(handle) << '\n'; - std::cerr << "direct\n"; - for (auto s : direct) { - std::cerr << '\t' << handlegraph::as_integers(s)[0] << '\t' << handlegraph::as_integers(s)[1] << '\t' << handlegraph::as_integer(ref_overlay.get_path_handle_of_step(s)) << '\t' << ref_overlay.get_id(ref_overlay.get_handle_of_step(s)) << '\n'; - } - std::cerr << "indexed\n"; - for (auto s : indexed) { - std::cerr << '\t' << handlegraph::as_integers(s)[0] << '\t' << handlegraph::as_integers(s)[1] << '\t' << handlegraph::as_integer(ref_overlay.get_path_handle_of_step(s)) << '\t' << ref_overlay.get_id(ref_overlay.get_handle_of_step(s)) << '\n'; - } - } - assert(direct == indexed); - }); + assert(ref_overlay.get_path_length(path) == walked_len); + }); + + ref_overlay.for_each_handle([&](const handle_t &handle) { + // std::cerr << "check handles on " << + // ref_overlay.get_id(handle) << '\n'; + auto &direct = steps_on_handle[handle]; + std::sort(direct.begin(), direct.end()); + vector indexed; + ref_overlay.for_each_step_on_handle( + handle, + [&](const step_handle_t &step) { indexed.push_back(step); }); + std::sort(indexed.begin(), 
indexed.end()); + if (direct != indexed) { + std::cerr << "error on node " << ref_overlay.get_id(handle) << '\n'; + std::cerr << "direct\n"; + for (auto s : direct) { + std::cerr << '\t' << handlegraph::as_integers(s)[0] << '\t' + << handlegraph::as_integers(s)[1] << '\t' + << handlegraph::as_integer( + ref_overlay.get_path_handle_of_step(s)) + << '\t' + << ref_overlay.get_id(ref_overlay.get_handle_of_step(s)) + << '\n'; + } + std::cerr << "indexed\n"; + for (auto s : indexed) { + std::cerr << '\t' << handlegraph::as_integers(s)[0] << '\t' + << handlegraph::as_integers(s)[1] << '\t' + << handlegraph::as_integer( + ref_overlay.get_path_handle_of_step(s)) + << '\t' + << ref_overlay.get_id(ref_overlay.get_handle_of_step(s)) + << '\n'; + } } + assert(direct == indexed); + }); } - - cerr << "ReferencePathOverlay tests successful!" << endl; + } + + cerr << "ReferencePathOverlay tests successful!" << endl; } void test_vectorizable_overlays() { - - vector implementations; - HashGraph hg; - implementations.push_back(&hg); + vector implementations; - PackedGraph pg; - implementations.push_back(&pg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - for (MutablePathDeletableHandleGraph* implementation : implementations) { - - MutablePathDeletableHandleGraph& graph = *implementation; - - handle_t h1 = graph.create_handle("AAA"); - handle_t h2 = graph.create_handle("A"); - handle_t h3 = graph.create_handle("T"); - handle_t h4 = graph.create_handle("AAAAA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h4); - graph.create_edge(h3, h4); - - path_handle_t p1 = graph.create_path_handle("p1"); - step_handle_t s1 = graph.append_step(p1, h1); - step_handle_t s2 = graph.append_step(p1, h2); - step_handle_t s3 = graph.append_step(p1, h4); - - - bdsg::VectorizableOverlay overlay(&graph); - - set edge_ranks; - size_t edge_count = 0; - graph.for_each_edge([&](edge_t edge) { - edge_ranks.insert(overlay.edge_index(edge)); - ++edge_count; 
- }); + HashGraph hg; + implementations.push_back(&hg); - // every edge gets a unique rank - assert(edge_ranks.size() == edge_count); + PackedGraph pg; + implementations.push_back(&pg); - size_t node_count = 0; - map pos_to_node; - graph.for_each_handle([&](handle_t handle) { - pos_to_node[overlay.node_vector_offset(graph.get_id(handle))] = graph.get_id(handle); - ++node_count; - }); + MappedPackedGraph mpg; + implementations.push_back(&mpg); - // every node gets a unique rank - assert(pos_to_node.size() == node_count); + for (MutablePathDeletableHandleGraph *implementation : implementations) { - auto pni = pos_to_node.begin(); - auto next = pni; - for (++next; next != pos_to_node.end(); ++pni, ++next) { - assert(next->first - pni->first == graph.get_length(graph.get_handle(pni->second))); - } + MutablePathDeletableHandleGraph &graph = *implementation; + + handle_t h1 = graph.create_handle("AAA"); + handle_t h2 = graph.create_handle("A"); + handle_t h3 = graph.create_handle("T"); + handle_t h4 = graph.create_handle("AAAAA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h4); + graph.create_edge(h3, h4); + + path_handle_t p1 = graph.create_path_handle("p1"); + step_handle_t s1 = graph.append_step(p1, h1); + step_handle_t s2 = graph.append_step(p1, h2); + step_handle_t s3 = graph.append_step(p1, h4); + + bdsg::VectorizableOverlay overlay(&graph); - // check that node_at_vector_offset works - graph.for_each_handle([&](handle_t handle) { - size_t pos = overlay.node_vector_offset(graph.get_id(handle)); - for (size_t i = 0; i < graph.get_length(handle); ++i) { - assert(overlay.node_at_vector_offset(pos + i + 1) == graph.get_id(handle)); - } - }); + set edge_ranks; + size_t edge_count = 0; + graph.for_each_edge([&](edge_t edge) { + edge_ranks.insert(overlay.edge_index(edge)); + ++edge_count; + }); + + // every edge gets a unique rank + assert(edge_ranks.size() == edge_count); + + size_t node_count = 0; + map pos_to_node; + 
graph.for_each_handle([&](handle_t handle) { + pos_to_node[overlay.node_vector_offset(graph.get_id(handle))] = + graph.get_id(handle); + ++node_count; + }); + + // every node gets a unique rank + assert(pos_to_node.size() == node_count); + + auto pni = pos_to_node.begin(); + auto next = pni; + for (++next; next != pos_to_node.end(); ++pni, ++next) { + assert(next->first - pni->first == + graph.get_length(graph.get_handle(pni->second))); } - cerr << "VectorizableOverlay tests successful!" << endl; + + // check that node_at_vector_offset works + graph.for_each_handle([&](handle_t handle) { + size_t pos = overlay.node_vector_offset(graph.get_id(handle)); + for (size_t i = 0; i < graph.get_length(handle); ++i) { + assert(overlay.node_at_vector_offset(pos + i + 1) == + graph.get_id(handle)); + } + }); + } + cerr << "VectorizableOverlay tests successful!" << endl; } void test_packed_subgraph_overlay() { - - vector implementations; - HashGraph hg; - implementations.push_back(&hg); + vector implementations; - PackedGraph pg; - implementations.push_back(&pg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - for (MutablePathDeletableHandleGraph* implementation : implementations) { - - MutablePathDeletableHandleGraph& graph = *implementation; - - handle_t h1 = graph.create_handle("AAA"); - handle_t h2 = graph.create_handle("A"); - handle_t h3 = graph.create_handle("T"); - handle_t h4 = graph.create_handle("AAAAA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h4); - graph.create_edge(h3, h4); - - PackedSubgraphOverlay subgraph(&graph); - assert(subgraph.get_node_count() == 0); - subgraph.for_each_handle([&](const handle_t& h) { - assert(false); - }); - assert(!subgraph.has_node(graph.get_id(h1))); - assert(!subgraph.has_node(graph.get_id(h2))); - assert(!subgraph.has_node(graph.get_id(h3))); - assert(!subgraph.has_node(graph.get_id(h4))); - - subgraph.add_node(h1); - - assert(subgraph.get_node_count() == 1); - bool 
found1 = false; - subgraph.for_each_handle([&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h1)) { - found1 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h1)); - } - else { - assert(false); - } - }); - assert(found1); - found1 = false; - - assert(subgraph.has_node(graph.get_id(h1))); - assert(!subgraph.has_node(graph.get_id(h2))); - assert(!subgraph.has_node(graph.get_id(h3))); - assert(!subgraph.has_node(graph.get_id(h4))); - - subgraph.follow_edges(h1, true, [&](const handle_t& h) { - assert(false); - }); - subgraph.follow_edges(h1, false, [&](const handle_t& h) { - assert(false); - }); - - assert(subgraph.get_degree(h1, true) == 0); - assert(subgraph.get_degree(h1, false) == 0); - - subgraph.add_node(h4); - - assert(subgraph.get_node_count() == 2); - bool found2 = false; - subgraph.for_each_handle([&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h1)) { - found1 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h1)); - } - else if (subgraph.get_id(h) == graph.get_id(h4)) { - found2 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h4)); - } - else { - assert(false); - } - }); - assert(found1); - assert(found2); - found1 = false; - found2 = false; - - assert(subgraph.has_node(graph.get_id(h1))); - assert(!subgraph.has_node(graph.get_id(h2))); - assert(!subgraph.has_node(graph.get_id(h3))); - assert(subgraph.has_node(graph.get_id(h4))); - - subgraph.follow_edges(h1, true, [&](const handle_t& h) { - assert(false); - }); - subgraph.follow_edges(h1, false, [&](const handle_t& h) { - assert(false); - }); - subgraph.follow_edges(h4, true, [&](const handle_t& h) { - assert(false); - }); - subgraph.follow_edges(h4, false, [&](const handle_t& h) { - assert(false); - }); - - - assert(subgraph.get_degree(h1, true) == 0); - assert(subgraph.get_degree(h1, false) == 0); - assert(subgraph.get_degree(h4, true) == 0); - assert(subgraph.get_degree(h4, false) == 0); - - subgraph.add_node(graph.flip(h2)); - - 
assert(subgraph.get_node_count() == 3); - bool found3 = false; - subgraph.for_each_handle([&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h1)) { - found1 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h1)); - } - else if (subgraph.get_id(h) == graph.get_id(h2)) { - found2 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h2)); - } - else if (subgraph.get_id(h) == graph.get_id(h4)) { - found3 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h4)); - } - else { - assert(false); - } - }); - assert(found1); - assert(found2); - assert(found3); - found1 = false; - found2 = false; - found3 = false; - - assert(subgraph.has_node(graph.get_id(h1))); - assert(subgraph.has_node(graph.get_id(h2))); - assert(!subgraph.has_node(graph.get_id(h3))); - assert(subgraph.has_node(graph.get_id(h4))); - - bool found4 = false; - subgraph.follow_edges(h1, true, [&](const handle_t& h) { - assert(false); - }); - subgraph.follow_edges(h1, false, [&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h2) && !graph.get_is_reverse(h)) { - found1 = true; - } - else { - assert(false); - } - }); - subgraph.follow_edges(h2, true, [&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h1) && !graph.get_is_reverse(h)) { - found2 = true; - } - else { - assert(false); - } - }); - subgraph.follow_edges(h2, false, [&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h4) && !graph.get_is_reverse(h)) { - found3 = true; - } - else { - assert(false); - } - }); - subgraph.follow_edges(h4, true, [&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h2) && !graph.get_is_reverse(h)) { - found4 = true; - } - else { - assert(false); - } - }); - subgraph.follow_edges(h4, false, [&](const handle_t& h) { - assert(false); - }); - - assert(subgraph.get_degree(h1, true) == 0); - assert(subgraph.get_degree(h1, false) == 1); - assert(subgraph.get_degree(h2, true) == 1); - assert(subgraph.get_degree(h2, false) == 1); - 
assert(subgraph.get_degree(h4, true) == 1); - assert(subgraph.get_degree(h4, false) == 0); - - assert(found1); - assert(found2); - assert(found3); - assert(found4); - found1 = false; - found2 = false; - found3 = false; - found4 = false; - - subgraph.remove_node(h1); - - assert(subgraph.get_node_count() == 2); - subgraph.for_each_handle([&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h2)) { - found1 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h2)); - } - else if (subgraph.get_id(h) == graph.get_id(h4)) { - found2 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h4)); - } - else { - assert(false); - } - }); - assert(found1); - assert(found2); - found1 = false; - found2 = false; - - assert(!subgraph.has_node(graph.get_id(h1))); - assert(subgraph.has_node(graph.get_id(h2))); - assert(!subgraph.has_node(graph.get_id(h3))); - assert(subgraph.has_node(graph.get_id(h4))); - - subgraph.follow_edges(h2, true, [&](const handle_t& h) { - assert(false); - }); - subgraph.follow_edges(h2, false, [&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h4) && !graph.get_is_reverse(h)) { - found1 = true; - } - else { - assert(false); - } - }); - subgraph.follow_edges(h4, true, [&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h2) && !graph.get_is_reverse(h)) { - found2 = true; - } - else { - assert(false); - } - }); - subgraph.follow_edges(h4, false, [&](const handle_t& h) { - assert(false); - }); - - assert(subgraph.get_degree(h2, true) == 0); - assert(subgraph.get_degree(h2, false) == 1); - assert(subgraph.get_degree(h4, true) == 1); - assert(subgraph.get_degree(h4, false) == 0); - - assert(found1); - assert(found2); - found1 = false; - found2 = false; - } - - cerr << "PackedSubgraphOverlay tests successful!" 
<< endl; + HashGraph hg; + implementations.push_back(&hg); + + PackedGraph pg; + implementations.push_back(&pg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + for (MutablePathDeletableHandleGraph *implementation : implementations) { + + MutablePathDeletableHandleGraph &graph = *implementation; + + handle_t h1 = graph.create_handle("AAA"); + handle_t h2 = graph.create_handle("A"); + handle_t h3 = graph.create_handle("T"); + handle_t h4 = graph.create_handle("AAAAA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h4); + graph.create_edge(h3, h4); + + PackedSubgraphOverlay subgraph(&graph); + assert(subgraph.get_node_count() == 0); + subgraph.for_each_handle([&](const handle_t &h) { assert(false); }); + assert(!subgraph.has_node(graph.get_id(h1))); + assert(!subgraph.has_node(graph.get_id(h2))); + assert(!subgraph.has_node(graph.get_id(h3))); + assert(!subgraph.has_node(graph.get_id(h4))); + + subgraph.add_node(h1); + + assert(subgraph.get_node_count() == 1); + bool found1 = false; + subgraph.for_each_handle([&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h1)) { + found1 = true; + assert(graph.get_sequence(h) == graph.get_sequence(h1)); + } else { + assert(false); + } + }); + assert(found1); + found1 = false; + + assert(subgraph.has_node(graph.get_id(h1))); + assert(!subgraph.has_node(graph.get_id(h2))); + assert(!subgraph.has_node(graph.get_id(h3))); + assert(!subgraph.has_node(graph.get_id(h4))); + + subgraph.follow_edges(h1, true, [&](const handle_t &h) { assert(false); }); + subgraph.follow_edges(h1, false, [&](const handle_t &h) { assert(false); }); + + assert(subgraph.get_degree(h1, true) == 0); + assert(subgraph.get_degree(h1, false) == 0); + + subgraph.add_node(h4); + + assert(subgraph.get_node_count() == 2); + bool found2 = false; + subgraph.for_each_handle([&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h1)) { + found1 = true; + assert(graph.get_sequence(h) == 
graph.get_sequence(h1)); + } else if (subgraph.get_id(h) == graph.get_id(h4)) { + found2 = true; + assert(graph.get_sequence(h) == graph.get_sequence(h4)); + } else { + assert(false); + } + }); + assert(found1); + assert(found2); + found1 = false; + found2 = false; + + assert(subgraph.has_node(graph.get_id(h1))); + assert(!subgraph.has_node(graph.get_id(h2))); + assert(!subgraph.has_node(graph.get_id(h3))); + assert(subgraph.has_node(graph.get_id(h4))); + + subgraph.follow_edges(h1, true, [&](const handle_t &h) { assert(false); }); + subgraph.follow_edges(h1, false, [&](const handle_t &h) { assert(false); }); + subgraph.follow_edges(h4, true, [&](const handle_t &h) { assert(false); }); + subgraph.follow_edges(h4, false, [&](const handle_t &h) { assert(false); }); + + assert(subgraph.get_degree(h1, true) == 0); + assert(subgraph.get_degree(h1, false) == 0); + assert(subgraph.get_degree(h4, true) == 0); + assert(subgraph.get_degree(h4, false) == 0); + + subgraph.add_node(graph.flip(h2)); + + assert(subgraph.get_node_count() == 3); + bool found3 = false; + subgraph.for_each_handle([&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h1)) { + found1 = true; + assert(graph.get_sequence(h) == graph.get_sequence(h1)); + } else if (subgraph.get_id(h) == graph.get_id(h2)) { + found2 = true; + assert(graph.get_sequence(h) == graph.get_sequence(h2)); + } else if (subgraph.get_id(h) == graph.get_id(h4)) { + found3 = true; + assert(graph.get_sequence(h) == graph.get_sequence(h4)); + } else { + assert(false); + } + }); + assert(found1); + assert(found2); + assert(found3); + found1 = false; + found2 = false; + found3 = false; + + assert(subgraph.has_node(graph.get_id(h1))); + assert(subgraph.has_node(graph.get_id(h2))); + assert(!subgraph.has_node(graph.get_id(h3))); + assert(subgraph.has_node(graph.get_id(h4))); + + bool found4 = false; + subgraph.follow_edges(h1, true, [&](const handle_t &h) { assert(false); }); + subgraph.follow_edges(h1, false, [&](const handle_t 
&h) { + if (subgraph.get_id(h) == graph.get_id(h2) && !graph.get_is_reverse(h)) { + found1 = true; + } else { + assert(false); + } + }); + subgraph.follow_edges(h2, true, [&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h1) && !graph.get_is_reverse(h)) { + found2 = true; + } else { + assert(false); + } + }); + subgraph.follow_edges(h2, false, [&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h4) && !graph.get_is_reverse(h)) { + found3 = true; + } else { + assert(false); + } + }); + subgraph.follow_edges(h4, true, [&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h2) && !graph.get_is_reverse(h)) { + found4 = true; + } else { + assert(false); + } + }); + subgraph.follow_edges(h4, false, [&](const handle_t &h) { assert(false); }); + + assert(subgraph.get_degree(h1, true) == 0); + assert(subgraph.get_degree(h1, false) == 1); + assert(subgraph.get_degree(h2, true) == 1); + assert(subgraph.get_degree(h2, false) == 1); + assert(subgraph.get_degree(h4, true) == 1); + assert(subgraph.get_degree(h4, false) == 0); + + assert(found1); + assert(found2); + assert(found3); + assert(found4); + found1 = false; + found2 = false; + found3 = false; + found4 = false; + + subgraph.remove_node(h1); + + assert(subgraph.get_node_count() == 2); + subgraph.for_each_handle([&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h2)) { + found1 = true; + assert(graph.get_sequence(h) == graph.get_sequence(h2)); + } else if (subgraph.get_id(h) == graph.get_id(h4)) { + found2 = true; + assert(graph.get_sequence(h) == graph.get_sequence(h4)); + } else { + assert(false); + } + }); + assert(found1); + assert(found2); + found1 = false; + found2 = false; + + assert(!subgraph.has_node(graph.get_id(h1))); + assert(subgraph.has_node(graph.get_id(h2))); + assert(!subgraph.has_node(graph.get_id(h3))); + assert(subgraph.has_node(graph.get_id(h4))); + + subgraph.follow_edges(h2, true, [&](const handle_t &h) { assert(false); }); + 
subgraph.follow_edges(h2, false, [&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h4) && !graph.get_is_reverse(h)) { + found1 = true; + } else { + assert(false); + } + }); + subgraph.follow_edges(h4, true, [&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h2) && !graph.get_is_reverse(h)) { + found2 = true; + } else { + assert(false); + } + }); + subgraph.follow_edges(h4, false, [&](const handle_t &h) { assert(false); }); + + assert(subgraph.get_degree(h2, true) == 0); + assert(subgraph.get_degree(h2, false) == 1); + assert(subgraph.get_degree(h4, true) == 1); + assert(subgraph.get_degree(h4, false) == 0); + + assert(found1); + assert(found2); + found1 = false; + found2 = false; + } + + cerr << "PackedSubgraphOverlay tests successful!" << endl; } void test_mapped_packed_graph() { - auto check_graph = [](const MappedPackedGraph& mpg) { - // Dump it into this map - unordered_map graph_contents; - mpg.for_each_handle([&](const handle_t& h) { - graph_contents[mpg.get_id(h)] = mpg.get_sequence(h); - }); - - // Make sure it has the right things - assert(graph_contents.at(1) == "GATTACA"); - assert(graph_contents.at(2) == "CATTAG"); - }; + auto check_graph = [](const MappedPackedGraph &mpg) { + // Dump it into this map + unordered_map graph_contents; + mpg.for_each_handle([&](const handle_t &h) { + graph_contents[mpg.get_id(h)] = mpg.get_sequence(h); + }); - char filename[] = "tmpXXXXXX"; - int fd = mkstemp(filename); - assert(fd != -1); - { - // Make a graph - MappedPackedGraph mpg; - // Give it a node - mpg.create_handle("GATTACA", 1); - // Save it to an FD - mpg.serialize(fd); - // Make sure write-back works - mpg.create_handle("CATTAG", 2); - - // Make sure it looks right now - check_graph(mpg); - } - { - // Make a graph again - MappedPackedGraph mpg; - // Load it from the fd - mpg.deserialize(fd); - // Make sure it looks right - check_graph(mpg); - } - assert(close(fd) == 0); - { - // Make a graph again - MappedPackedGraph mpg; - // 
Load it from the file - mpg.deserialize(filename); - // Make sure it looks right - check_graph(mpg); - } - { - // Make a graph again - MappedPackedGraph mpg; - // Load it from a stream - std::ifstream stream(filename); - mpg.deserialize(stream); - // Make sure it looks right - check_graph(mpg); - } - unlink(filename); - - cerr << "MappedPackedGraph tests successful!" << endl; + // Make sure it has the right things + assert(graph_contents.at(1) == "GATTACA"); + assert(graph_contents.at(2) == "CATTAG"); + }; + + char filename[] = "tmpXXXXXX"; + int fd = mkstemp(filename); + assert(fd != -1); + { + // Make a graph + MappedPackedGraph mpg; + // Give it a node + mpg.create_handle("GATTACA", 1); + // Save it to an FD + mpg.serialize(fd); + // Make sure write-back works + mpg.create_handle("CATTAG", 2); + + // Make sure it looks right now + check_graph(mpg); + } + { + // Make a graph again + MappedPackedGraph mpg; + // Load it from the fd + mpg.deserialize(fd); + // Make sure it looks right + check_graph(mpg); + } + assert(close(fd) == 0); + { + // Make a graph again + MappedPackedGraph mpg; + // Load it from the file + mpg.deserialize(filename); + // Make sure it looks right + check_graph(mpg); + } + { + // Make a graph again + MappedPackedGraph mpg; + // Load it from a stream + std::ifstream stream(filename); + mpg.deserialize(stream); + // Make sure it looks right + check_graph(mpg); + } + unlink(filename); + + cerr << "MappedPackedGraph tests successful!" 
<< endl; } void test_hash_graph() { - - // make sure the copy and moves work as expected - - HashGraph g; - - handle_t h1 = g.create_handle("A"); - handle_t h2 = g.create_handle("T"); - handle_t h3 = g.create_handle("G"); - - g.create_edge(h1, h2); - g.create_edge(h2, h3); - - path_handle_t p = g.create_path_handle("p"); - g.append_step(p, h1); - g.append_step(p, h2); - g.append_step(p, h3); - - HashGraph g_copy_1 = g; - HashGraph g_copy_2(g); - HashGraph g_copy_3(g); - HashGraph g_copy_4(g); - - HashGraph g_move_1 = std::move(g_copy_3); - HashGraph g_move_2(std::move(g_copy_4)); - - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_1, true)); - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_2, true)); - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_1, true)); - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_2, true)); - - // delete a handle on a path to trigger the occurrence index to be accessed - g_copy_1.destroy_handle(g_copy_1.get_handle(g.get_id(h2))); - g_copy_2.destroy_handle(g_copy_2.get_handle(g.get_id(h2))); - g_move_1.destroy_handle(g_move_1.get_handle(g.get_id(h2))); - g_move_2.destroy_handle(g_move_2.get_handle(g.get_id(h2))); - g.destroy_handle(h2); - - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_1, true)); - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_2, true)); - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_1, true)); - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_2, true)); - - cerr << "HashGraph tests successful!" 
<< endl; + + // make sure the copy and moves work as expected + + HashGraph g; + + handle_t h1 = g.create_handle("A"); + handle_t h2 = g.create_handle("T"); + handle_t h3 = g.create_handle("G"); + + g.create_edge(h1, h2); + g.create_edge(h2, h3); + + path_handle_t p = g.create_path_handle("p"); + g.append_step(p, h1); + g.append_step(p, h2); + g.append_step(p, h3); + + HashGraph g_copy_1 = g; + HashGraph g_copy_2(g); + HashGraph g_copy_3(g); + HashGraph g_copy_4(g); + + HashGraph g_move_1 = std::move(g_copy_3); + HashGraph g_move_2(std::move(g_copy_4)); + + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_1, true)); + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_2, true)); + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_1, true)); + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_2, true)); + + // delete a handle on a path to trigger the occurrence index to be accessed + g_copy_1.destroy_handle(g_copy_1.get_handle(g.get_id(h2))); + g_copy_2.destroy_handle(g_copy_2.get_handle(g.get_id(h2))); + g_move_1.destroy_handle(g_move_1.get_handle(g.get_id(h2))); + g_move_2.destroy_handle(g_move_2.get_handle(g.get_id(h2))); + g.destroy_handle(h2); + + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_1, true)); + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_2, true)); + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_1, true)); + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_2, true)); + + cerr << "HashGraph tests successful!" << endl; } void test_hub_labeling() { // To make the tests easier to write we have a widget that does the full dance // to build a packed label vector. 
- auto get_packed_labels = [](const HashGraph& test_g) { - //test HashGraph -> Boost graph + auto get_packed_labels = [](const HashGraph &test_g) { + // test HashGraph -> Boost graph CHOverlay bg = make_boost_graph(test_g); auto [edges_start, edges_end] = boost::edges(bg); - std::for_each(edges_start, edges_end, [&] (auto e) { - cerr << source(e,bg) << " -> " << target(e,bg) << endl; + std::for_each(edges_start, edges_end, [&](auto e) { + cerr << source(e, bg) << " -> " << target(e, bg) << endl; }); make_contraction_hierarchy(bg); - //cerr << " - made contraction hierarchy" << endl; + // cerr << " - made contraction hierarchy" << endl; - vector> labels_fwd; labels_fwd.resize(num_vertices(bg)); - vector> labels_back; labels_back.resize(num_vertices(bg)); + vector> labels_fwd; + labels_fwd.resize(num_vertices(bg)); + vector> labels_back; + labels_back.resize(num_vertices(bg)); /* for (auto v: labels_fwd) { for (auto sz: v) { cerr << "(" << sz.hub << "," << sz.dist << ") "; } - cerr << " | "; + cerr << " | "; } - cerr << endl; + cerr << endl; cerr<<"back:" << endl; for (auto v: labels_back) { for (auto sz: v) { - cerr << "(" << sz.hub << "," << sz.dist << ") "; - } + cerr << "(" << sz.hub << "," << sz.dist << ") "; + } cerr << " | "; } cerr << endl; - cerr << "pack:" << endl; + cerr << "pack:" << endl; for (auto sz: packed_labels) { cerr << sz << " "; } @@ -5032,7 +5053,7 @@ void test_hub_labeling() { create_labels(labels_fwd, labels_back, bg); - //linearization + // linearization return pack_labels(labels_fwd, labels_back); }; @@ -5046,8 +5067,9 @@ void test_hub_labeling() { { // Simple stick graph of 3 nodes HashGraph test_g; - vector handles; handles.resize(3); - for (auto n: {0,1,2}) { + vector handles; + handles.resize(3); + for (auto n : {0, 1, 2}) { handles[n] = test_g.create_handle("A"); } test_g.create_edge(handles[0], handles[1]); @@ -5056,69 +5078,84 @@ void test_hub_labeling() { vector packed_labels = get_packed_labels(test_g); // 0th forward to 1st 
forward: no intervening bases - assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, false)) == 0); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, false)) == + 0); // When asking about the same node twice, we look for self loops. // Here there aren't any. - assert(hhl_query(packed_labels.begin(), rank(0, false), rank(0, false)) == INF_INT); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(0, false)) == + INF_INT); // 2nd reverse to 1st reverse: 1 intervening base assert(hhl_query(packed_labels.begin(), rank(2, true), rank(0, true)) == 1); // 0th reverse to 1st forward: no connection - assert(hhl_query(packed_labels.begin(), rank(0, true), rank(1, false)) == INF_INT); + assert(hhl_query(packed_labels.begin(), rank(0, true), rank(1, false)) == + INF_INT); - //TODO: check that error occurs when nodeside out of range is given + // TODO: check that error occurs when nodeside out of range is given } { // Graph with several nodes but only one edge HashGraph test_g; - vector handles; handles.resize(8); - for (auto n: {0,1,2,3,4,5,6,7}) { - handles[n] = test_g.create_handle(string(n+1, 'A')); + vector handles; + handles.resize(8); + for (auto n : {0, 1, 2, 3, 4, 5, 6, 7}) { + handles[n] = test_g.create_handle(string(n + 1, 'A')); } - vector> edges={{1,3}}; - for (auto e: edges) { - auto [s,t] = e; + vector> edges = {{1, 3}}; + for (auto e : edges) { + auto [s, t] = e; test_g.create_edge(handles[s], handles[t]); } vector packed_labels = get_packed_labels(test_g); // 1st forward to 3rd forward: the only edge there is - assert(hhl_query(packed_labels.begin(), rank(1, false), rank(3, false)) == 0); + assert(hhl_query(packed_labels.begin(), rank(1, false), rank(3, false)) == + 0); // nonexistent path - assert(hhl_query(packed_labels.begin(), rank(0, false), rank(7, false)) == INF_INT); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(7, false)) == + INF_INT); } { // Graph with several nodes and several edges HashGraph 
test_g; - vector handles; handles.resize(8); - for (auto n: {0,1,2,3,4,5,6,7}) { - handles[n] = test_g.create_handle(string(n+1, 'A')); + vector handles; + handles.resize(8); + for (auto n : {0, 1, 2, 3, 4, 5, 6, 7}) { + handles[n] = test_g.create_handle(string(n + 1, 'A')); } - vector> edges={{0,1},{0,2},{1,0},{2,0},{1,3},{1,4},{4,1},{5,5}}; - for (auto e: edges) { - auto [s,t] = e; + vector> edges = {{0, 1}, {0, 2}, {1, 0}, {2, 0}, + {1, 3}, {1, 4}, {4, 1}, {5, 5}}; + for (auto e : edges) { + auto [s, t] = e; test_g.create_edge(handles[s], handles[t]); } vector packed_labels = get_packed_labels(test_g); // 1st forward to 3rd forward: direct connection - assert(hhl_query(packed_labels.begin(), rank(1, false), rank(3, false)) == 0); + assert(hhl_query(packed_labels.begin(), rank(1, false), rank(3, false)) == + 0); // 0th forward to 7th forward: nonexistent path - assert(hhl_query(packed_labels.begin(), rank(0, false), rank(7, false)) == INF_INT); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(7, false)) == + INF_INT); // check node lengths are taken into account - // 0th forward to 3rd forward: should need to go through 1st which has length 2 - assert(hhl_query(packed_labels.begin(), rank(0, false), rank(3, false)) == 2); + // 0th forward to 3rd forward: should need to go through 1st which has + // length 2 + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(3, false)) == + 2); // check u -> v and v -> u are different - // 3rd forward to 1st forward: shouldn't connect because nothing is downstream of 3rd - assert(hhl_query(packed_labels.begin(), rank(3, false), rank(1, false)) == INF_INT); + // 3rd forward to 1st forward: shouldn't connect because nothing is + // downstream of 3rd + assert(hhl_query(packed_labels.begin(), rank(3, false), rank(1, false)) == + INF_INT); // 1st forward to 3rd forward: direct connection - assert(hhl_query(packed_labels.begin(), rank(1, false), rank(3, false)) == 0); + assert(hhl_query(packed_labels.begin(), 
rank(1, false), rank(3, false)) == + 0); // need to debug for (size_t a = 0; a < handles.size() * 2; a++) { @@ -5126,16 +5163,19 @@ void test_hub_labeling() { } // node to itself in the same direction (edge exists) - assert(hhl_query(packed_labels.begin(), rank(5, false), rank(5, false)) == 0); + assert(hhl_query(packed_labels.begin(), rank(5, false), rank(5, false)) == + 0); // node to itself in the same direction (edge doesn't exist) - assert(hhl_query(packed_labels.begin(), rank(3, false), rank(3, false)) == INF_INT); + assert(hhl_query(packed_labels.begin(), rank(3, false), rank(3, false)) == + INF_INT); } { // Test case: Cycle back to the same node with minimum distance > 0 // Creates a triangle: 0 -> 1 -> 2 -> 0 // Node lengths: 0=1, 1=2, 2=3 HashGraph test_g; - vector handles; handles.resize(3); + vector handles; + handles.resize(3); for (auto n : {0, 1, 2}) { handles[n] = test_g.create_handle(string(n + 1, 'A')); } @@ -5147,24 +5187,28 @@ void test_hub_labeling() { // Forward cycle: 0->1->2->0 // 0_fwd to 0_fwd via cycle: intermediate nodes 1 and 2, lengths 2+3=5 - assert(hhl_query(packed_labels.begin(), rank(0, false), rank(0, false)) == 5); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(0, false)) == + 5); // 1_fwd to 1_fwd via cycle: intermediate nodes 2 and 0, lengths 3+1=4 - assert(hhl_query(packed_labels.begin(), rank(1, false), rank(1, false)) == 4); + assert(hhl_query(packed_labels.begin(), rank(1, false), rank(1, false)) == + 4); // 2_fwd to 2_fwd via cycle: intermediate nodes 0 and 1, lengths 1+2=3 - assert(hhl_query(packed_labels.begin(), rank(2, false), rank(2, false)) == 3); + assert(hhl_query(packed_labels.begin(), rank(2, false), rank(2, false)) == + 3); - // The same cycle is visible in reverse. + // The same cycle is visible in reverse. 
assert(hhl_query(packed_labels.begin(), rank(0, true), rank(0, true)) == 5); assert(hhl_query(packed_labels.begin(), rank(1, true), rank(1, true)) == 4); assert(hhl_query(packed_labels.begin(), rank(2, true), rank(2, true)) == 3); } { - // Test case: Forward and reverse orientations of different nodes reaching each other - // Node 0 (len 1) and Node 1 (len 2) - // Edges: 0_fwd -> 1_rev, 2_fwd -> 1_fwd - // This creates a "reversing" pattern where you enter one side and exit the other + // Test case: Forward and reverse orientations of different nodes reaching + // each other Node 0 (len 1) and Node 1 (len 2) Edges: 0_fwd -> 1_rev, 2_fwd + // -> 1_fwd This creates a "reversing" pattern where you enter one side and + // exit the other HashGraph test_g; - vector handles; handles.resize(3); + vector handles; + handles.resize(3); handles[0] = test_g.create_handle("A"); handles[1] = test_g.create_handle("AA"); handles[2] = test_g.create_handle("AAA"); @@ -5173,18 +5217,24 @@ void test_hub_labeling() { test_g.create_edge(handles[2], handles[1]); vector packed_labels = get_packed_labels(test_g); - + // We see the 1st node attached the right way around - assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, true)) == 0); - assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, false)) == INF_INT); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, true)) == + 0); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, false)) == + INF_INT); // We see the 0th node connected to the 2nd node the right way around - assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, true)) == 2); - assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, false)) == INF_INT); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, true)) == + 2); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, false)) == + INF_INT); // We see the same thing looking the other way - assert(hhl_query(packed_labels.begin(), 
rank(2, false), rank(0, true)) == 2); - assert(hhl_query(packed_labels.begin(), rank(2, false), rank(0, false)) == INF_INT); + assert(hhl_query(packed_labels.begin(), rank(2, false), rank(0, true)) == + 2); + assert(hhl_query(packed_labels.begin(), rank(2, false), rank(0, false)) == + INF_INT); } { // Test case: Diamond graph with multiple paths of different lengths @@ -5203,7 +5253,8 @@ void test_hub_labeling() { // Path 0->2->3 has intermediate length 10 // Should find minimum = 2 HashGraph test_g; - vector handles; handles.resize(4); + vector handles; + handles.resize(4); handles[0] = test_g.create_handle("A"); handles[1] = test_g.create_handle("AA"); handles[2] = test_g.create_handle("AAAAAAAAAA"); @@ -5216,7 +5267,8 @@ void test_hub_labeling() { vector packed_labels = get_packed_labels(test_g); - assert(hhl_query(packed_labels.begin(), rank(0, false), rank(3, false)) == 2); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(3, false)) == + 2); // Same paths in reverse, should also be 2 assert(hhl_query(packed_labels.begin(), rank(3, true), rank(0, true)) == 2); @@ -5229,7 +5281,8 @@ void test_hub_labeling() { // 1_rev -> 1_fwd (turning around) // This means you can go 0_fwd -> 1_rev -> (through 1) -> 1_fwd -> 2_fwd HashGraph test_g; - vector handles; handles.resize(3); + vector handles; + handles.resize(3); handles[0] = test_g.create_handle("A"); handles[1] = test_g.create_handle("AAA"); handles[2] = test_g.create_handle("AA"); @@ -5241,13 +5294,15 @@ void test_hub_labeling() { vector packed_labels = get_packed_labels(test_g); // Must go through 1, turn around, and come back through 1 - assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, false)) == 6); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, false)) == + 6); } { // Test case: Graph requiring traversal through a node and back again, but // without the hairpin edge. 
HashGraph test_g; - vector handles; handles.resize(3); + vector handles; + handles.resize(3); handles[0] = test_g.create_handle("A"); handles[1] = test_g.create_handle("AAA"); handles[2] = test_g.create_handle("AA"); @@ -5258,13 +5313,16 @@ void test_hub_labeling() { vector packed_labels = get_packed_labels(test_g); // We can't turn around inside 1, so we can't make it. - assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, false)) == INF_INT); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, false)) == + INF_INT); } { // Test case: Disconnected components - // Nodes 0,1 are connected; Nodes 2,3 are connected; No edges between components + // Nodes 0,1 are connected; Nodes 2,3 are connected; No edges between + // components HashGraph test_g; - vector handles; handles.resize(4); + vector handles; + handles.resize(4); handles[0] = test_g.create_handle("A"); handles[1] = test_g.create_handle("AA"); handles[2] = test_g.create_handle("AAA"); @@ -5276,18 +5334,26 @@ void test_hub_labeling() { vector packed_labels = get_packed_labels(test_g); // Within first component - assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, false)) == 0); // 0_fwd to 1_fwd - assert(hhl_query(packed_labels.begin(), rank(1, true), rank(0, true)) == 0); // 1_rev to 0_rev + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, false)) == + 0); // 0_fwd to 1_fwd + assert(hhl_query(packed_labels.begin(), rank(1, true), rank(0, true)) == + 0); // 1_rev to 0_rev // Within second component - assert(hhl_query(packed_labels.begin(), rank(2, false), rank(3, false)) == 0); // 2_fwd to 3_fwd - assert(hhl_query(packed_labels.begin(), rank(3, true), rank(2, true)) == 0); // 3_rev to 2_rev + assert(hhl_query(packed_labels.begin(), rank(2, false), rank(3, false)) == + 0); // 2_fwd to 3_fwd + assert(hhl_query(packed_labels.begin(), rank(3, true), rank(2, true)) == + 0); // 3_rev to 2_rev // Between components: no path - assert(hhl_query(packed_labels.begin(), 
rank(0, false), rank(2, false)) == INF_INT); // 0_fwd to 2_fwd - assert(hhl_query(packed_labels.begin(), rank(0, false), rank(3, false)) == INF_INT); // 0_fwd to 3_fwd - assert(hhl_query(packed_labels.begin(), rank(1, false), rank(2, false)) == INF_INT); // 1_fwd to 2_fwd - assert(hhl_query(packed_labels.begin(), rank(2, true), rank(0, true)) == INF_INT); // 2_rev to 0_rev + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, false)) == + INF_INT); // 0_fwd to 2_fwd + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(3, false)) == + INF_INT); // 0_fwd to 3_fwd + assert(hhl_query(packed_labels.begin(), rank(1, false), rank(2, false)) == + INF_INT); // 1_fwd to 2_fwd + assert(hhl_query(packed_labels.begin(), rank(2, true), rank(0, true)) == + INF_INT); // 2_rev to 0_rev } cerr << "HubLabeling tests successful!" << endl; @@ -5295,105 +5361,106 @@ void test_hub_labeling() { void test_snarl_distance_index() { - char filename[] = "tmpXXXXXX"; - int fd = -1; - { - // Make an empty index - SnarlDistanceIndex index; - - // Set it up for a completely empty graph. - vector empty_temp_indexes; - HashGraph empty_graph; - index.get_snarl_tree_records(empty_temp_indexes, &empty_graph); - - // It should be empty but working - assert(index.get_max_tree_depth() == 0); - - // Save it - fd = mkstemp(filename); - assert(fd != -1); - index.serialize(fd); - } - assert(close(fd) == 0); - - { - // Load it again - SnarlDistanceIndex index2; - index2.deserialize(filename); - - // It should be empty but working - assert(index2.get_max_tree_depth() == 0); - } - - // Make the file un-writable. 
- assert(chmod(filename, S_IRUSR) == 0); - - { - // Load it a third time - SnarlDistanceIndex index2; - index2.deserialize(filename); - - // It should be empty but working - assert(index2.get_max_tree_depth() == 0); - } - - // Make the file writable again - assert(chmod(filename, S_IRUSR | S_IWUSR) == 0); - - // And remove it - unlink(filename); - - cerr << "SnarlDistanceIndex tests successful!" << endl; + char filename[] = "tmpXXXXXX"; + int fd = -1; + { + // Make an empty index + SnarlDistanceIndex index; + + // Set it up for a completely empty graph. + vector + empty_temp_indexes; + HashGraph empty_graph; + index.get_snarl_tree_records(empty_temp_indexes, &empty_graph); + + // It should be empty but working + assert(index.get_max_tree_depth() == 0); + + // Save it + fd = mkstemp(filename); + assert(fd != -1); + index.serialize(fd); + } + assert(close(fd) == 0); + + { + // Load it again + SnarlDistanceIndex index2; + index2.deserialize(filename); + + // It should be empty but working + assert(index2.get_max_tree_depth() == 0); + } + + // Make the file un-writable. + assert(chmod(filename, S_IRUSR) == 0); + + { + // Load it a third time + SnarlDistanceIndex index2; + index2.deserialize(filename); + + // It should be empty but working + assert(index2.get_max_tree_depth() == 0); + } + + // Make the file writable again + assert(chmod(filename, S_IRUSR | S_IWUSR) == 0); + + // And remove it + unlink(filename); + + cerr << "SnarlDistanceIndex tests successful!" 
<< endl; } int main(void) { - /*test_reference_path_overlay(); - test_bit_packing(); - test_mapped_structs(); - test_int_vector(); - test_packed_vector>(); - test_packed_vector>(); - test_packed_vector>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_paged_vector>(); - test_paged_vector>(); - test_paged_vector>(); - test_paged_vector>(); - test_paged_vector>(); - test_paged_vector>(); - test_paged_vector>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_packed_deque(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_packed_set(); - test_mutable_path_handle_graphs(); - test_deletable_handle_graphs(); - test_serializable_handle_graphs(); - test_packed_graph(); - test_path_position_overlays(); - test_packed_reference_path_overlay(); - test_vectorizable_overlays(); - test_packed_subgraph_overlay(); - test_multithreaded_overlay_construction(); - test_mapped_packed_graph(); - test_hash_graph(); */ - test_hub_labeling(); - //test_snarl_distance_index(); + /*test_reference_path_overlay(); + test_bit_packing(); + test_mapped_structs(); + test_int_vector(); + test_packed_vector>(); + test_packed_vector>(); + test_packed_vector>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_paged_vector>(); + test_paged_vector>(); + test_paged_vector>(); + test_paged_vector>(); + test_paged_vector>(); + test_paged_vector>(); + test_paged_vector>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_packed_deque(); + 
test_iterators>(); + test_iterators>(); + test_iterators>(); + test_packed_set(); + test_mutable_path_handle_graphs(); + test_deletable_handle_graphs(); + test_serializable_handle_graphs(); + test_packed_graph(); + test_path_position_overlays(); + test_packed_reference_path_overlay(); + test_vectorizable_overlays(); + test_packed_subgraph_overlay(); + test_multithreaded_overlay_construction(); + test_mapped_packed_graph(); + test_hash_graph(); */ + test_hub_labeling(); + // test_snarl_distance_index(); } From 91b0e91cafd95adceb198b48be13fe77b664d2bd Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Wed, 13 May 2026 11:24:18 -0700 Subject: [PATCH 63/75] uncomment non-hub-labeling tests --- bdsg/src/test_libbdsg.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bdsg/src/test_libbdsg.cpp b/bdsg/src/test_libbdsg.cpp index 0a4ca0d9..95490451 100644 --- a/bdsg/src/test_libbdsg.cpp +++ b/bdsg/src/test_libbdsg.cpp @@ -5414,7 +5414,7 @@ void test_snarl_distance_index() { } int main(void) { - /*test_reference_path_overlay(); + test_reference_path_overlay(); test_bit_packing(); test_mapped_structs(); test_int_vector(); @@ -5460,7 +5460,7 @@ int main(void) { test_packed_subgraph_overlay(); test_multithreaded_overlay_construction(); test_mapped_packed_graph(); - test_hash_graph(); */ + test_hash_graph(); test_hub_labeling(); - // test_snarl_distance_index(); + test_snarl_distance_index(); } From 1c35a3bf0b6b5c6edfde30e9724b47257a81ef18 Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Wed, 13 May 2026 11:28:47 -0700 Subject: [PATCH 64/75] mention Boost dep --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 83969dec..b9c22643 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,8 @@ The documentation can then be found at `docs/_build/html/index.html`. 
- [`BBHash/alltypes`](https://github.com/rizkg/BBHash/tree/alltypes) - [`jansson`](https://github.com/akheron/jansson) +There is also a dependency on [`Boost`](https://www.boost.org/). + The build process with `make` assumes that these libraries and their headers have been installed in a place on the system where the compiler can find them (e.g. in `CPLUS_INCLUDE_PATH`). #### Easy `make` installation From 78b6d68b20858f993ed8e72dffc7b8598de5a6c1 Mon Sep 17 00:00:00 2001 From: electricEpilith <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 15 May 2026 14:20:57 -0700 Subject: [PATCH 65/75] Remove unnecessary assert Co-authored-by: Adam Novak --- bdsg/include/bdsg/internal/mapped_structs.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/bdsg/include/bdsg/internal/mapped_structs.hpp b/bdsg/include/bdsg/internal/mapped_structs.hpp index aab5aedd..0e3fc6cc 100644 --- a/bdsg/include/bdsg/internal/mapped_structs.hpp +++ b/bdsg/include/bdsg/internal/mapped_structs.hpp @@ -1879,7 +1879,6 @@ CompatIntVector::ConstProxy::operator uint64_t () const { template auto CompatIntVector::at(size_t index) -> Proxy { if (index > size()) { - assert(false); throw std::out_of_range("Accessing index " + std::to_string(index) + " in integer vector of length " + std::to_string(size())); } From 768a6a08d3593629ad94f6f450d5e32081a4f4b5 Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 15 May 2026 16:40:58 -0700 Subject: [PATCH 66/75] add include guard --- bdsg/include/bdsg/ch.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bdsg/include/bdsg/ch.hpp b/bdsg/include/bdsg/ch.hpp index 4aedbe29..b09b649a 100644 --- a/bdsg/include/bdsg/ch.hpp +++ b/bdsg/include/bdsg/ch.hpp @@ -1,6 +1,8 @@ /* file for the contraction hierarchy method */ +#ifndef BDSG_CH_HPP_INCLUDED +#define BDSG_CH_HPP_INCLUDED #include #include @@ -369,3 +371,5 @@ void write_to_gr(CHOverlay &ov, string out_path); vector read_node_order(string in_path); } // 
namespace bdsg + +#endif \ No newline at end of file From b06d1e1b42f7d77f2ca120dfa7528fb1a2795cb6 Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 15 May 2026 16:42:52 -0700 Subject: [PATCH 67/75] mention Boost in dep list --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index b9c22643..4c15644d 100644 --- a/README.md +++ b/README.md @@ -93,8 +93,7 @@ The documentation can then be found at `docs/_build/html/index.html`. - [`DYNAMIC`](https://github.com/xxsds/DYNAMIC) - [`BBHash/alltypes`](https://github.com/rizkg/BBHash/tree/alltypes) - [`jansson`](https://github.com/akheron/jansson) - -There is also a dependency on [`Boost`](https://www.boost.org/). +- [`Boost`](https://www.boost.org/). The build process with `make` assumes that these libraries and their headers have been installed in a place on the system where the compiler can find them (e.g. in `CPLUS_INCLUDE_PATH`). From 4fbdd5a2f77b2de0c99abbae0f6f6d52e6a3249e Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 15 May 2026 16:49:25 -0700 Subject: [PATCH 68/75] reject v4 indexes --- bdsg/include/bdsg/snarl_distance_index.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index c17ad549..614f3c60 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -973,7 +973,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab // that start with component count const static size_t CURRENT_VERSION_NUMBER = 5; // A version to allow though but warn about - const static size_t WARN_VERSION_NUMBER = 4; + const static size_t WARN_VERSION_NUMBER = 9999; //placeholder value /// Arbitrary large number which doens't overflow the number of bits we give const static size_t VERSION_NUMBER_SENTINEL 
= (1 << 10) - 1; From 7a416121d34a445f01d4e2fa9aac81eab4283a87 Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 15 May 2026 17:19:21 -0700 Subject: [PATCH 69/75] convert /// comments to doc comments --- bdsg/include/bdsg/snarl_distance_index.hpp | 396 +++++++++++++++------ 1 file changed, 295 insertions(+), 101 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 614f3c60..4bee7dca 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -199,20 +199,32 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab void serialize_members(std::ostream& out) const; void deserialize_members(std::istream& in); - /// Call when loading a distance index; will error if wrong version + /* Call when loading a distance index; will error if wrong version + * + */ void check_version_on_load() const; virtual uint32_t get_magic_number() const; std::string get_prefix() const; - /// Allow for preloading the index for more accurate timing of algorithms - /// that use it, if it fits in memory. If blocking is true, waits for the - /// index to be paged in. Otherwise, just tells the OS that we will want to - /// use it. + /* Allow for preloading the index for more accurate timing of algorithms + * + */ + /* that use it, if it fits in memory. If blocking is true, waits for the + * + */ + /* index to be paged in. Otherwise, just tells the OS that we will want to + * + */ + /* use it. 
+ * + */ void preload(bool blocking = false) const; -//////////////////////////////////// How we define different properties of a net handle +//////////////////////////////////* How we define different properties of a net handle + * + */ public: @@ -221,13 +233,23 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab END_START, END_END, END_TIP, TIP_START, TIP_END, TIP_TIP}; - /// Type of a net_handle_t, which may not be the type of the record - /// This is to allow a node record to be seen as a chain from the perspective of a handle. - /// And to allow a simple snarl record to be seen as a node, a chain, or a snarl. - /// TODO: What does that really mean? Why can that happen? + /* Type of a net_handle_t, which may not be the type of the record + * + */ + /* This is to allow a node record to be seen as a chain from the perspective of a handle. + * + */ + /* And to allow a simple snarl record to be seen as a node, a chain, or a snarl. + * + */ + /* TODO: What does that really mean? Why can that happen? + * + */ enum net_handle_record_t {ROOT_HANDLE=0, NODE_HANDLE, SNARL_HANDLE, CHAIN_HANDLE, SENTINEL_HANDLE}; -///////////////////////////// functions for distance calculations using net_handle_t's +///////////////////////////* functions for distance calculations using net_handle_t's + * + */ public: @@ -395,7 +417,9 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab public: -////////////////// SnarlDecomposition methods +////////////////* SnarlDecomposition methods + * + */ ///Get a net handle referring to a tip-to-tip traversal of the contents of the root snarl. 
net_handle_t get_root() const ; @@ -426,12 +450,22 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///edges are allowed bool is_simple_snarl(const net_handle_t& net) const; - /// Returns true if the given net handle refers to (a traversal of) a regular snarl - /// A regular snarl is the same as a simple snarl, except that the children may be - /// nested chains, rather than being restricted to nodes, as long as the - /// nested chains don't allow reversals. + /* Returns true if the given net handle refers to (a traversal of) a regular snarl + * + */ + /* A regular snarl is the same as a simple snarl, except that the children may be + * + */ + /* nested chains, rather than being restricted to nodes, as long as the + * + */ + /* nested chains don't allow reversals. + * + */ /// - /// Simple and trivial snarls also count as regular snarls. + /* Simple and trivial snarls also count as regular snarls. + * + */ bool is_regular_snarl(const net_handle_t& net) const; ///Returns the number of direct children of a snarl (not counting boundary nodes). @@ -546,9 +580,13 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///For 0 or 1, returns the sentinel facing in. Otherwise return the child as a chain going START_END net_handle_t get_snarl_child_from_rank(const net_handle_t& snarl, const size_t& rank) const; - /// Does this net handle store distances? + /* Does this net handle store distances? + * + */ bool has_distances(const net_handle_t& net) const; - /// Does the distance index in general store distances? + /* Does the distance index in general store distances? 
+ * + */ bool has_distances() const; protected: @@ -612,13 +650,19 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab vector>* to_duplicate) const; -////////////////////////////// How to interpret net_handle_ts +////////////////////////////* How to interpret net_handle_ts + * + */ // public: ///A record_t is the type of structure that a record can be. - /// The actual distance index is stored as a series of "records" for each snarl/node/chain. - /// The record type defines what is stored in a record + /* The actual distance index is stored as a series of "records" for each snarl/node/chain. + * + */ + /* The record type defines what is stored in a record + * + */ /// ///NODE, SNARL, and CHAIN indicate that they don't store distances. ///SIMPLE_SNARL is a snarl with all children connecting only to the boundary nodes in one direction (ie, a bubble). @@ -626,14 +670,24 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///NODE represents a node that is a trivial chain. A node can only be the child of a snarl. ///OVERSIZED_SNARL stores hub labeling data to compute distances. ///ROOT_SNARL represents a connected component of the root. It has no start or end node so - /// its children technically belong to the root. + /* its children technically belong to the root. + * + */ ///MULTICOMPONENT_CHAIN can represent a chain with snarls that are not start-end connected. - /// The chain is split up into components between these snarls, each node is tagged with - /// which component it belongs to. + /* The chain is split up into components between these snarls, each node is tagged with + * + */ + /* which component it belongs to. + * + */ /// - /// TODO: What is a CHILDREN record? Is it ever used? + /* TODO: What is a CHILDREN record? Is it ever used? + * + */ /// - /// These MUST match the order in record_t_as_string! + /* These MUST match the order in record_t_as_string! 
+ * + */ enum record_t {ROOT=1, NODE, DISTANCED_NODE, TRIVIAL_SNARL, DISTANCED_TRIVIAL_SNARL, @@ -647,7 +701,9 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab // Because the record_t encodes a complex taxonomy of snarls not *quite* // decomposable to flags, we use these accessors to look at facets of it. - /// Return true if records of the given type have stored distances. + /*Return true if records of the given type have stored distances. + * + */ constexpr static bool has_distances(record_t type) { return type == DISTANCED_NODE || type == DISTANCED_TRIVIAL_SNARL || type == DISTANCED_SIMPLE_SNARL @@ -656,86 +712,127 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab || type == DISTANCED_ROOT_SNARL || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN; } - /// Return true if the given record type represents a root snarl. + + /*Return true if the given record type represents a root snarl. + * + */ constexpr static bool is_root_snarl(record_t type) { return type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL; } - /// Return true if the given record type represents a root or a root snarl. + + /*Return true if the given record type represents a root or a root snarl. + * + */ constexpr static bool is_any_root(record_t type) { return is_root_snarl(type) || type == ROOT; } - /// Return true if the given record type represents a node. + + /*Return true if the given record type represents a node. + * + */ constexpr static bool is_node(record_t type) { return type == NODE || type == DISTANCED_NODE; } - /// Return true if the given record type represents a chain. + + /* Return true if the given record type represents a chain. + * + */ constexpr static bool is_chain(record_t type) { return type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN; } - /// Return true if the given record type represents a trivial snarl. + + /* Return true if the given record type represents a trivial snarl. 
+ * + */ constexpr static bool is_trivial_snarl(record_t type) { return type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL; } - /// Return true if the given record type represents a simple (but not a - /// trivial) snarl. + + /* Return true if the given record type represents a simple (but not a + * trivial) snarl. + */ constexpr static bool is_simple_snarl(record_t type) { return type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL; } - /// Return true if the given record type represents an oversized snarl. + + /* Return true if the given record type represents an oversized snarl. + * + */ constexpr static bool is_oversized_snarl(record_t type) { return type == OVERSIZED_SNARL || type == OVERSIZED_REGULAR_SNARL; } - /// Determine if a record type is a regular, but not a not simple (or - /// trivial), snarl. Root snarls cannot be regular. + + /* Determine if a record type is a regular, but not a not simple (or + * trivial), snarl. Root snarls cannot be regular. + * + */ constexpr static bool is_regular_nonsimple_snarl(record_t type) { return type == REGULAR_SNARL || type == DISTANCED_REGULAR_SNARL || type == OVERSIZED_REGULAR_SNARL; } - /// Determine if a record type is a regular snarl. Root snarls cannot be - /// regular. Counts simple and trivial snarls as regular. + + /* Determine if a record type is a regular snarl. Root snarls cannot be + * regular. Counts simple and trivial snarls as regular. + * + */ constexpr static bool is_regular_snarl(record_t type) { return is_regular_nonsimple_snarl(type) || is_simple_snarl(type) || is_trivial_snarl(type); } - /// Determine if a record type is a snarl that isn't also a root or a - /// simple (or trivial) snarl. A "nonsimple" snarl is implicitly - /// nontrivial. + + /* Determine if a record type is a snarl that isn't also a root or a + * simple (or trivial) snarl. A "nonsimple" snarl is implicitly + * nontrivial. 
+ * + */ constexpr static bool is_nonroot_nonsimple_snarl(record_t type) { return is_regular_nonsimple_snarl(type) || type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL; } - /// Return true if the given record type represents a snarl that is not - /// simple or trivial. + + /* Return true if the given record type represents a snarl that is not + * simple or trivial. + * + */ constexpr static bool is_nonsimple_snarl(record_t type) { return is_nonroot_nonsimple_snarl(type) || is_root_snarl(type); } - /// Return true if the given record type represents a snarl that is not - /// simple or trivial, and also isn't a root snarl. + + /* Return true if the given record type represents a snarl that is not + * simple or trivial, and also isn't a root snarl. + * + */ constexpr static bool is_nonroot_nontrivial_snarl(record_t type) { return is_nonroot_nonsimple_snarl(type) || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL; } - /// Return true if the given record type represents a snarl that is not - /// trivial. + + /* Return true if the given record type represents a snarl that is not + * trivial. + * + */ constexpr static bool is_nontrivial_snarl(record_t type) { return is_nonroot_nontrivial_snarl(type) || is_root_snarl(type); } - /// Make sure a record_t is a known type other than CHILDREN + + /* Make sure a record_t is a known type other than CHILDREN + * + */ constexpr static bool is_any_nonchildren(record_t type) { return is_any_root(type) || is_node(type) @@ -744,18 +841,24 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab || is_trivial_snarl(type); } - /// Encode the type of a root snarl that may or may not have distances. + /* Encode the type of a root snarl that may or may not have distances. + * + */ constexpr static record_t encode_root_snarl(bool has_distances) { return has_distances ? DISTANCED_ROOT_SNARL : ROOT_SNARL; } - /// Encode the type of a simple snarl that may or may not have distances. 
+ /* Encode the type of a simple snarl that may or may not have distances. + * + */ constexpr static record_t encode_simple_snarl(bool has_distances) { return has_distances ? DISTANCED_SIMPLE_SNARL : SIMPLE_SNARL; } - /// Encode the type of a snarl that isn't a root snarl or a simple (or trivial) snarl. - /// It may have distances, it may be regular, and it may be oversized. + /* Encode the type of a snarl that isn't a root snarl or a simple (or trivial) snarl. + * It may have distances, it may be regular, and it may be oversized. + * + */ constexpr static record_t encode_nonroot_nonsimple_snarl(bool has_distances, bool is_regular, bool is_oversized) { if (is_oversized) { if (!has_distances) { @@ -773,13 +876,16 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab } } - /// Encode the type of a node that may or may not have distances. + /* Encode the type of a node that may or may not have distances. + * + */ constexpr static record_t encode_node(bool has_distances) { return has_distances ? DISTANCED_NODE : NODE; } - /// Encode the type of a chain. - /// It may have distances, and it may be a multicomponent chain. + /* Encode the type of a chain. + * It may have distances, and it may be a multicomponent chain. 
+ */ constexpr static record_t encode_chain(bool has_distances, bool is_multicomponent) { if (is_multicomponent) { if (!has_distances) { @@ -939,7 +1045,9 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab -////////////////////////////////////////// The actual distance index +////////////////////////////////////////* The actual distance index + * + */ private: @@ -974,7 +1082,9 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab const static size_t CURRENT_VERSION_NUMBER = 5; // A version to allow though but warn about const static size_t WARN_VERSION_NUMBER = 9999; //placeholder value - /// Arbitrary large number which doens't overflow the number of bits we give + /* Arbitrary large number which doens't overflow the number of bits we give + * + */ const static size_t VERSION_NUMBER_SENTINEL = (1 << 10) - 1; /*Node record @@ -1164,7 +1274,9 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab * * The remainder of the tag will be the record_t of the record */ - /////////// Methods for interpreting the tags for each snarl tree record + /////////* Methods for interpreting the tags for each snarl tree record + * + */ const static record_t get_record_type(const size_t tag) {return static_cast(tag >> 9);} @@ -1183,7 +1295,9 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////// SnarlTreeRecord class for interpreting the records in a distance index +//////////////////////////////* SnarlTreeRecord class for interpreting the records in a distance index + * + */ // /* Define a struct for interpreting each type of snarl tree node record (For node, snarl, chain) * @@ -1621,7 +1735,9 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab size_t get_distance_left_end(); size_t 
get_distance_right_end(); - ////////////////////////// methods for navigating the snarl tree from this chain + ////////////////////////* methods for navigating the snarl tree from this chain + * + */ //Get the offset into snarl_tree_records of the first node in the chain size_t get_first_node_offset() const; @@ -1693,7 +1809,9 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab private: - ////////////////////// More methods for dealing with net_handle_ts + ////////////////////* More methods for dealing with net_handle_ts + * + */ SnarlTreeRecord get_snarl_tree_record(const handlegraph::net_handle_t& net_handle) const { return SnarlTreeRecord(get_record_offset(net_handle), &snarl_tree_records); } @@ -1830,15 +1948,31 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab //TODO This would probably be more efficient as a vector of a struct of five ints vector prefix_sum; vector max_prefix_sum; - /// Forward looping distances. If no loop is possible, an entry - /// will be std::numeric_limits::max(). If any loop is - /// possible anywhere along the chain, the first entry will contain - /// a possible loop distance. + /* Forward looping distances. If no loop is possible, an entry + * + */ + /* will be std::numeric_limits::max(). If any loop is + * + */ + /* possible anywhere along the chain, the first entry will contain + * + */ + /* a possible loop distance. + * + */ vector forward_loops; - /// Backward lopping distances. If no loop is possible, an entry - /// will be std::numeric_limits::max(). If any lopp is - /// possible anywhere along the chain, the last entry will contain - /// a possible loop distance. + /* Backward lopping distances. If no loop is possible, an entry + * + */ + /* will be std::numeric_limits::max(). If any lopp is + * + */ + /* possible anywhere along the chain, the last entry will contain + * + */ + /* a possible loop distance. 
+ * + */ vector backward_loops; vector chain_components;//Which component does each node belong to, usually all 0s @@ -1853,18 +1987,30 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab handlegraph::nid_t end_node_id; size_t end_node_length=0; size_t node_count=0; - /// Minimum distance across the snarl from start to end, not - /// including boundary nodes. + /* Minimum distance across the snarl from start to end, not + * + */ + /* including boundary nodes. + * + */ size_t min_length = std::numeric_limits::max(); size_t max_length = 0; size_t max_distance = 0; size_t tree_depth = 0; //TODO: This isn't used but I left it because I couldn't get the python bindings to build when I changed it - /// Minimum distance from the start back to itself within the - /// snarl, not including boundary nodes. + /* Minimum distance from the start back to itself within the + * + */ + /* snarl, not including boundary nodes. + * + */ size_t distance_start_start = std::numeric_limits::max(); - /// Minimum distance from the end back to itself within the snarl, - /// not including boundary nodes. + /* Minimum distance from the end back to itself within the snarl, + * + */ + /* not including boundary nodes. + * + */ size_t distance_end_end = std::numeric_limits::max(); size_t rank_in_parent=0; @@ -1880,8 +2026,12 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab bool end_node_rev; bool is_trivial; bool is_simple; - /// Set to true if the snarl is regular (see SnarlDistanceIndex::is_regular_snarl()). - /// If is_simple is true, this must also be set to true when filling in the TemporarySnarlRecord. + /* Set to true if the snarl is regular (see SnarlDistanceIndex::is_regular_snarl()). + * + */ + /* If is_simple is true, this must also be set to true when filling in the TemporarySnarlRecord. 
+ * + */ bool is_regular = false; bool is_tip = false; bool is_root_snarl = false; @@ -1926,27 +2076,51 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab vector root_snarl_components; vector temp_chain_records; vector temp_snarl_records; - /// Holds temporary indexes for all the nodes. + /* Holds temporary indexes for all the nodes. + * + */ /// - /// While temporary snarl and chain records are stored at more or less - /// arbitrary indexes, temporary node records are laid out by node ID, - /// with the one for the node with ID min_node_id at index 0. This means - /// you can look up the TemporaryNodeRecord for a node by its ID, and - /// that some positions in the vector are empty temporary indexes for - /// nonexistent nodes. + /* While temporary snarl and chain records are stored at more or less + * + */ + /* arbitrary indexes, temporary node records are laid out by node ID, + * + */ + /* with the one for the node with ID min_node_id at index 0. This means + * + */ + /* you can look up the TemporaryNodeRecord for a node by its ID, and + * + */ + /* that some positions in the vector are empty temporary indexes for + * + */ + /* nonexistent nodes. + * + */ vector temp_node_records; - /// Look up a chain from a temporary record reference. - /// Throws an error if the reference is not to a chain or is out of bounds. + /* Look up a chain from a temporary record reference. + * + */ + /* Throws an error if the reference is not to a chain or is out of bounds. + * + */ inline TemporaryChainRecord& get_chain(const temp_record_ref_t& ref) { // Delegate to the const version and un-const the result. See // return const_cast(std::as_const(*this).get_chain(ref)); } - /// Look up a chain from a temporary record reference. - /// Throws an error if the reference is not to a chain or is out of bounds. - /// This version can be used when the object is const. + /* Look up a chain from a temporary record reference. 
+ * + */ + /* Throws an error if the reference is not to a chain or is out of bounds. + * + */ + /* This version can be used when the object is const. + * + */ inline const TemporaryChainRecord& get_chain(const temp_record_ref_t& ref) const { if (ref.first != TEMP_CHAIN) { throw std::invalid_argument("Trying to look up a non-chain as a chain"); @@ -1957,15 +2131,25 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab return temp_chain_records[ref.second]; } - /// Look up a snarl from a temporary record reference. - /// Throws an error if the reference is not to a snarl or is out of bounds. + /* Look up a snarl from a temporary record reference. + * + */ + /* Throws an error if the reference is not to a snarl or is out of bounds. + * + */ inline TemporarySnarlRecord& get_snarl(const temp_record_ref_t& ref) { return const_cast(std::as_const(*this).get_snarl(ref)); } - /// Look up a snarl from a temporary record reference. - /// Throws an error if the reference is not to a snarl or is out of bounds. - /// This version can be used when the object is const. + /* Look up a snarl from a temporary record reference. + * + */ + /* Throws an error if the reference is not to a snarl or is out of bounds. + * + */ + /* This version can be used when the object is const. + * + */ inline const TemporarySnarlRecord& get_snarl(const temp_record_ref_t& ref) const { if (ref.first != TEMP_SNARL) { throw std::invalid_argument("Trying to look up a non-snarl as a snarl"); @@ -1976,15 +2160,25 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab return temp_snarl_records[ref.second]; } - /// Look up a node from a temporary record reference. - /// Throws an error if the reference is not to a node or is out of bounds. + /* Look up a node from a temporary record reference. + * + */ + /* Throws an error if the reference is not to a node or is out of bounds. 
+ * + */ inline TemporaryNodeRecord& get_node(const temp_record_ref_t& ref) { return const_cast(std::as_const(*this).get_node(ref)); } - /// Look up a node from a temporary record reference. - /// Throws an error if the reference is not to a node or is out of bounds. - /// This version can be used when the object is const. + /* Look up a node from a temporary record reference. + * + */ + /* Throws an error if the reference is not to a node or is out of bounds. + * + */ + /* This version can be used when the object is const. + * + */ inline const TemporaryNodeRecord& get_node(const temp_record_ref_t& ref) const { if (ref.first != TEMP_NODE) { throw std::invalid_argument("Trying to look up a non-node as a node"); From 421ea29c45a0083b7eb734bfa1b63ed1b0072e5b Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 15 May 2026 17:23:15 -0700 Subject: [PATCH 70/75] convert /// comment to doc comment --- bdsg/include/bdsg/snarl_distance_index.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 4bee7dca..dd341f16 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -1562,9 +1562,10 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab //How big is the entire snarl record? 
static size_t distance_vector_size(record_t type, size_t node_count); - //vec_size parameter only needed for oversized snarls - //represents size of hub labeling-related data - //the value needed should be the first entry after the fixed-size record data + /* vec_size parameter only needed for oversized snarls + * represents size of hub labeling-related data + * the value needed should be the first entry after the fixed-size record data + */ static size_t record_size (record_t type, size_t node_count, size_t vec_size) ; size_t record_size() ; From a9ccb259bf5a64b12f43a3e9b83d5fe058bf8a37 Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 15 May 2026 17:25:02 -0700 Subject: [PATCH 71/75] remove TODO, we're not making separate RecordWriter for oversized snarls --- bdsg/include/bdsg/snarl_distance_index.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index dd341f16..7877d9fc 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -1609,7 +1609,6 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab * * May only be called once. * - * TODO: Make separate SnarlRecordWriter for oversized snarls? 
* * Putting vec_size in the SNARL_RECORD_SIZE'th slot due to it being the first one after the header */ From 1b720484888eb72644e7c967a290e36a4ca65187 Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 15 May 2026 17:41:33 -0700 Subject: [PATCH 72/75] fix commenting errors --- bdsg/include/bdsg/snarl_distance_index.hpp | 112 ++++++--------------- 1 file changed, 31 insertions(+), 81 deletions(-) diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 7877d9fc..b62e9b81 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -208,23 +208,15 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab std::string get_prefix() const; /* Allow for preloading the index for more accurate timing of algorithms - * - */ - /* that use it, if it fits in memory. If blocking is true, waits for the - * - */ - /* index to be paged in. Otherwise, just tells the OS that we will want to - * - */ - /* use it. + * that use it, if it fits in memory. If blocking is true, waits for the + * index to be paged in. Otherwise, just tells the OS that we will want to + * use it. * */ void preload(bool blocking = false) const; -//////////////////////////////////* How we define different properties of a net handle - * - */ +///////////////////////////////// How we define different properties of a net handle? public: @@ -234,22 +226,15 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab TIP_START, TIP_END, TIP_TIP}; /* Type of a net_handle_t, which may not be the type of the record + * This is to allow a node record to be seen as a chain from the perspective of a handle. + * And to allow a simple snarl record to be seen as a node, a chain, or a snarl. * - */ - /* This is to allow a node record to be seen as a chain from the perspective of a handle. 
- * - */ - /* And to allow a simple snarl record to be seen as a node, a chain, or a snarl. - * - */ - /* TODO: What does that really mean? Why can that happen? + * TODO: What does that really mean? Why can that happen? * */ enum net_handle_record_t {ROOT_HANDLE=0, NODE_HANDLE, SNARL_HANDLE, CHAIN_HANDLE, SENTINEL_HANDLE}; -///////////////////////////* functions for distance calculations using net_handle_t's - * - */ +///////////////////////////// functions for distance calculations using net_handle_t's public: @@ -417,9 +402,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab public: -////////////////* SnarlDecomposition methods - * - */ +////////////////// SnarlDecomposition methods ///Get a net handle referring to a tip-to-tip traversal of the contents of the root snarl. net_handle_t get_root() const ; @@ -450,20 +433,13 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///edges are allowed bool is_simple_snarl(const net_handle_t& net) const; - /* Returns true if the given net handle refers to (a traversal of) a regular snarl - * - */ - /* A regular snarl is the same as a simple snarl, except that the children may be - * - */ - /* nested chains, rather than being restricted to nodes, as long as the + /* Returns true if the given net handle refers to (a traversal of) a regular snarl. * - */ - /* nested chains don't allow reversals. + * A regular snarl is the same as a simple snarl, except that the children may be + * nested chains, rather than being restricted to nodes, as long as the + * nested chains don't allow reversals. * - */ - /// - /* Simple and trivial snarls also count as regular snarls. + * Simple and trivial snarls also count as regular snarls. 
* */ bool is_regular_snarl(const net_handle_t& net) const; @@ -650,44 +626,28 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab vector>* to_duplicate) const; -////////////////////////////* How to interpret net_handle_ts - * - */ -// +////////////////////////////// How to interpret net_handle_ts + public: ///A record_t is the type of structure that a record can be. - /* The actual distance index is stored as a series of "records" for each snarl/node/chain. - * - */ - /* The record type defines what is stored in a record - * - */ - /// + ///The actual distance index is stored as a series of "records" for each snarl/node/chain. + ///The record type defines what is stored in a record. + ///NODE, SNARL, and CHAIN indicate that they don't store distances. ///SIMPLE_SNARL is a snarl with all children connecting only to the boundary nodes in one direction (ie, a bubble). ///TRIVIAL_SNARL represents consecutive nodes in a chain. ///NODE represents a node that is a trivial chain. A node can only be the child of a snarl. ///OVERSIZED_SNARL stores hub labeling data to compute distances. ///ROOT_SNARL represents a connected component of the root. It has no start or end node so - /* its children technically belong to the root. - * - */ + ///its children technically belong to the root. ///MULTICOMPONENT_CHAIN can represent a chain with snarls that are not start-end connected. - /* The chain is split up into components between these snarls, each node is tagged with - * - */ - /* which component it belongs to. - * - */ + ///The chain is split up into components between these snarls, each node is tagged with + ///which component it belongs to. /// - /* TODO: What is a CHILDREN record? Is it ever used? - * - */ + ///TODO: What is a CHILDREN record? Is it ever used? /// - /* These MUST match the order in record_t_as_string! - * - */ + /// These MUST match the order in record_t_as_string! 
enum record_t {ROOT=1, NODE, DISTANCED_NODE, TRIVIAL_SNARL, DISTANCED_TRIVIAL_SNARL, @@ -1045,9 +1005,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab -////////////////////////////////////////* The actual distance index - * - */ +////////////////////////////////////////// The actual distance index private: @@ -1274,9 +1232,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab * * The remainder of the tag will be the record_t of the record */ - /////////* Methods for interpreting the tags for each snarl tree record - * - */ + /////////// Methods for interpreting the tags for each snarl tree record const static record_t get_record_type(const size_t tag) {return static_cast(tag >> 9);} @@ -1294,11 +1250,8 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab const static bool is_externally_end_end_connected(const size_t tag) {return tag & 256;} -///////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////* SnarlTreeRecord class for interpreting the records in a distance index - * - */ -// +//////////////////////////////// SnarlTreeRecord class for interpreting the records in a distance index + /* Define a struct for interpreting each type of snarl tree node record (For node, snarl, chain) * * This is meant to be a layer in between snarl_tree_records and the public interface. 
@@ -1735,9 +1688,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab size_t get_distance_left_end(); size_t get_distance_right_end(); - ////////////////////////* methods for navigating the snarl tree from this chain - * - */ + ////////////////////////// methods for navigating the snarl tree from this chain //Get the offset into snarl_tree_records of the first node in the chain size_t get_first_node_offset() const; @@ -1809,9 +1760,8 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab private: - ////////////////////* More methods for dealing with net_handle_ts - * - */ + ////////////////////// More methods for dealing with net_handle_ts + SnarlTreeRecord get_snarl_tree_record(const handlegraph::net_handle_t& net_handle) const { return SnarlTreeRecord(get_record_offset(net_handle), &snarl_tree_records); } From 376501abb68e18e361314b1028e3b87b845ffff2 Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 15 May 2026 17:46:19 -0700 Subject: [PATCH 73/75] better file comment --- bdsg/src/ch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index 21276c15..7ffb7a4f 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -1,5 +1,5 @@ /* -file for quickly playing around with stuff +Hub labeling with contraction hierarchy node ordering. */ #include "bdsg/ch.hpp" From d650c3f0c4ff2e4a3f7b59c762d6a4f379e6c2c2 Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 15 May 2026 17:47:09 -0700 Subject: [PATCH 74/75] not a test file --- bdsg/src/ch.cpp | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index 7ffb7a4f..29f93f84 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -7,24 +7,6 @@ Hub labeling with contraction hierarchy node ordering. 
//#define debug_create namespace bdsg { -bdsg::HashGraph make_test() { - bdsg::HashGraph g; - vector h; h.resize(13); - for (int i = 1; i <= 12; i++) { - h[i] = g.create_handle("A"); - } - g.create_edge(h[1], h[2]); g.create_edge(h[2], h[3]); - g.create_edge(h[2], h[4]); g.create_edge(h[2], h[11]); - g.create_edge(h[2], g.flip(h[7])); g.create_edge(h[3], h[5]); - g.create_edge(h[4], h[6]); g.create_edge(h[4], h[7]); - g.create_edge(h[5], h[4]); g.create_edge(h[5], h[6]); - g.create_edge(h[6], h[8]); g.create_edge(h[7], h[8]); - g.create_edge(h[7], h[9]); g.create_edge(h[7], h[10]); - g.create_edge(g.flip(h[7]), g.flip(h[10])); - g.create_edge(h[9], g.flip(h[10])); g.create_edge(h[11], h[12]); - g.create_edge(h[12], g.flip(h[1])); - return g; -} NODE_UINT bgid(const handle_t& h, const bdsg::HashGraph& hg) { auto nid = hg.get_id(h); From ed24eed6ee39f457a712df40840ce322701bb16c Mon Sep 17 00:00:00 2001 From: Zia <194475824+electricEpilith@users.noreply.github.com> Date: Fri, 15 May 2026 17:52:22 -0700 Subject: [PATCH 75/75] remove unnecessary comment --- bdsg/src/ch.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp index 29f93f84..15ac5f03 100644 --- a/bdsg/src/ch.cpp +++ b/bdsg/src/ch.cpp @@ -334,7 +334,6 @@ CHOverlay make_boost_graph(const SnarlDistanceIndex::TemporaryDistanceIndex& tem int edge_diff(ContractedGraph::vertex_descriptor nid, ContractedGraph& ch, CHOverlay& ov, vector& node_dists, int hop_limit = 2) { - //using namespace boost; auto [out_start, out_end] = out_edges(nid, ch); auto [in_start, in_end] = in_edges(nid, ch);