diff --git a/.github/workflows/testmac.yml b/.github/workflows/testmac.yml index dd4d5541..990ff287 100644 --- a/.github/workflows/testmac.yml +++ b/.github/workflows/testmac.yml @@ -26,7 +26,7 @@ jobs: - name: Run build and test run: | set -e - brew install libomp doxygen jansson + brew install libomp doxygen jansson boost mkdir -p build cd build cmake .. -DRUN_DOXYGEN=ON -DPYTHON_EXECUTABLE="$(which python3)" diff --git a/CMakeLists.txt b/CMakeLists.txt index ed36e21e..c97ac6e7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,8 +18,8 @@ OPTION(USE_INSTALLED_LIBHANDLEGRAPH "Use the version of libhandlegraph installed # TODO: We can only do out-of-source builds! # TODO: How do we error out meaningfully on in-source builds? -# We build using c++14 -set(CMAKE_CXX_STANDARD 14) +# We build using c++20 +set(CMAKE_CXX_STANDARD 20) # We need library paths to be relative in the build directories so we can let # the libraries in our Python module find each other when we package them into # a wheel. This only works on CMake 3.14+; older CMake we have to bully with @@ -143,6 +143,8 @@ endif() # Find other system dependencies pkg_check_modules(Jansson REQUIRED IMPORTED_TARGET jansson) +find_package(Boost REQUIRED) + # Find our bdsg package directory where input sources and dependencies are set(bdsg_DIR "${CMAKE_CURRENT_SOURCE_DIR}/bdsg") @@ -158,7 +160,9 @@ include(ExternalProject) # sdsl-lite (gives an "sdsl" target) set(BUILD_SHARED_LIBS ON CACHE BOOL "Build sdsl-lite shared libraries") -add_subdirectory("${bdsg_DIR}/deps/sdsl-lite") +if (NOT TARGET sdsl) + add_subdirectory("${bdsg_DIR}/deps/sdsl-lite") +endif() if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") # It produces divsufsort and divsufsort64 targets that don't know they need OMP on Mac. 
set_target_properties(divsufsort PROPERTIES LINK_FLAGS "-lomp") @@ -166,7 +170,9 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") endif() # hopscotch_map (required by DYNAMIC, gives a "tsl::hopscotch_map" target) -add_subdirectory("${bdsg_DIR}/deps/hopscotch-map") +if (NOT TARGET tsl::hopscotch_map) + add_subdirectory("${bdsg_DIR}/deps/hopscotch-map") +endif() # DYNAMIC (header only) # Does not ship its own install step or define a target, so we make our own target @@ -185,7 +191,9 @@ elseif (TARGET handlegraph_objs) message("Using libhandlegraph built by another CMake") else () message("Using bundled libhandlegraph") - add_subdirectory("${bdsg_DIR}/deps/libhandlegraph") + if (NOT TARGET handlegraph_shared AND NOT TARGET handlegraph) + add_subdirectory("${bdsg_DIR}/deps/libhandlegraph") + endif() endif() @@ -201,7 +209,9 @@ add_library(sparsepp INTERFACE) target_include_directories(sparsepp INTERFACE "${bdsg_DIR}/deps/sparsepp/") # mio (header only) -add_subdirectory("${bdsg_DIR}/deps/mio") +if (NOT TARGET mio::mio) + add_subdirectory("${bdsg_DIR}/deps/mio") +endif() if (BUILD_PYTHON_BINDINGS) @@ -306,6 +316,7 @@ add_library(bdsg_objs OBJECT ${bdsg_DIR}/src/strand_split_overlay.cpp ${bdsg_DIR}/src/utility.cpp ${bdsg_DIR}/src/vectorizable_overlays.cpp + ${bdsg_DIR}/src/ch.cpp ${bdsg_DIR}/src/snarl_distance_index.cpp ) @@ -324,7 +335,8 @@ set(bdsg_TARGET_DEPS bbhash sparsepp mio::mio - PkgConfig::Jansson) + PkgConfig::Jansson + Boost::boost) set(bdsg_LIBS ${bdsg_TARGET_DEPS} @@ -338,7 +350,7 @@ target_include_directories(bdsg_objs PUBLIC ${bdsg_INCLUDES}) set_target_properties(bdsg_objs PROPERTIES POSITION_INDEPENDENT_CODE TRUE) if (CMAKE_MAJOR_VERSION EQUAL "3" AND (CMAKE_MINOR_VERSION EQUAL "10" OR CMAKE_MINOR_VERSION EQUAL "11")) - # Before CMake 3.12 we can't ise target_link_libraries on an object library to convey the need to use depencies' include directories + # Before CMake 3.12 we can't use target_link_libraries on an object library to convey the need to use 
dependencies' include directories get_target_property(sdsl_INCLUDE sdsl INTERFACE_INCLUDE_DIRECTORIES) target_include_directories(bdsg_objs PUBLIC ${sdsl_INCLUDE}) get_target_property(hopscotch_map_INCLUDE tsl::hopscotch_map INTERFACE_INCLUDE_DIRECTORIES) diff --git a/Makefile b/Makefile index 4436f140..47c5c977 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ OBJS += $(OBJ_DIR)/path_subgraph_overlay.o OBJS += $(OBJ_DIR)/subgraph_overlay.o OBJS += $(OBJ_DIR)/vectorizable_overlays.o OBJS += $(OBJ_DIR)/packed_subgraph_overlay.o +OBJS += $(OBJ_DIR)/ch.o OBJS += $(OBJ_DIR)/snarl_distance_index.o OBJS += $(OBJ_DIR)/strand_split_overlay.o OBJS += $(OBJ_DIR)/utility.o diff --git a/README.md b/README.md index 83969dec..4c15644d 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,7 @@ The documentation can then be found at `docs/_build/html/index.html`. - [`DYNAMIC`](https://github.com/xxsds/DYNAMIC) - [`BBHash/alltypes`](https://github.com/rizkg/BBHash/tree/alltypes) - [`jansson`](https://github.com/akheron/jansson) +- [`Boost`](https://www.boost.org/). The build process with `make` assumes that these libraries and their headers have been installed in a place on the system where the compiler can find them (e.g. in `CPLUS_INCLUDE_PATH`). 
diff --git a/bdsg/cmake_bindings/bdsg/snarl_distance_index.cpp b/bdsg/cmake_bindings/bdsg/snarl_distance_index.cpp index 9f3aa143..596fbbd3 100644 --- a/bdsg/cmake_bindings/bdsg/snarl_distance_index.cpp +++ b/bdsg/cmake_bindings/bdsg/snarl_distance_index.cpp @@ -15,679 +15,2170 @@ #include #include -#include #include -#include #include +#include +#include +#include #include #include -#include -#include - +#include #ifndef BINDER_PYBIND11_TYPE_CASTER - #define BINDER_PYBIND11_TYPE_CASTER - PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr, false) - PYBIND11_DECLARE_HOLDER_TYPE(T, T*, false) - PYBIND11_MAKE_OPAQUE(std::shared_ptr) +#define BINDER_PYBIND11_TYPE_CASTER +PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr, false) +PYBIND11_DECLARE_HOLDER_TYPE(T, T *, false) +PYBIND11_MAKE_OPAQUE(std::shared_ptr) #endif // bdsg::SnarlDistanceIndex file:bdsg/snarl_distance_index.hpp line:181 struct PyCallBack_bdsg_SnarlDistanceIndex : public bdsg::SnarlDistanceIndex { - using bdsg::SnarlDistanceIndex::SnarlDistanceIndex; + using bdsg::SnarlDistanceIndex::SnarlDistanceIndex; - void dissociate() override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "dissociate"); - if (overload) { - auto o = overload.operator()(); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::dissociate(); - } - void serialize(const class std::function & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "serialize"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - 
} - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::serialize(a0); - } - void serialize(int a0) override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "serialize"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::serialize(a0); - } - void deserialize(int a0) override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "deserialize"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::deserialize(a0); - } - unsigned int get_magic_number() const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "get_magic_number"); - if (overload) { - auto o = overload.operator()(); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::get_magic_number(); - } - struct handlegraph::net_handle_t get_root() const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "get_root"); - if (overload) { - auto o = overload.operator()(); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t 
caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::get_root(); - } - bool is_root(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "is_root"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::is_root(a0); - } - bool is_snarl(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "is_snarl"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::is_snarl(a0); - } - bool is_chain(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "is_chain"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::is_chain(a0); - } - bool is_node(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "is_node"); - if 
(overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::is_node(a0); - } - bool is_sentinel(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "is_sentinel"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::is_sentinel(a0); - } - struct handlegraph::net_handle_t get_net(const struct handlegraph::handle_t & a0, const class handlegraph::HandleGraph * a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "get_net"); - if (overload) { - auto o = overload.operator()(a0, a1); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::get_net(a0, a1); - } - struct handlegraph::handle_t get_handle(const struct handlegraph::net_handle_t & a0, const class handlegraph::HandleGraph * a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "get_handle"); - if (overload) { - auto o = overload.operator()(a0, a1); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return 
pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::get_handle(a0, a1); - } - struct handlegraph::net_handle_t get_parent(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "get_parent"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::get_parent(a0); - } - struct handlegraph::net_handle_t get_bound(const struct handlegraph::net_handle_t & a0, bool a1, bool a2) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "get_bound"); - if (overload) { - auto o = overload.operator()(a0, a1, a2); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::get_bound(a0, a1, a2); - } - struct handlegraph::net_handle_t flip(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "flip"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::flip(a0); - } - struct handlegraph::net_handle_t canonical(const struct handlegraph::net_handle_t & a0) const 
override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "canonical"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::canonical(a0); - } - enum handlegraph::SnarlDecomposition::endpoint_t starts_at(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "starts_at"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::starts_at(a0); - } - enum handlegraph::SnarlDecomposition::endpoint_t ends_at(const struct handlegraph::net_handle_t & a0) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "ends_at"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::ends_at(a0); - } - bool for_each_child_impl(const struct handlegraph::net_handle_t & a0, const class std::function & a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "for_each_child_impl"); - if (overload) { - auto o = overload.operator()(a0, a1); - if 
(pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::for_each_child_impl(a0, a1); - } - bool for_each_traversal_impl(const struct handlegraph::net_handle_t & a0, const class std::function & a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "for_each_traversal_impl"); - if (overload) { - auto o = overload.operator()(a0, a1); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::for_each_traversal_impl(a0, a1); - } - bool follow_net_edges_impl(const struct handlegraph::net_handle_t & a0, const class handlegraph::HandleGraph * a1, bool a2, const class std::function & a3) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "follow_net_edges_impl"); - if (overload) { - auto o = overload.operator()(a0, a1, a2, a3); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::follow_net_edges_impl(a0, a1, a2, a3); - } - struct handlegraph::net_handle_t get_parent_traversal(const struct handlegraph::net_handle_t & a0, const struct handlegraph::net_handle_t & a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "get_parent_traversal"); - if (overload) { - auto o = overload.operator()(a0, a1); - if 
(pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDistanceIndex::get_parent_traversal(a0, a1); - } - bool for_each_tippy_child_impl(const struct handlegraph::net_handle_t & a0, const class std::function & a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "for_each_tippy_child_impl"); - if (overload) { - auto o = overload.operator()(a0, a1); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDecomposition::for_each_tippy_child_impl(a0, a1); - } - bool for_each_traversal_start_impl(const struct handlegraph::net_handle_t & a0, const class std::function & a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "for_each_traversal_start_impl"); - if (overload) { - auto o = overload.operator()(a0, a1); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDecomposition::for_each_traversal_start_impl(a0, a1); - } - bool for_each_traversal_end_impl(const struct handlegraph::net_handle_t & a0, const class std::function & a1) const override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "for_each_traversal_end_impl"); - if (overload) { - auto o = overload.operator()(a0, a1); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static 
pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return SnarlDecomposition::for_each_traversal_end_impl(a0, a1); - } - void serialize(const std::string & a0) override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "serialize"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return TriviallySerializable::serialize(a0); - } - void deserialize(const std::string & a0) override { - pybind11::gil_scoped_acquire gil; - pybind11::function overload = pybind11::get_overload(static_cast(this), "deserialize"); - if (overload) { - auto o = overload.operator()(a0); - if (pybind11::detail::cast_is_temporary_value_reference::value) { - static pybind11::detail::override_caster_t caster; - return pybind11::detail::cast_ref(std::move(o), caster); - } - return pybind11::detail::cast_safe(std::move(o)); - } - return TriviallySerializable::deserialize(a0); - } + void dissociate() override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "dissociate"); + if (overload) { + auto o = overload.operator()(); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::dissociate(); + } + void serialize(const class std::function + &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "serialize"); + if (overload) { + auto o 
= + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::serialize(a0); + } + void serialize(int a0) override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "serialize"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::serialize(a0); + } + void deserialize(int a0) override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "deserialize"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::deserialize(a0); + } + unsigned int get_magic_number() const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), + "get_magic_number"); + if (overload) { + auto o = overload.operator()(); + if (pybind11::detail::cast_is_temporary_value_reference< + unsigned int>::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::get_magic_number(); + } + struct handlegraph::net_handle_t get_root() const override { + pybind11::gil_scoped_acquire gil; + 
pybind11::function overload = pybind11::get_overload( + static_cast(this), "get_root"); + if (overload) { + auto o = overload.operator()(); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::net_handle_t>::value) { + static pybind11::detail::override_caster_t< + struct handlegraph::net_handle_t> + caster; + return pybind11::detail::cast_ref( + std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::get_root(); + } + bool is_root(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "is_root"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::is_root(a0); + } + bool is_snarl(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "is_snarl"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::is_snarl(a0); + } + bool is_chain(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "is_chain"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return 
pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::is_chain(a0); + } + bool is_node(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "is_node"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::is_node(a0); + } + bool is_sentinel(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "is_sentinel"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::is_sentinel(a0); + } + struct handlegraph::net_handle_t + get_net(const struct handlegraph::handle_t &a0, + const class handlegraph::HandleGraph *a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "get_net"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::net_handle_t>::value) { + static pybind11::detail::override_caster_t< + struct handlegraph::net_handle_t> + caster; + return pybind11::detail::cast_ref( + std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::get_net(a0, a1); + } + struct handlegraph::handle_t + 
get_handle(const struct handlegraph::net_handle_t &a0, + const class handlegraph::HandleGraph *a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "get_handle"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::handle_t>::value) { + static pybind11::detail::override_caster_t + caster; + return pybind11::detail::cast_ref( + std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::get_handle(a0, a1); + } + struct handlegraph::net_handle_t + get_parent(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "get_parent"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::net_handle_t>::value) { + static pybind11::detail::override_caster_t< + struct handlegraph::net_handle_t> + caster; + return pybind11::detail::cast_ref( + std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::get_parent(a0); + } + struct handlegraph::net_handle_t + get_bound(const struct handlegraph::net_handle_t &a0, bool a1, + bool a2) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "get_bound"); + if (overload) { + auto o = overload.operator()( + a0, a1, a2); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::net_handle_t>::value) { + static pybind11::detail::override_caster_t< + struct handlegraph::net_handle_t> + caster; + return pybind11::detail::cast_ref( + std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::get_bound(a0, a1, a2); + } 
+ struct handlegraph::net_handle_t + flip(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "flip"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::net_handle_t>::value) { + static pybind11::detail::override_caster_t< + struct handlegraph::net_handle_t> + caster; + return pybind11::detail::cast_ref( + std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::flip(a0); + } + struct handlegraph::net_handle_t + canonical(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "canonical"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::net_handle_t>::value) { + static pybind11::detail::override_caster_t< + struct handlegraph::net_handle_t> + caster; + return pybind11::detail::cast_ref( + std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::canonical(a0); + } + enum handlegraph::SnarlDecomposition::endpoint_t + starts_at(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "starts_at"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference< + enum handlegraph::SnarlDecomposition::endpoint_t>::value) { + static pybind11::detail::override_caster_t< + enum handlegraph::SnarlDecomposition::endpoint_t> + caster; + return pybind11::detail::cast_ref< + enum handlegraph::SnarlDecomposition::endpoint_t>(std::move(o), + caster); + } + return pybind11::detail::cast_safe< 
+ enum handlegraph::SnarlDecomposition::endpoint_t>(std::move(o)); + } + return SnarlDistanceIndex::starts_at(a0); + } + enum handlegraph::SnarlDecomposition::endpoint_t + ends_at(const struct handlegraph::net_handle_t &a0) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "ends_at"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference< + enum handlegraph::SnarlDecomposition::endpoint_t>::value) { + static pybind11::detail::override_caster_t< + enum handlegraph::SnarlDecomposition::endpoint_t> + caster; + return pybind11::detail::cast_ref< + enum handlegraph::SnarlDecomposition::endpoint_t>(std::move(o), + caster); + } + return pybind11::detail::cast_safe< + enum handlegraph::SnarlDecomposition::endpoint_t>(std::move(o)); + } + return SnarlDistanceIndex::ends_at(a0); + } + bool for_each_child_impl( + const struct handlegraph::net_handle_t &a0, + const class std::function + &a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), + "for_each_child_impl"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::for_each_child_impl(a0, a1); + } + bool for_each_traversal_impl( + const struct handlegraph::net_handle_t &a0, + const class std::function + &a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), + "for_each_traversal_impl"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t 
caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::for_each_traversal_impl(a0, a1); + } + bool follow_net_edges_impl( + const struct handlegraph::net_handle_t &a0, + const class handlegraph::HandleGraph *a1, bool a2, + const class std::function + &a3) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), + "follow_net_edges_impl"); + if (overload) { + auto o = overload.operator()( + a0, a1, a2, a3); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDistanceIndex::follow_net_edges_impl(a0, a1, a2, a3); + } + struct handlegraph::net_handle_t get_parent_traversal( + const struct handlegraph::net_handle_t &a0, + const struct handlegraph::net_handle_t &a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), + "get_parent_traversal"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if (pybind11::detail::cast_is_temporary_value_reference< + struct handlegraph::net_handle_t>::value) { + static pybind11::detail::override_caster_t< + struct handlegraph::net_handle_t> + caster; + return pybind11::detail::cast_ref( + std::move(o), caster); + } + return pybind11::detail::cast_safe( + std::move(o)); + } + return SnarlDistanceIndex::get_parent_traversal(a0, a1); + } + bool for_each_tippy_child_impl( + const struct handlegraph::net_handle_t &a0, + const class std::function + &a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), + "for_each_tippy_child_impl"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if 
(pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDecomposition::for_each_tippy_child_impl(a0, a1); + } + bool for_each_traversal_start_impl( + const struct handlegraph::net_handle_t &a0, + const class std::function + &a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), + "for_each_traversal_start_impl"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDecomposition::for_each_traversal_start_impl(a0, a1); + } + bool for_each_traversal_end_impl( + const struct handlegraph::net_handle_t &a0, + const class std::function + &a1) const override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), + "for_each_traversal_end_impl"); + if (overload) { + auto o = + overload.operator()(a0, a1); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return SnarlDecomposition::for_each_traversal_end_impl(a0, a1); + } + void serialize(const std::string &a0) override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "serialize"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return 
pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return TriviallySerializable::serialize(a0); + } + void deserialize(const std::string &a0) override { + pybind11::gil_scoped_acquire gil; + pybind11::function overload = pybind11::get_overload( + static_cast(this), "deserialize"); + if (overload) { + auto o = + overload.operator()(a0); + if (pybind11::detail::cast_is_temporary_value_reference::value) { + static pybind11::detail::override_caster_t caster; + return pybind11::detail::cast_ref(std::move(o), caster); + } + return pybind11::detail::cast_safe(std::move(o)); + } + return TriviallySerializable::deserialize(a0); + } }; -void bind_bdsg_snarl_distance_index(std::function< pybind11::module &(std::string const &namespace_) > &M) -{ - { // bdsg::SnarlDistanceIndex file:bdsg/snarl_distance_index.hpp line:181 - pybind11::class_, PyCallBack_bdsg_SnarlDistanceIndex, handlegraph::SnarlDecomposition, handlegraph::TriviallySerializable> cl(M("bdsg"), "SnarlDistanceIndex", "The distance index, which also acts as a snarl decomposition.\n\n The distance index provides an interface to traverse the snarl tree and to\n find minimum distances between two sibling nodes in the snarl tree (eg\n between two chains that are children of the same snarl).\n\n It also provides a method for quickly calculating the minimum distance\n between two positions on the graph.\n\n The implementation here is tightly coupled with the filling-in code in vg\n (see vg::fill_in_distance_index()). 
To make a SnarlDistanceIndex that\n actually works, you have to construct the object, and then call\n get_snarl_tree_records() with zero or more TemporaryDistanceIndex objects\n for connected components, and a graph.\n\n The TemporaryDistanceIndex needs to have a variety of TemporaryRecord\n implementation classes (TemporaryChainRecord, TemporarySnarlRecord,\n TemporaryNodeRecord) set up and added to it; this all has to be done \"by\n hand\", as it were, because no code is in this library to help you do it.\n\n "); - cl.def( pybind11::init( [](){ return new bdsg::SnarlDistanceIndex(); }, [](){ return new PyCallBack_bdsg_SnarlDistanceIndex(); } ) ); - - pybind11::enum_(cl, "connectivity_t", pybind11::arithmetic(), "The connectivity of a net_handle- this defines the direction that the net_handle is traversed") - .value("START_START", bdsg::SnarlDistanceIndex::START_START) - .value("START_END", bdsg::SnarlDistanceIndex::START_END) - .value("START_TIP", bdsg::SnarlDistanceIndex::START_TIP) - .value("END_START", bdsg::SnarlDistanceIndex::END_START) - .value("END_END", bdsg::SnarlDistanceIndex::END_END) - .value("END_TIP", bdsg::SnarlDistanceIndex::END_TIP) - .value("TIP_START", bdsg::SnarlDistanceIndex::TIP_START) - .value("TIP_END", bdsg::SnarlDistanceIndex::TIP_END) - .value("TIP_TIP", bdsg::SnarlDistanceIndex::TIP_TIP) - .export_values(); - - - pybind11::enum_(cl, "net_handle_record_t", pybind11::arithmetic(), "Type of a net_handle_t, which may not be the type of the record\nThis is to allow a node record to be seen as a chain from the perspective of a handle") - .value("ROOT_HANDLE", bdsg::SnarlDistanceIndex::ROOT_HANDLE) - .value("NODE_HANDLE", bdsg::SnarlDistanceIndex::NODE_HANDLE) - .value("SNARL_HANDLE", bdsg::SnarlDistanceIndex::SNARL_HANDLE) - .value("CHAIN_HANDLE", bdsg::SnarlDistanceIndex::CHAIN_HANDLE) - .value("SENTINEL_HANDLE", bdsg::SnarlDistanceIndex::SENTINEL_HANDLE) - .export_values(); - - - pybind11::enum_(cl, "record_t", pybind11::arithmetic(), "A 
record_t is the type of structure that a record can be.\n The actual distance index is stored as a series of \"records\" for each snarl/node/chain. \n The record type defines what is stored in a record\n\nNODE, SNARL, and CHAIN indicate that they don't store distances.\nSIMPLE_SNARL is a snarl with all children connecting only to the boundary nodes in one direction (ie, a bubble).\nTRIVIAL_SNARL represents consecutive nodes in a chain. \nNODE represents a node that is a trivial chain. A node can only be the child of a snarl.\nOVERSIZED_SNARL only stores distances to the boundaries.\nROOT_SNARL represents a connected component of the root. It has no start or end node so \n its children technically belong to the root.\nMULTICOMPONENT_CHAIN can represent a chain with snarls that are not start-end connected.\n The chain is split up into components between these snarls, each node is tagged with\n which component it belongs to.") - .value("ROOT", bdsg::SnarlDistanceIndex::ROOT) - .value("NODE", bdsg::SnarlDistanceIndex::NODE) - .value("DISTANCED_NODE", bdsg::SnarlDistanceIndex::DISTANCED_NODE) - .value("TRIVIAL_SNARL", bdsg::SnarlDistanceIndex::TRIVIAL_SNARL) - .value("DISTANCED_TRIVIAL_SNARL", bdsg::SnarlDistanceIndex::DISTANCED_TRIVIAL_SNARL) - .value("SIMPLE_SNARL", bdsg::SnarlDistanceIndex::SIMPLE_SNARL) - .value("DISTANCED_SIMPLE_SNARL", bdsg::SnarlDistanceIndex::DISTANCED_SIMPLE_SNARL) - .value("SNARL", bdsg::SnarlDistanceIndex::SNARL) - .value("DISTANCED_SNARL", bdsg::SnarlDistanceIndex::DISTANCED_SNARL) - .value("OVERSIZED_SNARL", bdsg::SnarlDistanceIndex::OVERSIZED_SNARL) - .value("ROOT_SNARL", bdsg::SnarlDistanceIndex::ROOT_SNARL) - .value("DISTANCED_ROOT_SNARL", bdsg::SnarlDistanceIndex::DISTANCED_ROOT_SNARL) - .value("CHAIN", bdsg::SnarlDistanceIndex::CHAIN) - .value("DISTANCED_CHAIN", bdsg::SnarlDistanceIndex::DISTANCED_CHAIN) - .value("MULTICOMPONENT_CHAIN", bdsg::SnarlDistanceIndex::MULTICOMPONENT_CHAIN) - .value("CHILDREN", 
bdsg::SnarlDistanceIndex::CHILDREN) - .export_values(); +void bind_bdsg_snarl_distance_index( + std::function &M) { + { // bdsg::SnarlDistanceIndex file:bdsg/snarl_distance_index.hpp line:181 + pybind11::class_< + bdsg::SnarlDistanceIndex, std::shared_ptr, + PyCallBack_bdsg_SnarlDistanceIndex, handlegraph::SnarlDecomposition, + handlegraph::TriviallySerializable> + cl(M("bdsg"), "SnarlDistanceIndex", + "The distance index, which also acts as a snarl decomposition.\n\n " + "The distance index provides an interface to traverse the snarl " + "tree and to\n find minimum distances between two sibling nodes in " + "the snarl tree (eg\n between two chains that are children of the " + "same snarl).\n\n It also provides a method for quickly calculating " + "the minimum distance\n between two positions on the graph.\n\n The " + "implementation here is tightly coupled with the filling-in code in " + "vg\n (see vg::fill_in_distance_index()). To make a " + "SnarlDistanceIndex that\n actually works, you have to construct " + "the object, and then call\n get_snarl_tree_records() with zero or " + "more TemporaryDistanceIndex objects\n for connected components, " + "and a graph.\n\n The TemporaryDistanceIndex needs to have a " + "variety of TemporaryRecord\n implementation classes " + "(TemporaryChainRecord, TemporarySnarlRecord,\n " + "TemporaryNodeRecord) set up and added to it; this all has to be " + "done \"by\n hand\", as it were, because no code is in this library " + "to help you do it.\n\n "); + cl.def(pybind11::init( + []() { return new bdsg::SnarlDistanceIndex(); }, + []() { return new PyCallBack_bdsg_SnarlDistanceIndex(); })); + pybind11::enum_( + cl, "connectivity_t", pybind11::arithmetic(), + "The connectivity of a net_handle- this defines the direction that the " + "net_handle is traversed") + .value("START_START", bdsg::SnarlDistanceIndex::START_START) + .value("START_END", bdsg::SnarlDistanceIndex::START_END) + .value("START_TIP", 
bdsg::SnarlDistanceIndex::START_TIP) + .value("END_START", bdsg::SnarlDistanceIndex::END_START) + .value("END_END", bdsg::SnarlDistanceIndex::END_END) + .value("END_TIP", bdsg::SnarlDistanceIndex::END_TIP) + .value("TIP_START", bdsg::SnarlDistanceIndex::TIP_START) + .value("TIP_END", bdsg::SnarlDistanceIndex::TIP_END) + .value("TIP_TIP", bdsg::SnarlDistanceIndex::TIP_TIP) + .export_values(); - pybind11::enum_(cl, "temp_record_t", pybind11::arithmetic(), "") - .value("TEMP_CHAIN", bdsg::SnarlDistanceIndex::TEMP_CHAIN) - .value("TEMP_SNARL", bdsg::SnarlDistanceIndex::TEMP_SNARL) - .value("TEMP_NODE", bdsg::SnarlDistanceIndex::TEMP_NODE) - .value("TEMP_ROOT", bdsg::SnarlDistanceIndex::TEMP_ROOT) - .export_values(); + pybind11::enum_( + cl, "net_handle_record_t", pybind11::arithmetic(), + "Type of a net_handle_t, which may not be the type of the record\nThis " + "is to allow a node record to be seen as a chain from the perspective " + "of a handle") + .value("ROOT_HANDLE", bdsg::SnarlDistanceIndex::ROOT_HANDLE) + .value("NODE_HANDLE", bdsg::SnarlDistanceIndex::NODE_HANDLE) + .value("SNARL_HANDLE", bdsg::SnarlDistanceIndex::SNARL_HANDLE) + .value("CHAIN_HANDLE", bdsg::SnarlDistanceIndex::CHAIN_HANDLE) + .value("SENTINEL_HANDLE", bdsg::SnarlDistanceIndex::SENTINEL_HANDLE) + .export_values(); - cl.def("serialize", [](bdsg::SnarlDistanceIndex &o, const std::string & a0) -> void { return o.serialize(a0); }, "", pybind11::arg("filename")); - cl.def("deserialize", [](bdsg::SnarlDistanceIndex &o, const std::string & a0) -> void { return o.deserialize(a0); }, "", pybind11::arg("filename")); - cl.def("dissociate", (void (bdsg::SnarlDistanceIndex::*)()) &bdsg::SnarlDistanceIndex::dissociate, "C++: bdsg::SnarlDistanceIndex::dissociate() --> void"); - cl.def("serialize", (void (bdsg::SnarlDistanceIndex::*)(const class std::function &) const) &bdsg::SnarlDistanceIndex::serialize, "C++: bdsg::SnarlDistanceIndex::serialize(const class std::function &) const --> void", 
pybind11::arg("iteratee")); - cl.def("serialize", (void (bdsg::SnarlDistanceIndex::*)(int)) &bdsg::SnarlDistanceIndex::serialize, "C++: bdsg::SnarlDistanceIndex::serialize(int) --> void", pybind11::arg("fd")); - cl.def("deserialize", (void (bdsg::SnarlDistanceIndex::*)(int)) &bdsg::SnarlDistanceIndex::deserialize, "C++: bdsg::SnarlDistanceIndex::deserialize(int) --> void", pybind11::arg("fd")); - cl.def("get_magic_number", (unsigned int (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::get_magic_number, "C++: bdsg::SnarlDistanceIndex::get_magic_number() const --> unsigned int"); - cl.def("get_prefix", (std::string (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::get_prefix, "C++: bdsg::SnarlDistanceIndex::get_prefix() const --> std::string"); - cl.def("preload", [](bdsg::SnarlDistanceIndex const &o) -> void { return o.preload(); }, ""); - cl.def("preload", (void (bdsg::SnarlDistanceIndex::*)(bool) const) &bdsg::SnarlDistanceIndex::preload, "Allow for preloading the index for more accurate timing of algorithms\n that use it, if it fits in memory. If blocking is true, waits for the\n index to be paged in. 
Otherwise, just tells the OS that we will want to\n use it.\n\nC++: bdsg::SnarlDistanceIndex::preload(bool) const --> void", pybind11::arg("blocking")); - cl.def("maximum_distance", [](bdsg::SnarlDistanceIndex const &o, const long long & a0, const bool & a1, const unsigned long & a2, const long long & a3, const bool & a4, const unsigned long & a5) -> unsigned long { return o.maximum_distance(a0, a1, a2, a3, a4, a5); }, "", pybind11::arg("id1"), pybind11::arg("rev1"), pybind11::arg("offset1"), pybind11::arg("id2"), pybind11::arg("rev2"), pybind11::arg("offset2")); - cl.def("maximum_distance", [](bdsg::SnarlDistanceIndex const &o, const long long & a0, const bool & a1, const unsigned long & a2, const long long & a3, const bool & a4, const unsigned long & a5, bool const & a6) -> unsigned long { return o.maximum_distance(a0, a1, a2, a3, a4, a5, a6); }, "", pybind11::arg("id1"), pybind11::arg("rev1"), pybind11::arg("offset1"), pybind11::arg("id2"), pybind11::arg("rev2"), pybind11::arg("offset2"), pybind11::arg("unoriented_distance")); - cl.def("maximum_distance", (unsigned long (bdsg::SnarlDistanceIndex::*)(const long long, const bool, const unsigned long, const long long, const bool, const unsigned long, bool, const class handlegraph::HandleGraph *) const) &bdsg::SnarlDistanceIndex::maximum_distance, "Find an approximation of the maximum distance between two positions. 
\nThis isn't a true maximum- the only guarantee is that it's greater than or equal to the minimum distance.\n\nC++: bdsg::SnarlDistanceIndex::maximum_distance(const long long, const bool, const unsigned long, const long long, const bool, const unsigned long, bool, const class handlegraph::HandleGraph *) const --> unsigned long", pybind11::arg("id1"), pybind11::arg("rev1"), pybind11::arg("offset1"), pybind11::arg("id2"), pybind11::arg("rev2"), pybind11::arg("offset2"), pybind11::arg("unoriented_distance"), pybind11::arg("graph")); - cl.def("distance_in_parent", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0, const struct handlegraph::net_handle_t & a1, const struct handlegraph::net_handle_t & a2) -> unsigned long { return o.distance_in_parent(a0, a1, a2); }, "", pybind11::arg("parent"), pybind11::arg("child1"), pybind11::arg("child2")); - cl.def("distance_in_parent", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0, const struct handlegraph::net_handle_t & a1, const struct handlegraph::net_handle_t & a2, const class handlegraph::HandleGraph * a3) -> unsigned long { return o.distance_in_parent(a0, a1, a2, a3); }, "", pybind11::arg("parent"), pybind11::arg("child1"), pybind11::arg("child2"), pybind11::arg("graph")); - cl.def("distance_in_parent", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const class handlegraph::HandleGraph *, unsigned long) const) &bdsg::SnarlDistanceIndex::distance_in_parent, "C++: bdsg::SnarlDistanceIndex::distance_in_parent(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const class handlegraph::HandleGraph *, unsigned long) const --> unsigned long", pybind11::arg("parent"), pybind11::arg("child1"), pybind11::arg("child2"), pybind11::arg("graph"), 
pybind11::arg("distance_limit")); - cl.def("distance_in_snarl", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0, const unsigned long & a1, const bool & a2, const unsigned long & a3, const bool & a4) -> unsigned long { return o.distance_in_snarl(a0, a1, a2, a3, a4); }, "", pybind11::arg("parent"), pybind11::arg("rank1"), pybind11::arg("right_side1"), pybind11::arg("rank2"), pybind11::arg("right_side2")); - cl.def("distance_in_snarl", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0, const unsigned long & a1, const bool & a2, const unsigned long & a3, const bool & a4, const class handlegraph::HandleGraph * a5) -> unsigned long { return o.distance_in_snarl(a0, a1, a2, a3, a4, a5); }, "", pybind11::arg("parent"), pybind11::arg("rank1"), pybind11::arg("right_side1"), pybind11::arg("rank2"), pybind11::arg("right_side2"), pybind11::arg("graph")); - cl.def("distance_in_snarl", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const unsigned long &, const bool &, const unsigned long &, const bool &, const class handlegraph::HandleGraph *, unsigned long) const) &bdsg::SnarlDistanceIndex::distance_in_snarl, "C++: bdsg::SnarlDistanceIndex::distance_in_snarl(const struct handlegraph::net_handle_t &, const unsigned long &, const bool &, const unsigned long &, const bool &, const class handlegraph::HandleGraph *, unsigned long) const --> unsigned long", pybind11::arg("parent"), pybind11::arg("rank1"), pybind11::arg("right_side1"), pybind11::arg("rank2"), pybind11::arg("right_side2"), pybind11::arg("graph"), pybind11::arg("distance_limit")); - cl.def("max_distance_in_parent", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0, const struct handlegraph::net_handle_t & a1, const struct handlegraph::net_handle_t & a2) -> unsigned long { return o.max_distance_in_parent(a0, a1, a2); }, "", pybind11::arg("parent"), pybind11::arg("child1"), 
pybind11::arg("child2")); - cl.def("max_distance_in_parent", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0, const struct handlegraph::net_handle_t & a1, const struct handlegraph::net_handle_t & a2, const class handlegraph::HandleGraph * a3) -> unsigned long { return o.max_distance_in_parent(a0, a1, a2, a3); }, "", pybind11::arg("parent"), pybind11::arg("child1"), pybind11::arg("child2"), pybind11::arg("graph")); - cl.def("max_distance_in_parent", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const class handlegraph::HandleGraph *, unsigned long) const) &bdsg::SnarlDistanceIndex::max_distance_in_parent, "Find the maximum distance between two children in the parent. \nThis is the same as distance_in_parent for everything except children of chains\n\nC++: bdsg::SnarlDistanceIndex::max_distance_in_parent(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &, const class handlegraph::HandleGraph *, unsigned long) const --> unsigned long", pybind11::arg("parent"), pybind11::arg("child1"), pybind11::arg("child2"), pybind11::arg("graph"), pybind11::arg("distance_limit")); - cl.def("distance_to_parent_bound", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0, bool const & a1, struct handlegraph::net_handle_t const & a2) -> unsigned long { return o.distance_to_parent_bound(a0, a1, a2); }, "", pybind11::arg("parent"), pybind11::arg("to_start"), pybind11::arg("child")); - cl.def("distance_to_parent_bound", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, bool, struct handlegraph::net_handle_t, class std::tuple) const) &bdsg::SnarlDistanceIndex::distance_to_parent_bound, "Get the distance from the child to the start or end bound of the parent.\nparent_and_child_types are hints to 
figure out the type of snarl/chain records the parent and child are.\ntuple of parent record type, parent handle type, child record type, child handle type.\nThis is really just used to see if the parent and child are trivial chains, so it might not be exactly what the actual record is.\n\nC++: bdsg::SnarlDistanceIndex::distance_to_parent_bound(const struct handlegraph::net_handle_t &, bool, struct handlegraph::net_handle_t, class std::tuple) const --> unsigned long", pybind11::arg("parent"), pybind11::arg("to_start"), pybind11::arg("child"), pybind11::arg("parent_and_child_types")); - cl.def("into_which_snarl", (class std::tuple (bdsg::SnarlDistanceIndex::*)(const long long &, const bool &) const) &bdsg::SnarlDistanceIndex::into_which_snarl, "If this node id and orientation is pointing into a snarl, then return the start.\nnode id and orientation pointing into the snarl, and if the snarl is trivial.\nReturns <0, false, false> if it doesn't point into a snarl.\n\nC++: bdsg::SnarlDistanceIndex::into_which_snarl(const long long &, const bool &) const --> class std::tuple", pybind11::arg("id"), pybind11::arg("reverse")); - cl.def("is_ordered_in_chain", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_ordered_in_chain, "Return true if child1 comes before child2 in the chain. 
\n\nC++: bdsg::SnarlDistanceIndex::is_ordered_in_chain(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("child1"), pybind11::arg("child2")); - cl.def("is_externally_start_end_connected", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t) const) &bdsg::SnarlDistanceIndex::is_externally_start_end_connected, "C++: bdsg::SnarlDistanceIndex::is_externally_start_end_connected(const struct handlegraph::net_handle_t) const --> bool", pybind11::arg("net")); - cl.def("is_externally_start_start_connected", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t) const) &bdsg::SnarlDistanceIndex::is_externally_start_start_connected, "C++: bdsg::SnarlDistanceIndex::is_externally_start_start_connected(const struct handlegraph::net_handle_t) const --> bool", pybind11::arg("net")); - cl.def("is_externally_end_end_connected", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t) const) &bdsg::SnarlDistanceIndex::is_externally_end_end_connected, "C++: bdsg::SnarlDistanceIndex::is_externally_end_end_connected(const struct handlegraph::net_handle_t) const --> bool", pybind11::arg("net")); - cl.def("lowest_common_ancestor", (struct std::pair (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::lowest_common_ancestor, "For two net handles, get a net handle lowest common ancestor.\nIf the lowest common ancestor is the root, then the two handles may be in\ndifferent connected components. 
In this case, return false.\n\nC++: bdsg::SnarlDistanceIndex::lowest_common_ancestor(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &) const --> struct std::pair", pybind11::arg("net1"), pybind11::arg("net2")); - cl.def("node_length", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::node_length, "Return the length of the net, which must represent a node (or sentinel of a snarl)\n\nC++: bdsg::SnarlDistanceIndex::node_length(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("minimum_length", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::minimum_length, "This is also the length of a net, but it can also be a snarl or chain. \nThe length of a chain includes the boundary nodes, a snarl does not.\nA looping chain only includes the start/end node once\n\nC++: bdsg::SnarlDistanceIndex::minimum_length(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("maximum_length", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::maximum_length, "C++: bdsg::SnarlDistanceIndex::maximum_length(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("chain_minimum_length", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::chain_minimum_length, "The length of a chain. 
If it is a multicomponent chain, then the length of \nthe last component, which is used for calculating distance, instead of inf \n\nC++: bdsg::SnarlDistanceIndex::chain_minimum_length(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("node_id", (long long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::node_id, "What is the node id of the node represented by this net handle.\nnet must be a node or a sentinel\n\nC++: bdsg::SnarlDistanceIndex::node_id(const struct handlegraph::net_handle_t &) const --> long long", pybind11::arg("net")); - cl.def("has_node", (bool (bdsg::SnarlDistanceIndex::*)(const long long) const) &bdsg::SnarlDistanceIndex::has_node, "Does the graph have this node?\n\nC++: bdsg::SnarlDistanceIndex::has_node(const long long) const --> bool", pybind11::arg("id")); - cl.def("is_reversed_in_parent", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_reversed_in_parent, "Only really relevant for nodes in chains, is the node\ntraversed backwards relative to the orientation of the chain\n\nC++: bdsg::SnarlDistanceIndex::is_reversed_in_parent(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("get_node_net_handle", [](bdsg::SnarlDistanceIndex const &o, const long long & a0) -> handlegraph::net_handle_t { return o.get_node_net_handle(a0); }, "", pybind11::arg("id")); - cl.def("get_node_net_handle", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const long long, bool) const) &bdsg::SnarlDistanceIndex::get_node_net_handle, "Get a net handle from a node and, optionally, an orientation\n\nC++: bdsg::SnarlDistanceIndex::get_node_net_handle(const long long, bool) const --> struct handlegraph::net_handle_t", pybind11::arg("id"), pybind11::arg("rev")); - cl.def("get_max_tree_depth", (unsigned long (bdsg::SnarlDistanceIndex::*)() const) 
&bdsg::SnarlDistanceIndex::get_max_tree_depth, "How deep is the snarl tree? The root is 0, top-level chain is 1, etc\nOnly counts chains\n\nC++: bdsg::SnarlDistanceIndex::get_max_tree_depth() const --> unsigned long"); - cl.def("get_depth", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_depth, "What is the depth of this net handle? Nodes and snarls get the depth of their parent.\nThe depth of the root is 0, the depth of its child chains is 1, the depth of the nodes and snarls that are \nchildren of those chains is also 1, and the chains that are children of those snarls have depth 2\n\nC++: bdsg::SnarlDistanceIndex::get_depth(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("get_connected_component_number", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_connected_component_number, "C++: bdsg::SnarlDistanceIndex::get_connected_component_number(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("get_handle_from_connected_component", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(unsigned long) const) &bdsg::SnarlDistanceIndex::get_handle_from_connected_component, "Given the connected component number (from get_connected_component_number), get the\nroot-level handle pointing to it.\nIf the connected component is a root-level snarl, then this may return a \"root\" handle,\nbut it will actually point to the snarl\n\nC++: bdsg::SnarlDistanceIndex::get_handle_from_connected_component(unsigned long) const --> struct handlegraph::net_handle_t", pybind11::arg("num")); - cl.def("has_connectivity", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, enum handlegraph::SnarlDecomposition::endpoint_t, enum handlegraph::SnarlDecomposition::endpoint_t) const) 
&bdsg::SnarlDistanceIndex::has_connectivity, "Is there a path between the start and end endpoints within the net handle?\n\nC++: bdsg::SnarlDistanceIndex::has_connectivity(const struct handlegraph::net_handle_t &, enum handlegraph::SnarlDecomposition::endpoint_t, enum handlegraph::SnarlDecomposition::endpoint_t) const --> bool", pybind11::arg("net"), pybind11::arg("start"), pybind11::arg("end")); - cl.def("has_external_connectivity", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, enum handlegraph::SnarlDecomposition::endpoint_t, enum handlegraph::SnarlDecomposition::endpoint_t) const) &bdsg::SnarlDistanceIndex::has_external_connectivity, "Is there a path between the start and end endpoints outside the net handle?\nThis is used for children of the root\n\nC++: bdsg::SnarlDistanceIndex::has_external_connectivity(const struct handlegraph::net_handle_t &, enum handlegraph::SnarlDecomposition::endpoint_t, enum handlegraph::SnarlDecomposition::endpoint_t) const --> bool", pybind11::arg("net"), pybind11::arg("start"), pybind11::arg("end")); - cl.def("get_prefix_sum_value", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_prefix_sum_value, "Get the prefix sum value for a node in a chain.\nFails if the parent of net is not a chain\n\nC++: bdsg::SnarlDistanceIndex::get_prefix_sum_value(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("get_max_prefix_sum_value", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_max_prefix_sum_value, "Get the maximum prefix sum value for a node in a chain.\nFails if the parent of net is not a chain\n\nC++: bdsg::SnarlDistanceIndex::get_max_prefix_sum_value(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("get_forward_loop_value", (unsigned long 
(bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_forward_loop_value, "Get the forward loop value for a node in a chain.\nFails if the parent of net is not a chain\n\nC++: bdsg::SnarlDistanceIndex::get_forward_loop_value(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("get_reverse_loop_value", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_reverse_loop_value, "Get the reverse value for a node in a chain.\nFails if the parent of net is not a chain\n\nC++: bdsg::SnarlDistanceIndex::get_reverse_loop_value(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("get_chain_component", [](bdsg::SnarlDistanceIndex const &o, const struct handlegraph::net_handle_t & a0) -> unsigned long { return o.get_chain_component(a0); }, "", pybind11::arg("net")); - cl.def("get_chain_component", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, bool) const) &bdsg::SnarlDistanceIndex::get_chain_component, "C++: bdsg::SnarlDistanceIndex::get_chain_component(const struct handlegraph::net_handle_t &, bool) const --> unsigned long", pybind11::arg("net"), pybind11::arg("get_end")); - cl.def("get_root", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::get_root, "Get a net handle referring to a tip-to-tip traversal of the contents of the root snarl.\n\nC++: bdsg::SnarlDistanceIndex::get_root() const --> struct handlegraph::net_handle_t"); - cl.def("is_root", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_root, "Return true if the given handle refers to (a traversal of) the root\nsnarl, and false otherwise.\n\nC++: bdsg::SnarlDistanceIndex::is_root(const struct handlegraph::net_handle_t &) const --> bool", 
pybind11::arg("net")); - cl.def("is_root_snarl", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_root_snarl, "Return true if the given handle refers to (a traversal of) a snarl of the root,\nwhich is considered to be the root but actually refers to a subset of the children \nof the root that are connected\n\nC++: bdsg::SnarlDistanceIndex::is_root_snarl(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_snarl", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_snarl, "Returns true if the given net handle refers to (a traversal of) a snarl.\n\nC++: bdsg::SnarlDistanceIndex::is_snarl(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_dag", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_dag, "Return true if the given snarl is a DAG and false otherwise\nReturns true if the given net_handle_t is not a snarl\n\nC++: bdsg::SnarlDistanceIndex::is_dag(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("snarl")); - cl.def("is_simple_snarl", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_simple_snarl, "Returns true if the given net handle refers to (a traversal of) a simple snarl\nA simple snarl is a bubble where each child node can only reach the boundary nodes,\nand each side of a node reaches a different boundary node\nThere may also be an edge connecting the two boundary nodes but no additional \nedges are allowed\n\nC++: bdsg::SnarlDistanceIndex::is_simple_snarl(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_regular_snarl", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_regular_snarl, 
"Returns true if the given net handle refers to (a traversal of) a regular snarl\nA regular snarl is the same as a simple snarl, except that the children may be\nnested chains, rather than being restricted to nodes \n\nC++: bdsg::SnarlDistanceIndex::is_regular_snarl(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_chain", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_chain, "Returns true if the given net handle refers to (a traversal of) a chain.\n\nC++: bdsg::SnarlDistanceIndex::is_chain(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_multicomponent_chain", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_multicomponent_chain, "Returns true if the given net handle refers to (a traversal of) a chain that is not start-end connected\n\nC++: bdsg::SnarlDistanceIndex::is_multicomponent_chain(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_looping_chain", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_looping_chain, "Returns true if the given net handle refers to (a traversal of) a chain that loops (a chain where the first and last node are the same).\n\nC++: bdsg::SnarlDistanceIndex::is_looping_chain(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_trivial_chain", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_trivial_chain, "Returns true if the given net handle refers to (a traversal of) a trivial chain that represents a single node.\n\nC++: bdsg::SnarlDistanceIndex::is_trivial_chain(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_node", (bool 
(bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_node, "Returns true if the given net handle refers to (a traversal of) a single node, and thus has a corresponding handle_t.\n\nC++: bdsg::SnarlDistanceIndex::is_node(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("is_sentinel", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::is_sentinel, "Return true if the given net handle is a snarl bound sentinel (in either\ninward or outward orientation), and false otherwise.\n\nC++: bdsg::SnarlDistanceIndex::is_sentinel(const struct handlegraph::net_handle_t &) const --> bool", pybind11::arg("net")); - cl.def("get_net", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::handle_t &, const class handlegraph::HandleGraph *) const) &bdsg::SnarlDistanceIndex::get_net, "Turn a handle to an oriented node into a net handle for a start-to-end or end-to-start traversal of the node, as appropriate.\n\nC++: bdsg::SnarlDistanceIndex::get_net(const struct handlegraph::handle_t &, const class handlegraph::HandleGraph *) const --> struct handlegraph::net_handle_t", pybind11::arg("handle"), pybind11::arg("graph")); - cl.def("get_handle", (struct handlegraph::handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const class handlegraph::HandleGraph *) const) &bdsg::SnarlDistanceIndex::get_handle, "For a net handle to a traversal of a single node, get the handle for that node in the orientation it is traversed.\nMay not be called for other net handles.\n\nC++: bdsg::SnarlDistanceIndex::get_handle(const struct handlegraph::net_handle_t &, const class handlegraph::HandleGraph *) const --> struct handlegraph::handle_t", pybind11::arg("net"), pybind11::arg("graph")); - cl.def("get_parent", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct 
handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_parent, "Get the parent snarl of a chain, or the parent chain of a snarl or node.\nIf the child is start-to-end or end-to-start, and the parent is a chain,\nthe chain comes out facing the same way, accounting for the relative\norientation of the child snarl or node in the chain. Otherwise,\neverything is produced as start-to-end, even if that is not actually a\nrealizable traversal of a snarl or chain. May not be called on the root\nsnarl.\n\nAlso works on snarl boundary sentinels.\n\nC++: bdsg::SnarlDistanceIndex::get_parent(const struct handlegraph::net_handle_t &) const --> struct handlegraph::net_handle_t", pybind11::arg("child")); - cl.def("get_bound", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, bool, bool) const) &bdsg::SnarlDistanceIndex::get_bound, "Get the bounding handle for the snarl or chain referenced by the given\nnet handle, getting the start or end facing in or out as appropriate.\n\nFor snarls, returns the bounding sentinel net handles. 
For chains,\nreturns net handles for traversals of the bounding nodes of the chain.\nIf the chain is a looping chain, then the start and end of the chain\nare the same, so the connectivity of the bound indicates which we're\nlooking at; the connectivity will be start-start if it is going \nbackwards in the node, and end-end if it is going forwards.\n\nIgnores traversal type.\n\nMay not be called on traversals of individual nodes.\n\nC++: bdsg::SnarlDistanceIndex::get_bound(const struct handlegraph::net_handle_t &, bool, bool) const --> struct handlegraph::net_handle_t", pybind11::arg("snarl"), pybind11::arg("get_end"), pybind11::arg("face_in")); - cl.def("get_node_from_sentinel", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_node_from_sentinel, "Given the sentinel of a snarl, return a handle to the node representing it\n\nC++: bdsg::SnarlDistanceIndex::get_node_from_sentinel(const struct handlegraph::net_handle_t &) const --> struct handlegraph::net_handle_t", pybind11::arg("sentinel")); - cl.def("flip", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::flip, "Return a net handle to the same snarl/chain/node in the opposite orientation.\nNo effect on tip-to-tip, start-to-start, or end-to-end net handles. Flips all the others.\n\nC++: bdsg::SnarlDistanceIndex::flip(const struct handlegraph::net_handle_t &) const --> struct handlegraph::net_handle_t", pybind11::arg("net")); - cl.def("canonical", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::canonical, "Get a canonical traversal handle from any net handle. All handles to the\nsame net graph element have the same canonical traversal. 
That canonical\ntraversal must be realizable, and might not always be start-to-end or\neven consistently be the same kind of traversal for different snarls,\nchains, or nodes. Mostly useful to normalize for equality comparisons.\n\nAny root snarl will become just the root\nAnything without connectivity will get START_END\n\nC++: bdsg::SnarlDistanceIndex::canonical(const struct handlegraph::net_handle_t &) const --> struct handlegraph::net_handle_t", pybind11::arg("net")); - cl.def("start_end_traversal_of", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::start_end_traversal_of, "Makes a start-end traversal of the net.\nFaster than canonical because it doesn't check the index for anything \n\nC++: bdsg::SnarlDistanceIndex::start_end_traversal_of(const struct handlegraph::net_handle_t &) const --> struct handlegraph::net_handle_t", pybind11::arg("net")); - cl.def("starts_at", (enum handlegraph::SnarlDecomposition::endpoint_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::starts_at, "Return the kind of location at which the given traversal starts.\n\nC++: bdsg::SnarlDistanceIndex::starts_at(const struct handlegraph::net_handle_t &) const --> enum handlegraph::SnarlDecomposition::endpoint_t", pybind11::arg("traversal")); - cl.def("ends_at", (enum handlegraph::SnarlDecomposition::endpoint_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::ends_at, "Return the kind of location at which the given traversal ends.\n\nC++: bdsg::SnarlDistanceIndex::ends_at(const struct handlegraph::net_handle_t &) const --> enum handlegraph::SnarlDecomposition::endpoint_t", pybind11::arg("traversal")); - cl.def("get_rank_in_parent", (unsigned long (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_rank_in_parent, "For a child of a 
snarl, the rank is used to calculate the distance\n\nC++: bdsg::SnarlDistanceIndex::get_rank_in_parent(const struct handlegraph::net_handle_t &) const --> unsigned long", pybind11::arg("net")); - cl.def("connected_component_count", (unsigned long (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::connected_component_count, "How many connected components are in this graph?\nThis returns the number of topological connected components, not necessarily the \nnumber of nodes in the top-level snarl \n\nC++: bdsg::SnarlDistanceIndex::connected_component_count() const --> unsigned long"); - cl.def("get_snarl_child_from_rank", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const unsigned long &) const) &bdsg::SnarlDistanceIndex::get_snarl_child_from_rank, "Get the child of a snarl from its rank. This shouldn't be exposed to the public interface but I need it\nPlease don't use it\nFor 0 or 1, returns the sentinel facing in. Otherwise return the child as a chain going START_END\n\nC++: bdsg::SnarlDistanceIndex::get_snarl_child_from_rank(const struct handlegraph::net_handle_t &, const unsigned long &) const --> struct handlegraph::net_handle_t", pybind11::arg("snarl"), pybind11::arg("rank")); - cl.def("get_parent_traversal", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::get_parent_traversal, "Get a net handle for traversals of a snarl or chain that contains\nthe given oriented bounding node traversals or sentinels. 
Given two\nsentinels for a snarl, produces a net handle to a start-to-end,\nend-to-end, end-to-start, or start-to-start traversal of that snarl.\nGiven handles to traversals of the bounding nodes of a chain, similarly\nproduces a net handle to a traversal of the chain.\n\nFor a chain, either or both handles can also be a snarl containing tips,\nfor a tip-to-start, tip-to-end, start-to-tip, end-to-tip, or tip-to-tip\ntraversal. Similarly, for a snarl, either or both handles can be a chain\nin the snarl that contains internal tips, or that has no edges on the\nappropriate end.\n\nMay only be called if a path actually exists between the given start\nand end.\n\nC++: bdsg::SnarlDistanceIndex::get_parent_traversal(const struct handlegraph::net_handle_t &, const struct handlegraph::net_handle_t &) const --> struct handlegraph::net_handle_t", pybind11::arg("traversal_start"), pybind11::arg("traversal_end")); - cl.def_static("has_distances", (const bool (*)(enum bdsg::SnarlDistanceIndex::record_t)) &bdsg::SnarlDistanceIndex::has_distances, "C++: bdsg::SnarlDistanceIndex::has_distances(enum bdsg::SnarlDistanceIndex::record_t) --> const bool", pybind11::arg("type")); - cl.def_static("get_record_handle_type", (const enum bdsg::SnarlDistanceIndex::net_handle_record_t (*)(enum bdsg::SnarlDistanceIndex::record_t)) &bdsg::SnarlDistanceIndex::get_record_handle_type, "Given the type of the record, return the handle type. Some record types can represent multiple things,\nfor example a simple snarl record is used to represent a snarl, and the nodes/trivial chains in it.\nThis will return whatever is higher on the snarl tree. 
A simple snarl will be considered a snarl,\na root snarl will be considered a root, etc\n\nC++: bdsg::SnarlDistanceIndex::get_record_handle_type(enum bdsg::SnarlDistanceIndex::record_t) --> const enum bdsg::SnarlDistanceIndex::net_handle_record_t", pybind11::arg("type")); - cl.def_static("get_record_offset", (const unsigned long (*)(const struct handlegraph::net_handle_t &)) &bdsg::SnarlDistanceIndex::get_record_offset, "The offset into records that this handle points to\n\nC++: bdsg::SnarlDistanceIndex::get_record_offset(const struct handlegraph::net_handle_t &) --> const unsigned long", pybind11::arg("net_handle")); - cl.def_static("get_node_record_offset", (const unsigned long (*)(const struct handlegraph::net_handle_t &)) &bdsg::SnarlDistanceIndex::get_node_record_offset, "The offset of a node in a trivial snarl (0 if it isn't a node in a trivial snarl)\n\nC++: bdsg::SnarlDistanceIndex::get_node_record_offset(const struct handlegraph::net_handle_t &) --> const unsigned long", pybind11::arg("net_handle")); - cl.def_static("get_connectivity", (const enum bdsg::SnarlDistanceIndex::connectivity_t (*)(const struct handlegraph::net_handle_t &)) &bdsg::SnarlDistanceIndex::get_connectivity, "C++: bdsg::SnarlDistanceIndex::get_connectivity(const struct handlegraph::net_handle_t &) --> const enum bdsg::SnarlDistanceIndex::connectivity_t", pybind11::arg("net_handle")); - cl.def_static("get_handle_type", (const enum bdsg::SnarlDistanceIndex::net_handle_record_t (*)(const struct handlegraph::net_handle_t &)) &bdsg::SnarlDistanceIndex::get_handle_type, "C++: bdsg::SnarlDistanceIndex::get_handle_type(const struct handlegraph::net_handle_t &) --> const enum bdsg::SnarlDistanceIndex::net_handle_record_t", pybind11::arg("net_handle")); - cl.def_static("get_net_handle_from_values", [](unsigned long const & a0, enum bdsg::SnarlDistanceIndex::connectivity_t const & a1, enum bdsg::SnarlDistanceIndex::net_handle_record_t const & a2) -> const handlegraph::net_handle_t { return 
bdsg::SnarlDistanceIndex::get_net_handle_from_values(a0, a1, a2); }, "", pybind11::arg("pointer"), pybind11::arg("connectivity"), pybind11::arg("type")); - cl.def_static("get_net_handle_from_values", (const struct handlegraph::net_handle_t (*)(unsigned long, enum bdsg::SnarlDistanceIndex::connectivity_t, enum bdsg::SnarlDistanceIndex::net_handle_record_t, unsigned long)) &bdsg::SnarlDistanceIndex::get_net_handle_from_values, "C++: bdsg::SnarlDistanceIndex::get_net_handle_from_values(unsigned long, enum bdsg::SnarlDistanceIndex::connectivity_t, enum bdsg::SnarlDistanceIndex::net_handle_record_t, unsigned long) --> const struct handlegraph::net_handle_t", pybind11::arg("pointer"), pybind11::arg("connectivity"), pybind11::arg("type"), pybind11::arg("node_offset")); - cl.def("get_net_handle", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(unsigned long, enum bdsg::SnarlDistanceIndex::connectivity_t) const) &bdsg::SnarlDistanceIndex::get_net_handle, "C++: bdsg::SnarlDistanceIndex::get_net_handle(unsigned long, enum bdsg::SnarlDistanceIndex::connectivity_t) const --> struct handlegraph::net_handle_t", pybind11::arg("pointer"), pybind11::arg("connectivity")); - cl.def("get_net_handle", (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)(unsigned long) const) &bdsg::SnarlDistanceIndex::get_net_handle, "C++: bdsg::SnarlDistanceIndex::get_net_handle(unsigned long) const --> struct handlegraph::net_handle_t", pybind11::arg("pointer")); - cl.def_static("get_node_pointer_offset", (const unsigned long (*)(const long long &, const long long &, unsigned long)) &bdsg::SnarlDistanceIndex::get_node_pointer_offset, "Get the offset into snarl_tree_records for the pointer to a node record.\n\nC++: bdsg::SnarlDistanceIndex::get_node_pointer_offset(const long long &, const long long &, unsigned long) --> const unsigned long", pybind11::arg("id"), pybind11::arg("min_node_id"), pybind11::arg("component_count")); - cl.def_static("endpoints_to_connectivity", 
(const enum bdsg::SnarlDistanceIndex::connectivity_t (*)(enum handlegraph::SnarlDecomposition::endpoint_t, enum handlegraph::SnarlDecomposition::endpoint_t)) &bdsg::SnarlDistanceIndex::endpoints_to_connectivity, "C++: bdsg::SnarlDistanceIndex::endpoints_to_connectivity(enum handlegraph::SnarlDecomposition::endpoint_t, enum handlegraph::SnarlDecomposition::endpoint_t) --> const enum bdsg::SnarlDistanceIndex::connectivity_t", pybind11::arg("start"), pybind11::arg("end")); - cl.def_static("get_start_endpoint", (const enum handlegraph::SnarlDecomposition::endpoint_t (*)(enum bdsg::SnarlDistanceIndex::connectivity_t)) &bdsg::SnarlDistanceIndex::get_start_endpoint, "C++: bdsg::SnarlDistanceIndex::get_start_endpoint(enum bdsg::SnarlDistanceIndex::connectivity_t) --> const enum handlegraph::SnarlDecomposition::endpoint_t", pybind11::arg("connectivity")); - cl.def_static("get_start_endpoint", (const enum handlegraph::SnarlDecomposition::endpoint_t (*)(struct handlegraph::net_handle_t)) &bdsg::SnarlDistanceIndex::get_start_endpoint, "C++: bdsg::SnarlDistanceIndex::get_start_endpoint(struct handlegraph::net_handle_t) --> const enum handlegraph::SnarlDecomposition::endpoint_t", pybind11::arg("net")); - cl.def_static("get_end_endpoint", (const enum handlegraph::SnarlDecomposition::endpoint_t (*)(enum bdsg::SnarlDistanceIndex::connectivity_t)) &bdsg::SnarlDistanceIndex::get_end_endpoint, "C++: bdsg::SnarlDistanceIndex::get_end_endpoint(enum bdsg::SnarlDistanceIndex::connectivity_t) --> const enum handlegraph::SnarlDecomposition::endpoint_t", pybind11::arg("connectivity")); - cl.def_static("get_end_endpoint", (const enum handlegraph::SnarlDecomposition::endpoint_t (*)(const struct handlegraph::net_handle_t &)) &bdsg::SnarlDistanceIndex::get_end_endpoint, "C++: bdsg::SnarlDistanceIndex::get_end_endpoint(const struct handlegraph::net_handle_t &) --> const enum handlegraph::SnarlDecomposition::endpoint_t", pybind11::arg("net")); - cl.def_static("connectivity_to_endpoints", (const 
struct std::pair (*)(const enum bdsg::SnarlDistanceIndex::connectivity_t &)) &bdsg::SnarlDistanceIndex::connectivity_to_endpoints, "C++: bdsg::SnarlDistanceIndex::connectivity_to_endpoints(const enum bdsg::SnarlDistanceIndex::connectivity_t &) --> const struct std::pair", pybind11::arg("connectivity")); - cl.def("set_snarl_size_limit", (void (bdsg::SnarlDistanceIndex::*)(unsigned long)) &bdsg::SnarlDistanceIndex::set_snarl_size_limit, "C++: bdsg::SnarlDistanceIndex::set_snarl_size_limit(unsigned long) --> void", pybind11::arg("size")); - cl.def("set_only_top_level_chain_distances", (void (bdsg::SnarlDistanceIndex::*)(bool)) &bdsg::SnarlDistanceIndex::set_only_top_level_chain_distances, "C++: bdsg::SnarlDistanceIndex::set_only_top_level_chain_distances(bool) --> void", pybind11::arg("only_chain")); - cl.def("net_handle_as_string", (std::string (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &) const) &bdsg::SnarlDistanceIndex::net_handle_as_string, "C++: bdsg::SnarlDistanceIndex::net_handle_as_string(const struct handlegraph::net_handle_t &) const --> std::string", pybind11::arg("net")); - cl.def("traverse_decomposition", (bool (bdsg::SnarlDistanceIndex::*)(const class std::function &, const class std::function &, const class std::function &) const) &bdsg::SnarlDistanceIndex::traverse_decomposition, "C++: bdsg::SnarlDistanceIndex::traverse_decomposition(const class std::function &, const class std::function &, const class std::function &) const --> bool", pybind11::arg("snarl_iteratee"), pybind11::arg("chain_iteratee"), pybind11::arg("node_iteratee")); - cl.def("traverse_decomposition_helper", (bool (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t &, const class std::function &, const class std::function &, const class std::function &) const) &bdsg::SnarlDistanceIndex::traverse_decomposition_helper, "C++: bdsg::SnarlDistanceIndex::traverse_decomposition_helper(const struct handlegraph::net_handle_t &, const class 
std::function &, const class std::function &, const class std::function &) const --> bool", pybind11::arg("net"), pybind11::arg("snarl_iteratee"), pybind11::arg("chain_iteratee"), pybind11::arg("node_iteratee")); - cl.def("print_self", (void (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::print_self, "C++: bdsg::SnarlDistanceIndex::print_self() const --> void"); - cl.def("print_descendants_of", (void (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t) const) &bdsg::SnarlDistanceIndex::print_descendants_of, "C++: bdsg::SnarlDistanceIndex::print_descendants_of(const struct handlegraph::net_handle_t) const --> void", pybind11::arg("net")); - cl.def("print_snarl_stats", (void (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::print_snarl_stats, "C++: bdsg::SnarlDistanceIndex::print_snarl_stats() const --> void"); - cl.def("write_snarls_to_json", (void (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::write_snarls_to_json, "C++: bdsg::SnarlDistanceIndex::write_snarls_to_json() const --> void"); - cl.def("validate_index", (void (bdsg::SnarlDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::validate_index, "C++: bdsg::SnarlDistanceIndex::validate_index() const --> void"); - cl.def("validate_descendants_of", (void (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t) const) &bdsg::SnarlDistanceIndex::validate_descendants_of, "C++: bdsg::SnarlDistanceIndex::validate_descendants_of(const struct handlegraph::net_handle_t) const --> void", pybind11::arg("net")); - cl.def("validate_ancestors_of", (void (bdsg::SnarlDistanceIndex::*)(const struct handlegraph::net_handle_t) const) &bdsg::SnarlDistanceIndex::validate_ancestors_of, "C++: bdsg::SnarlDistanceIndex::validate_ancestors_of(const struct handlegraph::net_handle_t) const --> void", pybind11::arg("net")); - cl.def("get_usage", (class std::tuple (bdsg::SnarlDistanceIndex::*)()) &bdsg::SnarlDistanceIndex::get_usage, "C++: 
bdsg::SnarlDistanceIndex::get_usage() --> class std::tuple"); - cl.def_static("sum", (unsigned long (*)(const unsigned long &, const unsigned long &)) &bdsg::SnarlDistanceIndex::sum, "Add integers, returning max() if any of them are max()\n\nC++: bdsg::SnarlDistanceIndex::sum(const unsigned long &, const unsigned long &) --> unsigned long", pybind11::arg("val1"), pybind11::arg("val2")); - cl.def_static("minus", (unsigned long (*)(unsigned long, unsigned long)) &bdsg::SnarlDistanceIndex::minus, "C++: bdsg::SnarlDistanceIndex::minus(unsigned long, unsigned long) --> unsigned long", pybind11::arg("x"), pybind11::arg("y")); - cl.def_static("maximum", (unsigned long (*)(unsigned long, unsigned long)) &bdsg::SnarlDistanceIndex::maximum, "C++: bdsg::SnarlDistanceIndex::maximum(unsigned long, unsigned long) --> unsigned long", pybind11::arg("x"), pybind11::arg("y")); - cl.def_static("bit_width", (unsigned long (*)(unsigned long)) &bdsg::SnarlDistanceIndex::bit_width, "C++: bdsg::SnarlDistanceIndex::bit_width(unsigned long) --> unsigned long", pybind11::arg("value")); - cl.def("time_accesses", (void (bdsg::SnarlDistanceIndex::*)()) &bdsg::SnarlDistanceIndex::time_accesses, "C++: bdsg::SnarlDistanceIndex::time_accesses() --> void"); + pybind11::enum_( + cl, "record_t", pybind11::arithmetic(), + "A record_t is the type of structure that a record can be.\n The " + "actual distance index is stored as a series of \"records\" for each " + "snarl/node/chain. \n The record type defines what is stored in a " + "record\n\nNODE, SNARL, and CHAIN indicate that they don't store " + "distances.\nSIMPLE_SNARL is a snarl with all children connecting only " + "to the boundary nodes in one direction (ie, a bubble).\nTRIVIAL_SNARL " + "represents consecutive nodes in a chain. \nNODE represents a node " + "that is a trivial chain. 
A node can only be the child of a " + "snarl.\nOVERSIZED_SNARL only stores distances to the " + "boundaries.\nROOT_SNARL represents a connected component of the root. " + "It has no start or end node so \n its children technically belong " + "to the root.\nMULTICOMPONENT_CHAIN can represent a chain with snarls " + "that are not start-end connected.\n The chain is split up into " + "components between these snarls, each node is tagged with\n which " + "component it belongs to.") + .value("ROOT", bdsg::SnarlDistanceIndex::ROOT) + .value("NODE", bdsg::SnarlDistanceIndex::NODE) + .value("DISTANCED_NODE", bdsg::SnarlDistanceIndex::DISTANCED_NODE) + .value("TRIVIAL_SNARL", bdsg::SnarlDistanceIndex::TRIVIAL_SNARL) + .value("DISTANCED_TRIVIAL_SNARL", + bdsg::SnarlDistanceIndex::DISTANCED_TRIVIAL_SNARL) + .value("SIMPLE_SNARL", bdsg::SnarlDistanceIndex::SIMPLE_SNARL) + .value("DISTANCED_SIMPLE_SNARL", + bdsg::SnarlDistanceIndex::DISTANCED_SIMPLE_SNARL) + .value("SNARL", bdsg::SnarlDistanceIndex::SNARL) + .value("DISTANCED_SNARL", bdsg::SnarlDistanceIndex::DISTANCED_SNARL) + .value("OVERSIZED_SNARL", bdsg::SnarlDistanceIndex::OVERSIZED_SNARL) + .value("ROOT_SNARL", bdsg::SnarlDistanceIndex::ROOT_SNARL) + .value("DISTANCED_ROOT_SNARL", + bdsg::SnarlDistanceIndex::DISTANCED_ROOT_SNARL) + .value("CHAIN", bdsg::SnarlDistanceIndex::CHAIN) + .value("DISTANCED_CHAIN", bdsg::SnarlDistanceIndex::DISTANCED_CHAIN) + .value("MULTICOMPONENT_CHAIN", + bdsg::SnarlDistanceIndex::MULTICOMPONENT_CHAIN) + .value("CHILDREN", bdsg::SnarlDistanceIndex::CHILDREN) + .export_values(); - { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex file:bdsg/snarl_distance_index.hpp line:1524 - auto & enclosing_class = cl; - pybind11::class_> cl(enclosing_class, "TemporaryDistanceIndex", ""); - cl.def( pybind11::init( [](){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex(); } ) ); - cl.def( pybind11::init( [](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex const &o){ return new 
bdsg::SnarlDistanceIndex::TemporaryDistanceIndex(o); } ) ); - cl.def_readwrite("min_node_id", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::min_node_id); - cl.def_readwrite("max_node_id", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_node_id); - cl.def_readwrite("root_structure_count", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::root_structure_count); - cl.def_readwrite("max_tree_depth", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_tree_depth); - cl.def_readwrite("max_index_size", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_index_size); - cl.def_readwrite("max_distance", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_distance); - cl.def_readwrite("components", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::components); - cl.def_readwrite("root_snarl_components", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::root_snarl_components); - cl.def_readwrite("temp_chain_records", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::temp_chain_records); - cl.def_readwrite("temp_snarl_records", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::temp_snarl_records); - cl.def_readwrite("temp_node_records", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::temp_node_records); - cl.def_readwrite("use_oversized_snarls", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::use_oversized_snarls); - cl.def("structure_start_end_as_string", (std::string (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::*)(struct std::pair) const) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::structure_start_end_as_string, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::structure_start_end_as_string(struct std::pair) const --> std::string", pybind11::arg("index")); - cl.def("get_max_record_length", (unsigned long (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::*)() const) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::get_max_record_length, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::get_max_record_length() const 
--> unsigned long"); - cl.def("assign", (class bdsg::SnarlDistanceIndex::TemporaryDistanceIndex & (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::*)(const class bdsg::SnarlDistanceIndex::TemporaryDistanceIndex &)) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::operator=, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::operator=(const class bdsg::SnarlDistanceIndex::TemporaryDistanceIndex &) --> class bdsg::SnarlDistanceIndex::TemporaryDistanceIndex &", pybind11::return_value_policy::automatic, pybind11::arg("")); + pybind11::enum_( + cl, "temp_record_t", pybind11::arithmetic(), "") + .value("TEMP_CHAIN", bdsg::SnarlDistanceIndex::TEMP_CHAIN) + .value("TEMP_SNARL", bdsg::SnarlDistanceIndex::TEMP_SNARL) + .value("TEMP_NODE", bdsg::SnarlDistanceIndex::TEMP_NODE) + .value("TEMP_ROOT", bdsg::SnarlDistanceIndex::TEMP_ROOT) + .export_values(); - { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord file:bdsg/snarl_distance_index.hpp line:1544 - auto & enclosing_class = cl; - pybind11::class_> cl(enclosing_class, "TemporaryRecord", ""); - cl.def( pybind11::init( [](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord const &o){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord(o); } ) ); - cl.def( pybind11::init( [](){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord(); } ) ); - cl.def("assign", (struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord & (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord::*)(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord &)) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord::operator=, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord::operator=(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord &) --> struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord &", 
pybind11::return_value_policy::automatic, pybind11::arg("")); - } + cl.def( + "serialize", + [](bdsg::SnarlDistanceIndex &o, const std::string &a0) -> void { + return o.serialize(a0); + }, + "", pybind11::arg("filename")); + cl.def( + "deserialize", + [](bdsg::SnarlDistanceIndex &o, const std::string &a0) -> void { + return o.deserialize(a0); + }, + "", pybind11::arg("filename")); + cl.def("dissociate", + (void (bdsg::SnarlDistanceIndex::*)())&bdsg::SnarlDistanceIndex:: + dissociate, + "C++: bdsg::SnarlDistanceIndex::dissociate() --> void"); + cl.def( + "serialize", + (void (bdsg::SnarlDistanceIndex::*)( + const class std::function &) + const) & + bdsg::SnarlDistanceIndex::serialize, + "C++: bdsg::SnarlDistanceIndex::serialize(const class " + "std::function &) const --> void", + pybind11::arg("iteratee")); + cl.def("serialize", + (void (bdsg::SnarlDistanceIndex::*)( + int))&bdsg::SnarlDistanceIndex::serialize, + "C++: bdsg::SnarlDistanceIndex::serialize(int) --> void", + pybind11::arg("fd")); + cl.def("deserialize", + (void (bdsg::SnarlDistanceIndex::*)( + int))&bdsg::SnarlDistanceIndex::deserialize, + "C++: bdsg::SnarlDistanceIndex::deserialize(int) --> void", + pybind11::arg("fd")); + cl.def("get_magic_number", + (unsigned int (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::get_magic_number, + "C++: bdsg::SnarlDistanceIndex::get_magic_number() const --> " + "unsigned int"); + cl.def("get_prefix", + (std::string (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::get_prefix, + "C++: bdsg::SnarlDistanceIndex::get_prefix() const --> std::string"); + cl.def( + "preload", + [](bdsg::SnarlDistanceIndex const &o) -> void { return o.preload(); }, + ""); + cl.def("preload", + (void (bdsg::SnarlDistanceIndex::*)(bool) const) & + bdsg::SnarlDistanceIndex::preload, + "Allow for preloading the index for more accurate timing of " + "algorithms\n that use it, if it fits in memory. 
If blocking is " + "true, waits for the\n index to be paged in. Otherwise, just tells " + "the OS that we will want to\n use it.\n\nC++: " + "bdsg::SnarlDistanceIndex::preload(bool) const --> void", + pybind11::arg("blocking")); + cl.def( + "maximum_distance", + [](bdsg::SnarlDistanceIndex const &o, const long long &a0, + const bool &a1, const unsigned long &a2, const long long &a3, + const bool &a4, const unsigned long &a5) -> unsigned long { + return o.maximum_distance(a0, a1, a2, a3, a4, a5); + }, + "", pybind11::arg("id1"), pybind11::arg("rev1"), + pybind11::arg("offset1"), pybind11::arg("id2"), pybind11::arg("rev2"), + pybind11::arg("offset2")); + cl.def( + "maximum_distance", + [](bdsg::SnarlDistanceIndex const &o, const long long &a0, + const bool &a1, const unsigned long &a2, const long long &a3, + const bool &a4, const unsigned long &a5, + bool const &a6) -> unsigned long { + return o.maximum_distance(a0, a1, a2, a3, a4, a5, a6); + }, + "", pybind11::arg("id1"), pybind11::arg("rev1"), + pybind11::arg("offset1"), pybind11::arg("id2"), pybind11::arg("rev2"), + pybind11::arg("offset2"), pybind11::arg("unoriented_distance")); + cl.def("maximum_distance", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const long long, const bool, const unsigned long, + const long long, const bool, const unsigned long, bool, + const class handlegraph::HandleGraph *) const) & + bdsg::SnarlDistanceIndex::maximum_distance, + "Find an approximation of the maximum distance between two " + "positions. 
\nThis isn't a true maximum- the only guarantee is that " + "it's greater than or equal to the minimum distance.\n\nC++: " + "bdsg::SnarlDistanceIndex::maximum_distance(const long long, const " + "bool, const unsigned long, const long long, const bool, const " + "unsigned long, bool, const class handlegraph::HandleGraph *) const " + "--> unsigned long", + pybind11::arg("id1"), pybind11::arg("rev1"), + pybind11::arg("offset1"), pybind11::arg("id2"), + pybind11::arg("rev2"), pybind11::arg("offset2"), + pybind11::arg("unoriented_distance"), pybind11::arg("graph")); + cl.def( + "distance_in_parent", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0, + const struct handlegraph::net_handle_t &a1, + const struct handlegraph::net_handle_t &a2) -> unsigned long { + return o.distance_in_parent(a0, a1, a2); + }, + "", pybind11::arg("parent"), pybind11::arg("child1"), + pybind11::arg("child2")); + cl.def( + "distance_in_parent", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0, + const struct handlegraph::net_handle_t &a1, + const struct handlegraph::net_handle_t &a2, + const class handlegraph::HandleGraph *a3) -> unsigned long { + return o.distance_in_parent(a0, a1, a2, a3); + }, + "", pybind11::arg("parent"), pybind11::arg("child1"), + pybind11::arg("child2"), pybind11::arg("graph")); + cl.def( + "distance_in_parent", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + const struct handlegraph::net_handle_t &, + const struct handlegraph::net_handle_t &, + const class handlegraph::HandleGraph *, unsigned long) const) & + bdsg::SnarlDistanceIndex::distance_in_parent, + "C++: bdsg::SnarlDistanceIndex::distance_in_parent(const struct " + "handlegraph::net_handle_t &, const struct handlegraph::net_handle_t " + "&, const struct handlegraph::net_handle_t &, const class " + "handlegraph::HandleGraph *, unsigned long) const --> unsigned long", + pybind11::arg("parent"), 
pybind11::arg("child1"), + pybind11::arg("child2"), pybind11::arg("graph"), + pybind11::arg("distance_limit")); + cl.def( + "distance_in_snarl", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0, const unsigned long &a1, + const bool &a2, const unsigned long &a3, + const bool &a4) -> unsigned long { + return o.distance_in_snarl(a0, a1, a2, a3, a4); + }, + "", pybind11::arg("parent"), pybind11::arg("rank1"), + pybind11::arg("right_side1"), pybind11::arg("rank2"), + pybind11::arg("right_side2")); + cl.def( + "distance_in_snarl", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0, const unsigned long &a1, + const bool &a2, const unsigned long &a3, const bool &a4, + const class handlegraph::HandleGraph *a5) -> unsigned long { + return o.distance_in_snarl(a0, a1, a2, a3, a4, a5); + }, + "", pybind11::arg("parent"), pybind11::arg("rank1"), + pybind11::arg("right_side1"), pybind11::arg("rank2"), + pybind11::arg("right_side2"), pybind11::arg("graph")); + cl.def("distance_in_snarl", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, const unsigned long &, + const bool &, const unsigned long &, const bool &, + const class handlegraph::HandleGraph *, unsigned long) const) & + bdsg::SnarlDistanceIndex::distance_in_snarl, + "C++: bdsg::SnarlDistanceIndex::distance_in_snarl(const struct " + "handlegraph::net_handle_t &, const unsigned long &, const bool &, " + "const unsigned long &, const bool &, const class " + "handlegraph::HandleGraph *, unsigned long) const --> unsigned long", + pybind11::arg("parent"), pybind11::arg("rank1"), + pybind11::arg("right_side1"), pybind11::arg("rank2"), + pybind11::arg("right_side2"), pybind11::arg("graph"), + pybind11::arg("distance_limit")); + cl.def( + "max_distance_in_parent", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0, + const struct handlegraph::net_handle_t &a1, + const struct 
handlegraph::net_handle_t &a2) -> unsigned long { + return o.max_distance_in_parent(a0, a1, a2); + }, + "", pybind11::arg("parent"), pybind11::arg("child1"), + pybind11::arg("child2")); + cl.def( + "max_distance_in_parent", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0, + const struct handlegraph::net_handle_t &a1, + const struct handlegraph::net_handle_t &a2, + const class handlegraph::HandleGraph *a3) -> unsigned long { + return o.max_distance_in_parent(a0, a1, a2, a3); + }, + "", pybind11::arg("parent"), pybind11::arg("child1"), + pybind11::arg("child2"), pybind11::arg("graph")); + cl.def( + "max_distance_in_parent", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + const struct handlegraph::net_handle_t &, + const struct handlegraph::net_handle_t &, + const class handlegraph::HandleGraph *, unsigned long) const) & + bdsg::SnarlDistanceIndex::max_distance_in_parent, + "Find the maximum distance between two children in the parent. 
\nThis " + "is the same as distance_in_parent for everything except children of " + "chains\n\nC++: bdsg::SnarlDistanceIndex::max_distance_in_parent(const " + "struct handlegraph::net_handle_t &, const struct " + "handlegraph::net_handle_t &, const struct handlegraph::net_handle_t " + "&, const class handlegraph::HandleGraph *, unsigned long) const --> " + "unsigned long", + pybind11::arg("parent"), pybind11::arg("child1"), + pybind11::arg("child2"), pybind11::arg("graph"), + pybind11::arg("distance_limit")); + cl.def( + "distance_to_parent_bound", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0, bool const &a1, + struct handlegraph::net_handle_t const &a2) -> unsigned long { + return o.distance_to_parent_bound(a0, a1, a2); + }, + "", pybind11::arg("parent"), pybind11::arg("to_start"), + pybind11::arg("child")); + cl.def( + "distance_to_parent_bound", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, bool, + struct handlegraph::net_handle_t, + class std::tuple< + enum bdsg::SnarlDistanceIndex::net_handle_record_t, + enum bdsg::SnarlDistanceIndex::net_handle_record_t, + enum bdsg::SnarlDistanceIndex::net_handle_record_t, + enum bdsg::SnarlDistanceIndex::net_handle_record_t>) const) & + bdsg::SnarlDistanceIndex::distance_to_parent_bound, + "Get the distance from the child to the start or end bound of the " + "parent.\nparent_and_child_types are hints to figure out the type of " + "snarl/chain records the parent and child are.\ntuple of parent record " + "type, parent handle type, child record type, child handle type.\nThis " + "is really just used to see if the parent and child are trivial " + "chains, so it might not be exactly what the actual record is.\n\nC++: " + "bdsg::SnarlDistanceIndex::distance_to_parent_bound(const struct " + "handlegraph::net_handle_t &, bool, struct handlegraph::net_handle_t, " + "class std::tuple) const --> unsigned " + "long", + pybind11::arg("parent"), 
pybind11::arg("to_start"), + pybind11::arg("child"), pybind11::arg("parent_and_child_types")); + cl.def( + "into_which_snarl", + (class std::tuple (bdsg::SnarlDistanceIndex::*)( + const long long &, const bool &) const) & + bdsg::SnarlDistanceIndex::into_which_snarl, + "If this node id and orientation is pointing into a snarl, then return " + "the start.\nnode id and orientation pointing into the snarl, and if " + "the snarl is trivial.\nReturns <0, false, false> if it doesn't point " + "into a snarl.\n\nC++: " + "bdsg::SnarlDistanceIndex::into_which_snarl(const long long &, const " + "bool &) const --> class std::tuple", + pybind11::arg("id"), pybind11::arg("reverse")); + cl.def("is_ordered_in_chain", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_ordered_in_chain, + "Return true if child1 comes before child2 in the chain. \n\nC++: " + "bdsg::SnarlDistanceIndex::is_ordered_in_chain(const struct " + "handlegraph::net_handle_t &, const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("child1"), pybind11::arg("child2")); + cl.def("is_externally_start_end_connected", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t) const) & + bdsg::SnarlDistanceIndex::is_externally_start_end_connected, + "C++: " + "bdsg::SnarlDistanceIndex::is_externally_start_end_connected(const " + "struct handlegraph::net_handle_t) const --> bool", + pybind11::arg("net")); + cl.def("is_externally_start_start_connected", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t) const) & + bdsg::SnarlDistanceIndex::is_externally_start_start_connected, + "C++: " + "bdsg::SnarlDistanceIndex::is_externally_start_start_connected(" + "const struct handlegraph::net_handle_t) const --> bool", + pybind11::arg("net")); + cl.def( + "is_externally_end_end_connected", + (bool (bdsg::SnarlDistanceIndex::*)( + const 
struct handlegraph::net_handle_t) const) & + bdsg::SnarlDistanceIndex::is_externally_end_end_connected, + "C++: bdsg::SnarlDistanceIndex::is_externally_end_end_connected(const " + "struct handlegraph::net_handle_t) const --> bool", + pybind11::arg("net")); + cl.def( + "lowest_common_ancestor", + (struct std::pair ( + bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::lowest_common_ancestor, + "For two net handles, get a net handle lowest common ancestor.\nIf the " + "lowest common ancestor is the root, then the two handles may be " + "in\ndifferent connected components. In this case, return " + "false.\n\nC++: bdsg::SnarlDistanceIndex::lowest_common_ancestor(const " + "struct handlegraph::net_handle_t &, const struct " + "handlegraph::net_handle_t &) const --> struct std::pair", + pybind11::arg("net1"), pybind11::arg("net2")); + cl.def("node_length", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::node_length, + "Return the length of the net, which must represent a node (or " + "sentinel of a snarl)\n\nC++: " + "bdsg::SnarlDistanceIndex::node_length(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("minimum_length", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::minimum_length, + "This is also the length of a net, but it can also be a snarl or " + "chain. 
\nThe length of a chain includes the boundary nodes, a " + "snarl does not.\nA looping chain only includes the start/end node " + "once\n\nC++: bdsg::SnarlDistanceIndex::minimum_length(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("maximum_length", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::maximum_length, + "C++: bdsg::SnarlDistanceIndex::maximum_length(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("chain_minimum_length", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::chain_minimum_length, + "The length of a chain. If it is a multicomponent chain, then the " + "length of \nthe last component, which is used for calculating " + "distance, instead of inf \n\nC++: " + "bdsg::SnarlDistanceIndex::chain_minimum_length(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("node_id", + (long long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::node_id, + "What is the node id of the node represented by this net " + "handle.\nnet must be a node or a sentinel\n\nC++: " + "bdsg::SnarlDistanceIndex::node_id(const struct " + "handlegraph::net_handle_t &) const --> long long", + pybind11::arg("net")); + cl.def("has_node", + (bool (bdsg::SnarlDistanceIndex::*)(const long long) const) & + bdsg::SnarlDistanceIndex::has_node, + "Does the graph have this node?\n\nC++: " + "bdsg::SnarlDistanceIndex::has_node(const long long) const --> bool", + pybind11::arg("id")); + cl.def("is_reversed_in_parent", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_reversed_in_parent, + "Only really relevant for nodes in chains, is 
the node\ntraversed " + "backwards relative to the orientation of the chain\n\nC++: " + "bdsg::SnarlDistanceIndex::is_reversed_in_parent(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def( + "get_node_net_handle", + [](bdsg::SnarlDistanceIndex const &o, const long long &a0) + -> handlegraph::net_handle_t { return o.get_node_net_handle(a0); }, + "", pybind11::arg("id")); + cl.def( + "get_node_net_handle", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const long long, bool) const) & + bdsg::SnarlDistanceIndex::get_node_net_handle, + "Get a net handle from a node and, optionally, an orientation\n\nC++: " + "bdsg::SnarlDistanceIndex::get_node_net_handle(const long long, bool) " + "const --> struct handlegraph::net_handle_t", + pybind11::arg("id"), pybind11::arg("rev")); + cl.def("get_max_tree_depth", + (unsigned long (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::get_max_tree_depth, + "How deep is the snarl tree? The root is 0, top-level chain is 1, " + "etc\nOnly counts chains\n\nC++: " + "bdsg::SnarlDistanceIndex::get_max_tree_depth() const --> unsigned " + "long"); + cl.def( + "get_depth", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_depth, + "What is the depth of this net handle? 
Nodes and snarls get the depth " + "of their parent.\nThe depth of the root is 0, the depth of its child " + "chains is 1, the depth of the nodes and snarls that are \nchildren of " + "those chains is also 1, and the chains that are children of those " + "snarls have depth 2\n\nC++: bdsg::SnarlDistanceIndex::get_depth(const " + "struct handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def( + "get_connected_component_number", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_connected_component_number, + "C++: bdsg::SnarlDistanceIndex::get_connected_component_number(const " + "struct handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("get_handle_from_connected_component", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + unsigned long) const) & + bdsg::SnarlDistanceIndex::get_handle_from_connected_component, + "Given the connected component number (from " + "get_connected_component_number), get the\nroot-level handle " + "pointing to it.\nIf the connected component is a root-level snarl, " + "then this may return a \"root\" handle,\nbut it will actually " + "point to the snarl\n\nC++: " + "bdsg::SnarlDistanceIndex::get_handle_from_connected_component(" + "unsigned long) const --> struct handlegraph::net_handle_t", + pybind11::arg("num")); + cl.def("has_connectivity", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + enum handlegraph::SnarlDecomposition::endpoint_t, + enum handlegraph::SnarlDecomposition::endpoint_t) const) & + bdsg::SnarlDistanceIndex::has_connectivity, + "Is there a path between the start and end endpoints within the net " + "handle?\n\nC++: bdsg::SnarlDistanceIndex::has_connectivity(const " + "struct handlegraph::net_handle_t &, enum " + "handlegraph::SnarlDecomposition::endpoint_t, enum " + "handlegraph::SnarlDecomposition::endpoint_t) 
const --> bool", + pybind11::arg("net"), pybind11::arg("start"), pybind11::arg("end")); + cl.def("has_external_connectivity", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + enum handlegraph::SnarlDecomposition::endpoint_t, + enum handlegraph::SnarlDecomposition::endpoint_t) const) & + bdsg::SnarlDistanceIndex::has_external_connectivity, + "Is there a path between the start and end endpoints outside the " + "net handle?\nThis is used for children of the root\n\nC++: " + "bdsg::SnarlDistanceIndex::has_external_connectivity(const struct " + "handlegraph::net_handle_t &, enum " + "handlegraph::SnarlDecomposition::endpoint_t, enum " + "handlegraph::SnarlDecomposition::endpoint_t) const --> bool", + pybind11::arg("net"), pybind11::arg("start"), pybind11::arg("end")); + cl.def("get_prefix_sum_value", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_prefix_sum_value, + "Get the prefix sum value for a node in a chain.\nFails if the " + "parent of net is not a chain\n\nC++: " + "bdsg::SnarlDistanceIndex::get_prefix_sum_value(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("get_max_prefix_sum_value", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_max_prefix_sum_value, + "Get the maximum prefix sum value for a node in a chain.\nFails if " + "the parent of net is not a chain\n\nC++: " + "bdsg::SnarlDistanceIndex::get_max_prefix_sum_value(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("get_forward_loop_value", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_forward_loop_value, + "Get the forward loop value for a node in a chain.\nFails if the " + "parent of net is not a 
chain\n\nC++: " + "bdsg::SnarlDistanceIndex::get_forward_loop_value(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("get_reverse_loop_value", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_reverse_loop_value, + "Get the reverse value for a node in a chain.\nFails if the parent " + "of net is not a chain\n\nC++: " + "bdsg::SnarlDistanceIndex::get_reverse_loop_value(const struct " + "handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def( + "get_chain_component", + [](bdsg::SnarlDistanceIndex const &o, + const struct handlegraph::net_handle_t &a0) -> unsigned long { + return o.get_chain_component(a0); + }, + "", pybind11::arg("net")); + cl.def("get_chain_component", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, bool) const) & + bdsg::SnarlDistanceIndex::get_chain_component, + "C++: bdsg::SnarlDistanceIndex::get_chain_component(const struct " + "handlegraph::net_handle_t &, bool) const --> unsigned long", + pybind11::arg("net"), pybind11::arg("get_end")); + cl.def( + "get_root", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)() + const) & + bdsg::SnarlDistanceIndex::get_root, + "Get a net handle referring to a tip-to-tip traversal of the contents " + "of the root snarl.\n\nC++: bdsg::SnarlDistanceIndex::get_root() const " + "--> struct handlegraph::net_handle_t"); + cl.def("is_root", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_root, + "Return true if the given handle refers to (a traversal of) the " + "root\nsnarl, and false otherwise.\n\nC++: " + "bdsg::SnarlDistanceIndex::is_root(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_root_snarl", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct 
handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_root_snarl, + "Return true if the given handle refers to (a traversal of) a snarl " + "of the root,\nwhich is considered to be the root but actually " + "refers to a subset of the children \nof the root that are " + "connected\n\nC++: bdsg::SnarlDistanceIndex::is_root_snarl(const " + "struct handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_snarl", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_snarl, + "Returns true if the given net handle refers to (a traversal of) a " + "snarl.\n\nC++: bdsg::SnarlDistanceIndex::is_snarl(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_dag", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_dag, + "Return true if the given snarl is a DAG and false " + "otherwise\nReturns true if the given net_handle_t is not a " + "snarl\n\nC++: bdsg::SnarlDistanceIndex::is_dag(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("snarl")); + cl.def("is_simple_snarl", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_simple_snarl, + "Returns true if the given net handle refers to (a traversal of) a " + "simple snarl\nA simple snarl is a bubble where each child node can " + "only reach the boundary nodes,\nand each side of a node reaches a " + "different boundary node\nThere may also be an edge connecting the " + "two boundary nodes but no additional \nedges are allowed\n\nC++: " + "bdsg::SnarlDistanceIndex::is_simple_snarl(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def( + "is_regular_snarl", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + 
bdsg::SnarlDistanceIndex::is_regular_snarl, + "Returns true if the given net handle refers to (a traversal of) a " + "regular snarl\nA regular snarl is the same as a simple snarl, except " + "that the children may be\nnested chains, rather than being restricted " + "to nodes \n\nC++: bdsg::SnarlDistanceIndex::is_regular_snarl(const " + "struct handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_chain", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_chain, + "Returns true if the given net handle refers to (a traversal of) a " + "chain.\n\nC++: bdsg::SnarlDistanceIndex::is_chain(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_multicomponent_chain", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_multicomponent_chain, + "Returns true if the given net handle refers to (a traversal of) a " + "chain that is not start-end connected\n\nC++: " + "bdsg::SnarlDistanceIndex::is_multicomponent_chain(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_looping_chain", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_looping_chain, + "Returns true if the given net handle refers to (a traversal of) a " + "chain that loops (a chain where the first and last node are the " + "same).\n\nC++: bdsg::SnarlDistanceIndex::is_looping_chain(const " + "struct handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_trivial_chain", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_trivial_chain, + "Returns true if the given net handle refers to (a traversal of) a " + "trivial chain that represents a single node.\n\nC++: " + 
"bdsg::SnarlDistanceIndex::is_trivial_chain(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_node", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_node, + "Returns true if the given net handle refers to (a traversal of) a " + "single node, and thus has a corresponding handle_t.\n\nC++: " + "bdsg::SnarlDistanceIndex::is_node(const struct " + "handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def("is_sentinel", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::is_sentinel, + "Return true if the given net handle is a snarl bound sentinel (in " + "either\ninward or outward orientation), and false " + "otherwise.\n\nC++: bdsg::SnarlDistanceIndex::is_sentinel(const " + "struct handlegraph::net_handle_t &) const --> bool", + pybind11::arg("net")); + cl.def( + "get_net", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::handle_t &, + const class handlegraph::HandleGraph *) const) & + bdsg::SnarlDistanceIndex::get_net, + "Turn a handle to an oriented node into a net handle for a " + "start-to-end or end-to-start traversal of the node, as " + "appropriate.\n\nC++: bdsg::SnarlDistanceIndex::get_net(const struct " + "handlegraph::handle_t &, const class handlegraph::HandleGraph *) " + "const --> struct handlegraph::net_handle_t", + pybind11::arg("handle"), pybind11::arg("graph")); + cl.def( + "get_handle", + (struct handlegraph::handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + const class handlegraph::HandleGraph *) const) & + bdsg::SnarlDistanceIndex::get_handle, + "For a net handle to a traversal of a single node, get the handle for " + "that node in the orientation it is traversed.\nMay not be called for " + "other net handles.\n\nC++: 
bdsg::SnarlDistanceIndex::get_handle(const " + "struct handlegraph::net_handle_t &, const class " + "handlegraph::HandleGraph *) const --> struct handlegraph::handle_t", + pybind11::arg("net"), pybind11::arg("graph")); + cl.def( + "get_parent", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_parent, + "Get the parent snarl of a chain, or the parent chain of a snarl or " + "node.\nIf the child is start-to-end or end-to-start, and the parent " + "is a chain,\nthe chain comes out facing the same way, accounting for " + "the relative\norientation of the child snarl or node in the chain. " + "Otherwise,\neverything is produced as start-to-end, even if that is " + "not actually a\nrealizable traversal of a snarl or chain. May not be " + "called on the root\nsnarl.\n\nAlso works on snarl boundary " + "sentinels.\n\nC++: bdsg::SnarlDistanceIndex::get_parent(const struct " + "handlegraph::net_handle_t &) const --> struct " + "handlegraph::net_handle_t", + pybind11::arg("child")); + cl.def("get_bound", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, bool, bool) const) & + bdsg::SnarlDistanceIndex::get_bound, + "Get the bounding handle for the snarl or chain referenced by the " + "given\nnet handle, getting the start or end facing in or out as " + "appropriate.\n\nFor snarls, returns the bounding sentinel net " + "handles. 
For chains,\nreturns net handles for traversals of the " + "bounding nodes of the chain.\nIf the chain is a looping chain, " + "then the start and end of the chain\nare the same, so the " + "connectivity of the bound indicates which we're\nlooking at; the " + "connectivity will be start-start if it is going \nbackwards in the " + "node, and end-end if it is going forwards.\n\nIgnores traversal " + "type.\n\nMay not be called on traversals of individual " + "nodes.\n\nC++: bdsg::SnarlDistanceIndex::get_bound(const struct " + "handlegraph::net_handle_t &, bool, bool) const --> struct " + "handlegraph::net_handle_t", + pybind11::arg("snarl"), pybind11::arg("get_end"), + pybind11::arg("face_in")); + cl.def("get_node_from_sentinel", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_node_from_sentinel, + "Given the sentinel of a snarl, return a handle to the node " + "representing it\n\nC++: " + "bdsg::SnarlDistanceIndex::get_node_from_sentinel(const struct " + "handlegraph::net_handle_t &) const --> struct " + "handlegraph::net_handle_t", + pybind11::arg("sentinel")); + cl.def( + "flip", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::flip, + "Return a net handle to the same snarl/chain/node in the opposite " + "orientation.\nNo effect on tip-to-tip, start-to-start, or end-to-end " + "net handles. Flips all the others.\n\nC++: " + "bdsg::SnarlDistanceIndex::flip(const struct handlegraph::net_handle_t " + "&) const --> struct handlegraph::net_handle_t", + pybind11::arg("net")); + cl.def("canonical", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::canonical, + "Get a canonical traversal handle from any net handle. 
All handles " + "to the\nsame net graph element have the same canonical traversal. " + "That canonical\ntraversal must be realizable, and might not always " + "be start-to-end or\neven consistently be the same kind of " + "traversal for different snarls,\nchains, or nodes. Mostly useful " + "to normalize for equality comparisons.\n\nAny root snarl will " + "become just the root\nAnything without connectivity will get " + "START_END\n\nC++: bdsg::SnarlDistanceIndex::canonical(const struct " + "handlegraph::net_handle_t &) const --> struct " + "handlegraph::net_handle_t", + pybind11::arg("net")); + cl.def("start_end_traversal_of", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::start_end_traversal_of, + "Makes a start-end traversal of the net.\nFaster than canonical " + "because it doesn't check the index for anything \n\nC++: " + "bdsg::SnarlDistanceIndex::start_end_traversal_of(const struct " + "handlegraph::net_handle_t &) const --> struct " + "handlegraph::net_handle_t", + pybind11::arg("net")); + cl.def("starts_at", + (enum handlegraph::SnarlDecomposition::endpoint_t ( + bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::starts_at, + "Return the kind of location at which the given traversal " + "starts.\n\nC++: bdsg::SnarlDistanceIndex::starts_at(const struct " + "handlegraph::net_handle_t &) const --> enum " + "handlegraph::SnarlDecomposition::endpoint_t", + pybind11::arg("traversal")); + cl.def("ends_at", + (enum handlegraph::SnarlDecomposition::endpoint_t ( + bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::ends_at, + "Return the kind of location at which the given traversal " + "ends.\n\nC++: bdsg::SnarlDistanceIndex::ends_at(const struct " + "handlegraph::net_handle_t &) const --> enum " + "handlegraph::SnarlDecomposition::endpoint_t", + 
pybind11::arg("traversal")); + cl.def( + "get_rank_in_parent", + (unsigned long (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_rank_in_parent, + "For a child of a snarl, the rank is used to calculate the " + "distance\n\nC++: bdsg::SnarlDistanceIndex::get_rank_in_parent(const " + "struct handlegraph::net_handle_t &) const --> unsigned long", + pybind11::arg("net")); + cl.def("connected_component_count", + (unsigned long (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::connected_component_count, + "How many connected components are in this graph?\nThis returns the " + "number of topological connected components, not necessarily the " + "\nnumber of nodes in the top-level snarl \n\nC++: " + "bdsg::SnarlDistanceIndex::connected_component_count() const --> " + "unsigned long"); + cl.def("get_snarl_child_from_rank", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, const unsigned long &) + const) & + bdsg::SnarlDistanceIndex::get_snarl_child_from_rank, + "Get the child of a snarl from its rank. This shouldn't be exposed " + "to the public interface but I need it\nPlease don't use it\nFor 0 " + "or 1, returns the sentinel facing in. Otherwise return the child " + "as a chain going START_END\n\nC++: " + "bdsg::SnarlDistanceIndex::get_snarl_child_from_rank(const struct " + "handlegraph::net_handle_t &, const unsigned long &) const --> " + "struct handlegraph::net_handle_t", + pybind11::arg("snarl"), pybind11::arg("rank")); + cl.def( + "get_parent_traversal", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::get_parent_traversal, + "Get a net handle for traversals of a snarl or chain that " + "contains\nthe given oriented bounding node traversals or sentinels. 
" + "Given two\nsentinels for a snarl, produces a net handle to a " + "start-to-end,\nend-to-end, end-to-start, or start-to-start traversal " + "of that snarl.\nGiven handles to traversals of the bounding nodes of " + "a chain, similarly\nproduces a net handle to a traversal of the " + "chain.\n\nFor a chain, either or both handles can also be a snarl " + "containing tips,\nfor a tip-to-start, tip-to-end, start-to-tip, " + "end-to-tip, or tip-to-tip\ntraversal. Similarly, for a snarl, either " + "or both handles can be a chain\nin the snarl that contains internal " + "tips, or that has no edges on the\nappropriate end.\n\nMay only be " + "called if a path actually exists between the given start\nand " + "end.\n\nC++: bdsg::SnarlDistanceIndex::get_parent_traversal(const " + "struct handlegraph::net_handle_t &, const struct " + "handlegraph::net_handle_t &) const --> struct " + "handlegraph::net_handle_t", + pybind11::arg("traversal_start"), pybind11::arg("traversal_end")); + cl.def_static("has_distances", + (bool (*)(enum bdsg::SnarlDistanceIndex::record_t))&bdsg:: + SnarlDistanceIndex::has_distances, + "C++: bdsg::SnarlDistanceIndex::has_distances(enum " + "bdsg::SnarlDistanceIndex::record_t) --> const bool", + pybind11::arg("type")); + cl.def_static( + "get_record_handle_type", + (const enum bdsg::SnarlDistanceIndex::net_handle_record_t (*)( + enum bdsg::SnarlDistanceIndex::record_t))&bdsg::SnarlDistanceIndex:: + get_record_handle_type, + "Given the type of the record, return the handle type. Some record " + "types can represent multiple things,\nfor example a simple snarl " + "record is used to represent a snarl, and the nodes/trivial chains in " + "it.\nThis will return whatever is higher on the snarl tree. 
A simple " + "snarl will be considered a snarl,\na root snarl will be considered a " + "root, etc\n\nC++: " + "bdsg::SnarlDistanceIndex::get_record_handle_type(enum " + "bdsg::SnarlDistanceIndex::record_t) --> const enum " + "bdsg::SnarlDistanceIndex::net_handle_record_t", + pybind11::arg("type")); + cl.def_static("get_record_offset", + (const unsigned long (*)( + const struct handlegraph::net_handle_t + &))&bdsg::SnarlDistanceIndex::get_record_offset, + "The offset into records that this handle points to\n\nC++: " + "bdsg::SnarlDistanceIndex::get_record_offset(const struct " + "handlegraph::net_handle_t &) --> const unsigned long", + pybind11::arg("net_handle")); + cl.def_static("get_node_record_offset", + (const unsigned long (*)( + const struct handlegraph::net_handle_t + &))&bdsg::SnarlDistanceIndex::get_node_record_offset, + "The offset of a node in a trivial snarl (0 if it isn't a " + "node in a trivial snarl)\n\nC++: " + "bdsg::SnarlDistanceIndex::get_node_record_offset(const " + "struct handlegraph::net_handle_t &) --> const unsigned long", + pybind11::arg("net_handle")); + cl.def_static("get_connectivity", + (const enum bdsg::SnarlDistanceIndex::connectivity_t (*)( + const struct handlegraph::net_handle_t + &))&bdsg::SnarlDistanceIndex::get_connectivity, + "C++: bdsg::SnarlDistanceIndex::get_connectivity(const " + "struct handlegraph::net_handle_t &) --> const enum " + "bdsg::SnarlDistanceIndex::connectivity_t", + pybind11::arg("net_handle")); + cl.def_static("get_handle_type", + (const enum bdsg::SnarlDistanceIndex::net_handle_record_t (*)( + const struct handlegraph::net_handle_t + &))&bdsg::SnarlDistanceIndex::get_handle_type, + "C++: bdsg::SnarlDistanceIndex::get_handle_type(const struct " + "handlegraph::net_handle_t &) --> const enum " + "bdsg::SnarlDistanceIndex::net_handle_record_t", + pybind11::arg("net_handle")); + cl.def_static( + "get_net_handle_from_values", + [](unsigned long const &a0, + enum bdsg::SnarlDistanceIndex::connectivity_t const 
&a1, + enum bdsg::SnarlDistanceIndex::net_handle_record_t const &a2) + -> const handlegraph::net_handle_t { + return bdsg::SnarlDistanceIndex::get_net_handle_from_values(a0, a1, + a2); + }, + "", pybind11::arg("pointer"), pybind11::arg("connectivity"), + pybind11::arg("type")); + cl.def_static( + "get_net_handle_from_values", + (const struct handlegraph::net_handle_t (*)( + unsigned long, enum bdsg::SnarlDistanceIndex::connectivity_t, + enum bdsg::SnarlDistanceIndex::net_handle_record_t, + unsigned long))&bdsg::SnarlDistanceIndex:: + get_net_handle_from_values, + "C++: bdsg::SnarlDistanceIndex::get_net_handle_from_values(unsigned " + "long, enum bdsg::SnarlDistanceIndex::connectivity_t, enum " + "bdsg::SnarlDistanceIndex::net_handle_record_t, unsigned long) --> " + "const struct handlegraph::net_handle_t", + pybind11::arg("pointer"), pybind11::arg("connectivity"), + pybind11::arg("type"), pybind11::arg("node_offset")); + cl.def("get_net_handle", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + unsigned long, enum bdsg::SnarlDistanceIndex::connectivity_t) + const) & + bdsg::SnarlDistanceIndex::get_net_handle, + "C++: bdsg::SnarlDistanceIndex::get_net_handle(unsigned long, enum " + "bdsg::SnarlDistanceIndex::connectivity_t) const --> struct " + "handlegraph::net_handle_t", + pybind11::arg("pointer"), pybind11::arg("connectivity")); + cl.def("get_net_handle", + (struct handlegraph::net_handle_t (bdsg::SnarlDistanceIndex::*)( + unsigned long) const) & + bdsg::SnarlDistanceIndex::get_net_handle, + "C++: bdsg::SnarlDistanceIndex::get_net_handle(unsigned long) const " + "--> struct handlegraph::net_handle_t", + pybind11::arg("pointer")); + cl.def_static( + "get_node_pointer_offset", + (const unsigned long (*)( + const long long &, const long long &, + unsigned long))&bdsg::SnarlDistanceIndex::get_node_pointer_offset, + "Get the offset into snarl_tree_records for the pointer to a node " + "record.\n\nC++: " + 
"bdsg::SnarlDistanceIndex::get_node_pointer_offset(const long long &, " + "const long long &, unsigned long) --> const unsigned long", + pybind11::arg("id"), pybind11::arg("min_node_id"), + pybind11::arg("component_count")); + cl.def_static( + "endpoints_to_connectivity", + (const enum bdsg::SnarlDistanceIndex::connectivity_t (*)( + enum handlegraph::SnarlDecomposition::endpoint_t, + enum handlegraph::SnarlDecomposition::endpoint_t))&bdsg:: + SnarlDistanceIndex::endpoints_to_connectivity, + "C++: bdsg::SnarlDistanceIndex::endpoints_to_connectivity(enum " + "handlegraph::SnarlDecomposition::endpoint_t, enum " + "handlegraph::SnarlDecomposition::endpoint_t) --> const enum " + "bdsg::SnarlDistanceIndex::connectivity_t", + pybind11::arg("start"), pybind11::arg("end")); + cl.def_static("get_start_endpoint", + (enum handlegraph::SnarlDecomposition::endpoint_t (*)( + enum bdsg::SnarlDistanceIndex::connectivity_t))&bdsg:: + SnarlDistanceIndex::get_start_endpoint, + "C++: bdsg::SnarlDistanceIndex::get_start_endpoint(enum " + "bdsg::SnarlDistanceIndex::connectivity_t) --> const enum " + "handlegraph::SnarlDecomposition::endpoint_t", + pybind11::arg("connectivity")); + cl.def_static("get_start_endpoint", + (enum handlegraph::SnarlDecomposition::endpoint_t (*)( + struct handlegraph::net_handle_t))&bdsg:: + SnarlDistanceIndex::get_start_endpoint, + "C++: bdsg::SnarlDistanceIndex::get_start_endpoint(struct " + "handlegraph::net_handle_t) --> const enum " + "handlegraph::SnarlDecomposition::endpoint_t", + pybind11::arg("net")); + cl.def_static("get_end_endpoint", + (enum handlegraph::SnarlDecomposition::endpoint_t (*)( + enum bdsg::SnarlDistanceIndex::connectivity_t))&bdsg:: + SnarlDistanceIndex::get_end_endpoint, + "C++: bdsg::SnarlDistanceIndex::get_end_endpoint(enum " + "bdsg::SnarlDistanceIndex::connectivity_t) --> const enum " + "handlegraph::SnarlDecomposition::endpoint_t", + pybind11::arg("connectivity")); + cl.def_static("get_end_endpoint", + (enum 
handlegraph::SnarlDecomposition::endpoint_t (*)( + const struct handlegraph::net_handle_t + &))&bdsg::SnarlDistanceIndex::get_end_endpoint, + "C++: bdsg::SnarlDistanceIndex::get_end_endpoint(const " + "struct handlegraph::net_handle_t &) --> const enum " + "handlegraph::SnarlDecomposition::endpoint_t", + pybind11::arg("net")); + cl.def_static( + "connectivity_to_endpoints", + (const struct std::pair< + enum handlegraph::SnarlDecomposition::endpoint_t, + enum handlegraph::SnarlDecomposition::endpoint_t> (*)( + const enum bdsg::SnarlDistanceIndex::connectivity_t + &))&bdsg::SnarlDistanceIndex::connectivity_to_endpoints, + "C++: bdsg::SnarlDistanceIndex::connectivity_to_endpoints(const enum " + "bdsg::SnarlDistanceIndex::connectivity_t &) --> const struct " + "std::pair", + pybind11::arg("connectivity")); + cl.def("set_snarl_size_limit", + (void (bdsg::SnarlDistanceIndex::*)( + unsigned long))&bdsg::SnarlDistanceIndex::set_snarl_size_limit, + "C++: bdsg::SnarlDistanceIndex::set_snarl_size_limit(unsigned long) " + "--> void", + pybind11::arg("size")); + cl.def( + "set_only_top_level_chain_distances", + (void (bdsg::SnarlDistanceIndex::*)( + bool))&bdsg::SnarlDistanceIndex::set_only_top_level_chain_distances, + "C++: " + "bdsg::SnarlDistanceIndex::set_only_top_level_chain_distances(bool) " + "--> void", + pybind11::arg("only_chain")); + cl.def("net_handle_as_string", + (std::string (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &) const) & + bdsg::SnarlDistanceIndex::net_handle_as_string, + "C++: bdsg::SnarlDistanceIndex::net_handle_as_string(const struct " + "handlegraph::net_handle_t &) const --> std::string", + pybind11::arg("net")); + cl.def("traverse_decomposition", + (bool (bdsg::SnarlDistanceIndex::*)( + const class std::function &, + const class std::function &, + const class std::function &) const) & + bdsg::SnarlDistanceIndex::traverse_decomposition, + "C++: bdsg::SnarlDistanceIndex::traverse_decomposition(const class " + 
"std::function &, " + "const class std::function &, const class std::function &) const --> bool", + pybind11::arg("snarl_iteratee"), pybind11::arg("chain_iteratee"), + pybind11::arg("node_iteratee")); + cl.def("traverse_decomposition_helper", + (bool (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t &, + const class std::function &, + const class std::function &, + const class std::function &) const) & + bdsg::SnarlDistanceIndex::traverse_decomposition_helper, + "C++: bdsg::SnarlDistanceIndex::traverse_decomposition_helper(const " + "struct handlegraph::net_handle_t &, const class std::function &, const class " + "std::function &, " + "const class std::function &) const --> bool", + pybind11::arg("net"), pybind11::arg("snarl_iteratee"), + pybind11::arg("chain_iteratee"), pybind11::arg("node_iteratee")); + cl.def("print_self", + (void (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::print_self, + "C++: bdsg::SnarlDistanceIndex::print_self() const --> void"); + cl.def("print_descendants_of", + (void (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t) const) & + bdsg::SnarlDistanceIndex::print_descendants_of, + "C++: bdsg::SnarlDistanceIndex::print_descendants_of(const struct " + "handlegraph::net_handle_t) const --> void", + pybind11::arg("net")); + cl.def("print_snarl_stats", + (void (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::print_snarl_stats, + "C++: bdsg::SnarlDistanceIndex::print_snarl_stats() const --> void"); + cl.def( + "write_snarls_to_json", + (void (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::write_snarls_to_json, + "C++: bdsg::SnarlDistanceIndex::write_snarls_to_json() const --> void"); + cl.def("validate_index", + (void (bdsg::SnarlDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::validate_index, + "C++: bdsg::SnarlDistanceIndex::validate_index() const --> void"); + cl.def("validate_descendants_of", + (void 
(bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t) const) & + bdsg::SnarlDistanceIndex::validate_descendants_of, + "C++: bdsg::SnarlDistanceIndex::validate_descendants_of(const " + "struct handlegraph::net_handle_t) const --> void", + pybind11::arg("net")); + cl.def("validate_ancestors_of", + (void (bdsg::SnarlDistanceIndex::*)( + const struct handlegraph::net_handle_t) const) & + bdsg::SnarlDistanceIndex::validate_ancestors_of, + "C++: bdsg::SnarlDistanceIndex::validate_ancestors_of(const struct " + "handlegraph::net_handle_t) const --> void", + pybind11::arg("net")); + cl.def( + "get_usage", + (class std::tuple ( + bdsg::SnarlDistanceIndex::*)())&bdsg::SnarlDistanceIndex::get_usage, + "C++: bdsg::SnarlDistanceIndex::get_usage() --> class " + "std::tuple"); + cl.def_static("sum", + (unsigned long (*)( + const unsigned long &, + const unsigned long &))&bdsg::SnarlDistanceIndex::sum, + "Add integers, returning max() if any of them are " + "max()\n\nC++: bdsg::SnarlDistanceIndex::sum(const unsigned " + "long &, const unsigned long &) --> unsigned long", + pybind11::arg("val1"), pybind11::arg("val2")); + cl.def_static( + "minus", + (unsigned long (*)(unsigned long, + unsigned long))&bdsg::SnarlDistanceIndex::minus, + "C++: bdsg::SnarlDistanceIndex::minus(unsigned long, unsigned long) " + "--> unsigned long", + pybind11::arg("x"), pybind11::arg("y")); + cl.def_static( + "maximum", + (unsigned long (*)(unsigned long, + unsigned long))&bdsg::SnarlDistanceIndex::maximum, + "C++: bdsg::SnarlDistanceIndex::maximum(unsigned long, unsigned long) " + "--> unsigned long", + pybind11::arg("x"), pybind11::arg("y")); + cl.def_static( + "bit_width", + (unsigned long (*)(unsigned long))&bdsg::SnarlDistanceIndex::bit_width, + "C++: bdsg::SnarlDistanceIndex::bit_width(unsigned long) --> unsigned " + "long", + pybind11::arg("value")); + cl.def("time_accesses", + (void (bdsg::SnarlDistanceIndex::*)())&bdsg::SnarlDistanceIndex:: + time_accesses, + "C++: 
bdsg::SnarlDistanceIndex::time_accesses() --> void"); - { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord file:bdsg/snarl_distance_index.hpp line:1546 - auto & enclosing_class = cl; - pybind11::class_, bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord> cl(enclosing_class, "TemporaryChainRecord", ""); - cl.def( pybind11::init( [](){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord(); } ) ); - cl.def( pybind11::init( [](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord const &o){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord(o); } ) ); - cl.def_readwrite("start_node_id", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::start_node_id); - cl.def_readwrite("end_node_id", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::end_node_id); - cl.def_readwrite("end_node_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::end_node_length); - cl.def_readwrite("tree_depth", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::tree_depth); - cl.def_readwrite("parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::parent); - cl.def_readwrite("min_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::min_length); - cl.def_readwrite("max_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::max_length); - cl.def_readwrite("distance_left_start", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::distance_left_start); - cl.def_readwrite("distance_right_start", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::distance_right_start); - cl.def_readwrite("distance_left_end", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::distance_left_end); - cl.def_readwrite("distance_right_end", 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::distance_right_end); - cl.def_readwrite("rank_in_parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::rank_in_parent); - cl.def_readwrite("root_snarl_index", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::root_snarl_index); - cl.def_readwrite("start_node_rev", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::start_node_rev); - cl.def_readwrite("end_node_rev", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::end_node_rev); - cl.def_readwrite("reversed_in_parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::reversed_in_parent); - cl.def_readwrite("is_trivial", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::is_trivial); - cl.def_readwrite("is_tip", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::is_tip); - cl.def_readwrite("loopable", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::loopable); - cl.def_readwrite("children", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::children); - cl.def_readwrite("prefix_sum", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::prefix_sum); - cl.def_readwrite("max_prefix_sum", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::max_prefix_sum); - cl.def_readwrite("forward_loops", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::forward_loops); - cl.def_readwrite("backward_loops", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::backward_loops); - cl.def_readwrite("chain_components", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::chain_components); - cl.def("get_max_record_length", (unsigned long (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::*)(bool) const) 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::get_max_record_length, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::get_max_record_length(bool) const --> unsigned long", pybind11::arg("include_distances")); - cl.def("assign", (struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord & (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::*)(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord &)) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::operator=, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::operator=(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord &) --> struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord &", pybind11::return_value_policy::automatic, pybind11::arg("")); - } + { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex + // file:bdsg/snarl_distance_index.hpp line:1524 + auto &enclosing_class = cl; + pybind11::class_< + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex, + std::shared_ptr> + cl(enclosing_class, "TemporaryDistanceIndex", ""); + cl.def(pybind11::init([]() { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex(); + })); + cl.def(pybind11::init( + [](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex const &o) { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex(o); + })); + cl.def_readwrite( + "min_node_id", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::min_node_id); + cl.def_readwrite( + "max_node_id", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_node_id); + cl.def_readwrite("root_structure_count", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + root_structure_count); + cl.def_readwrite( + "max_tree_depth", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_tree_depth); + cl.def_readwrite( + "max_index_size", + 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_index_size); + cl.def_readwrite( + "max_distance", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::max_distance); + cl.def_readwrite( + "components", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::components); + cl.def_readwrite("root_snarl_components", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + root_snarl_components); + cl.def_readwrite("temp_chain_records", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + temp_chain_records); + cl.def_readwrite("temp_snarl_records", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + temp_snarl_records); + cl.def_readwrite( + "temp_node_records", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::temp_node_records); + cl.def_readwrite("use_oversized_snarls", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + use_oversized_snarls); + cl.def("structure_start_end_as_string", + (std::string (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::*)( + struct std::pair) const) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + structure_start_end_as_string, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::structure_" + "start_end_as_string(struct std::pair) const " + "--> std::string", + pybind11::arg("index")); + cl.def("get_max_record_length", + (unsigned long ( + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::*)() const) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + get_max_record_length, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::get_max_record_" + "length() const --> unsigned long"); + cl.def( + "assign", + (class bdsg::SnarlDistanceIndex::TemporaryDistanceIndex & + (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + *)(const class bdsg::SnarlDistanceIndex::TemporaryDistanceIndex + &)) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::operator=, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::operator=(const " + "class 
bdsg::SnarlDistanceIndex::TemporaryDistanceIndex &) --> class " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex &", + pybind11::return_value_policy::automatic, pybind11::arg("")); - { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord file:bdsg/snarl_distance_index.hpp line:1588 - auto & enclosing_class = cl; - pybind11::class_, bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord> cl(enclosing_class, "TemporarySnarlRecord", ""); - cl.def( pybind11::init( [](){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord(); } ) ); - cl.def( pybind11::init( [](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord const &o){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord(o); } ) ); - cl.def_readwrite("parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::parent); - cl.def_readwrite("start_node_id", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::start_node_id); - cl.def_readwrite("start_node_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::start_node_length); - cl.def_readwrite("end_node_id", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::end_node_id); - cl.def_readwrite("end_node_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::end_node_length); - cl.def_readwrite("node_count", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::node_count); - cl.def_readwrite("min_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::min_length); - cl.def_readwrite("max_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::max_length); - cl.def_readwrite("max_distance", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::max_distance); - cl.def_readwrite("tree_depth", 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::tree_depth); - cl.def_readwrite("distance_start_start", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::distance_start_start); - cl.def_readwrite("distance_end_end", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::distance_end_end); - cl.def_readwrite("rank_in_parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::rank_in_parent); - cl.def_readwrite("reversed_in_parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::reversed_in_parent); - cl.def_readwrite("start_node_rev", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::start_node_rev); - cl.def_readwrite("end_node_rev", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::end_node_rev); - cl.def_readwrite("is_trivial", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::is_trivial); - cl.def_readwrite("is_simple", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::is_simple); - cl.def_readwrite("is_tip", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::is_tip); - cl.def_readwrite("is_root_snarl", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::is_root_snarl); - cl.def_readwrite("include_distances", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::include_distances); - cl.def_readwrite("children", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::children); - cl.def_readwrite("tippy_child_ranks", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::tippy_child_ranks); - cl.def_readwrite("distances", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::distances); - cl.def("get_max_record_length", (unsigned long (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::*)() const) 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::get_max_record_length, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::get_max_record_length() const --> unsigned long"); - cl.def("assign", (struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord & (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::*)(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord &)) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::operator=, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::operator=(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord &) --> struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord &", pybind11::return_value_policy::automatic, pybind11::arg("")); - } + { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord + // file:bdsg/snarl_distance_index.hpp line:1544 + auto &enclosing_class = cl; + pybind11::class_< + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord, + std::shared_ptr> + cl(enclosing_class, "TemporaryRecord", ""); + cl.def( + pybind11::init([](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryRecord const &o) { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryRecord(o); + })); + cl.def(pybind11::init([]() { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryRecord(); + })); + cl.def("assign", + (struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryRecord & + (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryRecord::*)(const struct bdsg::SnarlDistanceIndex:: + TemporaryDistanceIndex:: + TemporaryRecord &)) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryRecord::operator=, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryRecord::operator=(const struct " + 
"bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryRecord &) --> struct " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryRecord &", + pybind11::return_value_policy::automatic, pybind11::arg("")); + } - { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord file:bdsg/snarl_distance_index.hpp line:1621 - auto & enclosing_class = cl; - pybind11::class_, bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord> cl(enclosing_class, "TemporaryNodeRecord", ""); - cl.def( pybind11::init( [](){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord(); } ) ); - cl.def( pybind11::init( [](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord const &o){ return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord(o); } ) ); - cl.def_readwrite("node_id", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::node_id); - cl.def_readwrite("parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::parent); - cl.def_readwrite("node_length", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::node_length); - cl.def_readwrite("rank_in_parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::rank_in_parent); - cl.def_readwrite("root_snarl_index", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::root_snarl_index); - cl.def_readwrite("distance_left_start", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::distance_left_start); - cl.def_readwrite("distance_right_start", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::distance_right_start); - cl.def_readwrite("distance_left_end", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::distance_left_end); - cl.def_readwrite("distance_right_end", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::distance_right_end); - 
cl.def_readwrite("reversed_in_parent", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::reversed_in_parent); - cl.def_readwrite("is_tip", &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::is_tip); - cl.def_static("get_max_record_length", (const unsigned long (*)()) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::get_max_record_length, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::get_max_record_length() --> const unsigned long"); - cl.def("assign", (struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord & (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::*)(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord &)) &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::operator=, "C++: bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord::operator=(const struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord &) --> struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord &", pybind11::return_value_policy::automatic, pybind11::arg("")); - } + { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord + // file:bdsg/snarl_distance_index.hpp line:1546 + auto &enclosing_class = cl; + pybind11::class_< + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord, + std::shared_ptr, + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord> + cl(enclosing_class, "TemporaryChainRecord", ""); + cl.def(pybind11::init([]() { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord(); + })); + cl.def( + pybind11::init([](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord const &o) { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord(o); + })); + cl.def_readwrite("start_node_id", + 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::start_node_id); + cl.def_readwrite("end_node_id", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::end_node_id); + cl.def_readwrite("end_node_length", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::end_node_length); + cl.def_readwrite("tree_depth", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::tree_depth); + cl.def_readwrite("parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::parent); + cl.def_readwrite("min_length", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::min_length); + cl.def_readwrite("max_length", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::max_length); + cl.def_readwrite("distance_left_start", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::distance_left_start); + cl.def_readwrite("distance_right_start", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::distance_right_start); + cl.def_readwrite("distance_left_end", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::distance_left_end); + cl.def_readwrite("distance_right_end", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::distance_right_end); + cl.def_readwrite("rank_in_parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::rank_in_parent); + cl.def_readwrite("root_snarl_index", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::root_snarl_index); + cl.def_readwrite("start_node_rev", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::start_node_rev); + cl.def_readwrite("end_node_rev", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::end_node_rev); + cl.def_readwrite("reversed_in_parent", + 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::reversed_in_parent); + cl.def_readwrite("is_trivial", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::is_trivial); + cl.def_readwrite("is_tip", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::is_tip); + cl.def_readwrite("loopable", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::loopable); + cl.def_readwrite("children", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::children); + cl.def_readwrite("prefix_sum", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::prefix_sum); + cl.def_readwrite("max_prefix_sum", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::max_prefix_sum); + cl.def_readwrite("forward_loops", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::forward_loops); + cl.def_readwrite("backward_loops", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::backward_loops); + cl.def_readwrite("chain_components", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::chain_components); + cl.def( + "get_max_record_length", + (unsigned long (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::*)(bool) const) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::get_max_record_length, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryChainRecord::get_max_record_length(bool) const --> " + "unsigned long", + pybind11::arg("include_distances")); + cl.def( + "assign", + (struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord & + (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord:: + *)(const struct bdsg::SnarlDistanceIndex:: + TemporaryDistanceIndex::TemporaryChainRecord &)) & + 
bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryChainRecord::operator=, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryChainRecord::operator=(const struct " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryChainRecord &) --> struct " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryChainRecord &", + pybind11::return_value_policy::automatic, pybind11::arg("")); + } - } + { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord + // file:bdsg/snarl_distance_index.hpp line:1588 + auto &enclosing_class = cl; + pybind11::class_< + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord, + std::shared_ptr, + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord> + cl(enclosing_class, "TemporarySnarlRecord", ""); + cl.def(pybind11::init([]() { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord(); + })); + cl.def( + pybind11::init([](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord const &o) { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord(o); + })); + cl.def_readwrite("parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::parent); + cl.def_readwrite("start_node_id", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::start_node_id); + cl.def_readwrite("start_node_length", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::start_node_length); + cl.def_readwrite("end_node_id", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::end_node_id); + cl.def_readwrite("end_node_length", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::end_node_length); + cl.def_readwrite("node_count", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::node_count); + cl.def_readwrite("min_length", + 
&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::min_length); + cl.def_readwrite("max_length", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::max_length); + cl.def_readwrite("max_distance", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::max_distance); + cl.def_readwrite("tree_depth", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::tree_depth); + cl.def_readwrite("distance_start_start", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::distance_start_start); + cl.def_readwrite("distance_end_end", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::distance_end_end); + cl.def_readwrite("rank_in_parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::rank_in_parent); + cl.def_readwrite("reversed_in_parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::reversed_in_parent); + cl.def_readwrite("start_node_rev", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::start_node_rev); + cl.def_readwrite("end_node_rev", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::end_node_rev); + cl.def_readwrite("is_trivial", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::is_trivial); + cl.def_readwrite("is_simple", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::is_simple); + cl.def_readwrite("is_tip", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::is_tip); + cl.def_readwrite("is_root_snarl", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::is_root_snarl); + cl.def_readwrite("include_distances", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::include_distances); + cl.def_readwrite("children", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + 
TemporarySnarlRecord::children); + cl.def_readwrite("tippy_child_ranks", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::tippy_child_ranks); + cl.def_readwrite("distances", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::distances); + cl.def( + "get_max_record_length", + (unsigned long (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::*)() const) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::get_max_record_length, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporarySnarlRecord::get_max_record_length() const --> unsigned " + "long"); + cl.def( + "assign", + (struct bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord & + (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord:: + *)(const struct bdsg::SnarlDistanceIndex:: + TemporaryDistanceIndex::TemporarySnarlRecord &)) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporarySnarlRecord::operator=, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporarySnarlRecord::operator=(const struct " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporarySnarlRecord &) --> struct " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporarySnarlRecord &", + pybind11::return_value_policy::automatic, pybind11::arg("")); + } - } + { // bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryNodeRecord + // file:bdsg/snarl_distance_index.hpp line:1621 + auto &enclosing_class = cl; + pybind11::class_< + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord, + std::shared_ptr, + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryRecord> + cl(enclosing_class, "TemporaryNodeRecord", ""); + cl.def(pybind11::init([]() { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord(); + })); + cl.def( + 
pybind11::init([](bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord const &o) { + return new bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord(o); + })); + cl.def_readwrite("node_id", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::node_id); + cl.def_readwrite("parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::parent); + cl.def_readwrite("node_length", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::node_length); + cl.def_readwrite("rank_in_parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::rank_in_parent); + cl.def_readwrite("root_snarl_index", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::root_snarl_index); + cl.def_readwrite("distance_left_start", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::distance_left_start); + cl.def_readwrite("distance_right_start", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::distance_right_start); + cl.def_readwrite("distance_left_end", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::distance_left_end); + cl.def_readwrite("distance_right_end", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::distance_right_end); + cl.def_readwrite("reversed_in_parent", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::reversed_in_parent); + cl.def_readwrite("is_tip", + &bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::is_tip); + cl.def_static( + "get_max_record_length", + (const unsigned long ( + *)())&bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::get_max_record_length, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryNodeRecord::get_max_record_length() --> const unsigned " + "long"); + cl.def( + "assign", + (struct 
bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord & + (bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord:: + *)(const struct bdsg::SnarlDistanceIndex:: + TemporaryDistanceIndex::TemporaryNodeRecord &)) & + bdsg::SnarlDistanceIndex::TemporaryDistanceIndex:: + TemporaryNodeRecord::operator=, + "C++: " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryNodeRecord::operator=(const struct " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryNodeRecord &) --> struct " + "bdsg::SnarlDistanceIndex::TemporaryDistanceIndex::" + "TemporaryNodeRecord &", + pybind11::return_value_policy::automatic, pybind11::arg("")); + } + } + } } diff --git a/bdsg/include/bdsg/ch.hpp b/bdsg/include/bdsg/ch.hpp new file mode 100644 index 00000000..b09b649a --- /dev/null +++ b/bdsg/include/bdsg/ch.hpp @@ -0,0 +1,375 @@ +/* +file for the contraction hierarchy method +*/ +#ifndef BDSG_CH_HPP_INCLUDED +#define BDSG_CH_HPP_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +// #define debug_binary_intersection +// #define debug_hhl_query + +namespace bdsg { +// inf implementation is largest possible int +#define INF_INT numeric_limits::max() +#define DIST_NBITS 32 +#define DIST_UINT uint32_t +typedef uint32_t NODE_UINT; +typedef int NodeId; +typedef int NodesideId; + +typedef struct HubRecord { + NodeId hub{}; + DIST_UINT dist{}; + + HubRecord() : hub{0}, dist{INF_INT} {} + HubRecord(NodeId hid, DIST_UINT min_dist) : hub{hid}, dist{min_dist} {} + + auto operator<=>(const HubRecord &r2) const { return hub <=> r2.hub; } + + auto operator<=>(const NodeId &n) const { return hub <=> n; } +} HubRecord; + +/// Allow promoting a DIST_UINT to a different type, translating infinities to +/// the type's max limit. 
+template OtherInt promote_distance(DIST_UINT val) { + if (val == INF_INT) { + return std::numeric_limits::max(); + } + return (OtherInt)val; +} + +/// Allow demoting a DIST_UINT from a different type, translating infinities +/// from the type's max limit and erroring on unrepresentably large values. +template DIST_UINT demote_distance(OtherInt val) { + if (val == std::numeric_limits::max()) { + return INF_INT; + } + if (val > (OtherInt)INF_INT) { + throw std::overflow_error( + "Cannot store excessively wide value " + std::to_string(val) + " in " + + std::to_string(DIST_NBITS) + " bits for hub labeling"); + } + return (DIST_UINT)val; +} + +/** + * For a handle graph indexed with HHL, get the HHL rank ("Boost graph ID") for + * an orientation of a node, as a source or destination. + */ +NODE_UINT bgid(const handle_t &h, const bdsg::HashGraph &hg); + +/** + * For a net graph indexed with HHL, get the HHL rank for an orientation of a + * net graph element (snarl start node, snarl end node, child node, child + * chain), as either the source or destination of a query. + * + * Snarl start nodes and snarl end nodes are handled so that "forward" + * orientation runs along the snarl, regardless of the orientation that the + * underlying handle graph node is in as a snarl boundary. + * + * Child chains and nodes are also handled so that "forward" orientation is the + * orientation the thing has in the snarl. So if a node is reversed in the + * snarl, asking about forward is actually asking about that node in its local + * reverse orientation. + * + * For net graphs, we need to distinguish between source and destination status + * to allow turning around within a child chain without traversing the full + * length of the chain. Each child chain needs to be represented by a subgraph + * with different in and out "port" nodes in each orientation. The source port + * is the one you would leave the node from in that orientation. 
+ */ +NODE_UINT bgid(size_t net_rank, bool is_reverse, bool is_source); + +/** + * For a handle or net graph indexed with HHL, take the HHL rank of an + * orientation of a node and get that of the opposite orientation of a node. + * + * For handle graphs, ranks are the same for source and destination. + * + * For net graphs, ranks differ between source and destination "ports" for a + * net graph element; this also swaps source and destination status. + */ +NODE_UINT rev_bgid(NODE_UINT n); + +typedef struct NodeProp { + // This is initialized by make_boost_graph() + DIST_UINT seqlen; + DIST_UINT max_out = 0; + NODE_UINT contracted_neighbors = 0; + NODE_UINT level = 0; + NODE_UINT arc_cover = 1; + bool contracted = false; + // This is left uninitialized until make_contraction_hierarchy() is run. + NODE_UINT new_id; +} NodeProp; + +typedef struct EdgeProp { + bool contracted = false; + DIST_UINT weight = 0; + NODE_UINT arc_cover = 1; + bool ori = true; +} EdgeProp; + +typedef boost::adjacency_list + CHOverlay; +typedef boost::filtered_graph> + ContractedGraph; + +/// Allow outputting CHOverlay objects. Output text does not end with a +/// newline. +std::ostream &operator<<(std::ostream &out, const CHOverlay &ov); + +/** + * Build the intermediate hub labeling computation data structure ("Boost + * graph") from a HashGraph. + * + * The nodes in the graph must have dense node IDs starting at 1. + * + * For later queries, orientations of nodes are assigned ranks as provided by + * the bgid() function. + */ +CHOverlay make_boost_graph(const bdsg::HashGraph &hg); +/** + * Build the intermediate hub labeling computation data structure ("Boost + * graph") for the net graph of a snarl in a TemporaryDistanceIndex. + * + * all_children must contain the child chains and nodes of the snarl, as well as + * the bounding nodes of the snarl, in any order. 
+ * + * For later queries, orientations of children or the snarl boundary nodes are + * assigned query ranks based on their snarl distance index rank. + * + * The snarl distance index ranks are 0 and 1 for the start and end nodes of the + * snarl, and the rank_in_parent field of the temporary index for each child. + */ +CHOverlay make_boost_graph( + const bdsg::SnarlDistanceIndex::TemporaryDistanceIndex &temp_index, + const SnarlDistanceIndex::temp_record_ref_t &snarl_index, + const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord + &temp_snarl_record, + const vector> &all_children, + const HandleGraph *graph); + +int edge_diff(ContractedGraph::vertex_descriptor nid, ContractedGraph &ch, + CHOverlay &ov, vector &node_dists, int hop_limit); + +void contract(CHOverlay::vertex_descriptor nid, ContractedGraph &ch, + CHOverlay &ov, vector &node_dists, + vector &shouldnt_contract, int hop_limit); + +/** + * Find the contraction hierarchy order for the graph. + * + * Initializes the new_id field of each NodeProp in the graph. + */ +void make_contraction_hierarchy(CHOverlay &ov); + +template +ItrType get_dist_itr(ItrType start_itr, ItrType hub_itr) { + auto node_count = *start_itr; + auto last_fwd_end_bound_itr = next(start_itr, 1 + node_count); + if (hub_itr >= next(start_itr, *last_fwd_end_bound_itr)) { + // backwards label + auto first_back_bound_itr = next(start_itr, 1 + node_count + 1); + auto last_back_bound_itr = next(start_itr, 1 + node_count + 1 + node_count); + auto jump_to_dist = (*last_back_bound_itr) - *first_back_bound_itr; + return next(hub_itr, jump_to_dist); + } else { + // forwards label + auto first_fwd_bound_itr = next(start_itr, 1); + auto last_fwd_bound_itr = next(start_itr, 1 + node_count); + auto jump_to_dist = (*last_fwd_bound_itr) - *first_fwd_bound_itr; + return next(hub_itr, jump_to_dist); + } +} + +DIST_UINT binary_intersection_ch(vector &v1, vector &v2); +/* + * Do binary intersection to find shared labels for two vertices. 
+ * + * start_itr should point to the first slot of the packed label data returned + * by pack_labels(), which is the label count. + * + * start_bound_index variables are relative to start_itr, and give the + * positions of the stored start bounds for the two labels; the stored end + * bounds will be in the slots after. + */ +template +DIST_UINT binary_intersection_ch(ItrType start_itr, size_t v1_start_bound_index, + size_t v2_start_bound_index) { + auto v1_start_bound_itr = next(start_itr, v1_start_bound_index); + auto v1_end_bound_itr = next(v1_start_bound_itr, 1); + auto v2_start_bound_itr = next(start_itr, v2_start_bound_index); + auto v2_end_bound_itr = next(v2_start_bound_itr, 1); + + auto v1_start_itr = next(start_itr, *v1_start_bound_itr); + auto v1_end_itr = next(start_itr, *v1_end_bound_itr); + +#ifdef debug_binary_intersection + std::cerr << "Found " << v1_end_itr - v1_start_itr << " labels for vertex 1" + << std::endl; +#endif + + auto v2_start_itr = next(start_itr, *v2_start_bound_itr); + auto v2_end_itr = next(start_itr, *v2_end_bound_itr); + +#ifdef debug_binary_intersection + std::cerr << "Found " << v2_end_itr - v2_start_itr << " labels for vertex 2" + << std::endl; +#endif + + auto v1_range = ranges::subrange(v1_start_itr, v1_end_itr); + auto v2_range = ranges::subrange(v2_start_itr, v2_end_itr); + + auto &key_vec = v1_range.size() < v2_range.size() ? v1_range : v2_range; + auto &search_vec = v1_range.size() < v2_range.size() ? 
v2_range : v1_range; + + auto search_start_itr = search_vec.begin(); + auto search_end_itr = search_vec.end(); + DIST_UINT min_dist = INF_INT; + for (auto it = key_vec.begin(); it < key_vec.end(); it++) { +#ifdef debug_binary_intersection + cerr << "Performing key query" << endl; +#endif + auto k = *it; + auto k_dist_itr = get_dist_itr(start_itr, it); +#ifdef debug_binary_intersection + cerr << "Distance for k " << k << " is " << *k_dist_itr + << ", at: " << distance(start_itr, k_dist_itr) << endl; + cerr << "searching for " << k << " between " + << distance(start_itr, search_start_itr) << " & " + << distance(start_itr, search_end_itr) << endl; +#endif + search_start_itr = lower_bound(search_start_itr, search_end_itr, k); + if (search_start_itr == search_end_itr) { +#ifdef debug_binary_intersection + std::cerr << "No more search results possible" << std::endl; +#endif + return min_dist; + } + if (*search_start_itr == k) { +#ifdef debug_binary_intersection + cerr << "match found, key: " << *search_start_itr << ", at " + << distance(start_itr, search_start_itr) << endl; +#endif + auto dist_itr = get_dist_itr(start_itr, search_start_itr); + DIST_UINT d = *(dist_itr) + *(k_dist_itr); +#ifdef debug_binary_intersection + cerr << "dist for key is: " << *dist_itr << ", at " + << distance(start_itr, dist_itr) << endl; + cerr << "total dist is: " << d << endl; +#endif + min_dist = min(min_dist, d); + } + } + return min_dist; +} + +/** + * Query stored hub label data for a minimum distance. + * + * start_itr should point to the first slot of the packed label data returned + * by pack_labels(), which is the label count. + * + * The rank space covers both orientations of each node. + * + * Returns the minimum distance from the end of the node orientation at rank1 + * to the start of the node orientation at rank2. (If working in a net graph in + * a SnarlDistanceIndex, these "nodes" may really be child chains.) 
+ * + * If rank1 == rank2, returns the minimum distance around that cycle, if any. + * + * If there is no known path between the given nodes, returns INF_INT. + */ +template +DIST_UINT hhl_query(ItrType start_itr, size_t rank1, size_t rank2) { + size_t label_count = *start_itr; + +#ifdef debug_hhl_query + std::cerr << "Making hub label query on " << label_count << " labels" + << std::endl; +#endif + + // Bounds start after the label count, and at the rank of the first + // vertex past there we find the start bound for the first vertex. + auto start_index_1 = 1 + rank1; + +#ifdef debug_hhl_query + std::cerr << "Start bound for forward label for rank " << rank1 + << " is at index " << start_index_1 << " past there" << std::endl; +#endif + + // And there's a final end value for the first set of labels before we go on + // to the bounds where we would find the start bound for the second vertex. + auto start_index_2 = 1 + label_count + 1 + rank2; + +#ifdef debug_hhl_query + std::cerr << "Start bound for reverse label for rank " << rank2 + << " is at index " << start_index_2 << " past there" << std::endl; +#endif + + DIST_UINT dist = + binary_intersection_ch(start_itr, start_index_1, start_index_2); + + return dist; +} + +void down_dijk(int node, CHOverlay &ov, vector &node_dists, + vector> &labels, + vector> &labels_rev); + +void down_dijk_rev(int node, CHOverlay &ov, vector &node_dists, + vector> &labels, + vector> &labels_rev); + +void test_dijk(int node, CHOverlay &ov, vector &node_dists, + vector> &labels, + vector> &labels_rev); + +void test_dijk_rev(int node, CHOverlay &ov, vector &node_dists, + vector> &labels, + vector> &labels_rev); + +void create_labels(vector> &labels, + vector> &labels_rev, CHOverlay &ov); + +/** + * Puts hub labels in a flat vector form + * + * Structure: + * - offsets are relative to start of flat vector + * - extra offset in each of fwd and back offset sets at the end so that end of + * ranges can be found + * -- subtracting the extra 
offset by the first offset of its set gets the + * distance to the corresponding dist of a hub + * + * The layout is: + * label count | start offsets (fwd) | start offsets (back) | fwd label hubs | + * fwd label dists | back label hubs | back label dists + */ +vector pack_labels(const vector> &labels, + const vector> &labels_back); + +// not necessary stuff +void write_to_csv(CHOverlay &ov, string out_path); + +void write_to_gr(CHOverlay &ov, string out_path); + +vector read_node_order(string in_path); +} // namespace bdsg + +#endif \ No newline at end of file diff --git a/bdsg/include/bdsg/internal/indexing_iterator.hpp b/bdsg/include/bdsg/internal/indexing_iterator.hpp new file mode 100644 index 00000000..44a9bb1a --- /dev/null +++ b/bdsg/include/bdsg/internal/indexing_iterator.hpp @@ -0,0 +1,210 @@ +#ifndef BDSG_INDEXING_ITERATOR_HPP_INCLUDED +#define BDSG_INDEXING_ITERATOR_HPP_INCLUDED + +#include + +namespace bdsg { +/** + * A random-access iterator for anything vector-like (PackedVector, PagedVector, + * etc.) that provides read-only access to elements and internally uses integer + * indexes. + * + * This iterator is invalidated if the backing object changes size or moves. 
+ */ +template +class IndexingIterator { +public: + // Iterator traits for standard library compatibility + using iterator_category = std::random_access_iterator_tag; + using value_type = uint64_t; + using difference_type = typename std::make_signed::type; + using pointer = void; + using reference = decltype(std::declval()[0]); + + //Constructors + IndexingIterator() = default; + IndexingIterator(const IndexingIterator& other) = default; + IndexingIterator& operator=(const IndexingIterator& other) = default; + ~IndexingIterator() = default; + + /// Pre-increment operator + IndexingIterator& operator++(); + + /// Post-increment operator + IndexingIterator operator++(int); + + /// Dereference operator - returns value at current position + reference operator*() const; + + /// Equality comparison + bool operator==(const IndexingIterator& other) const; + + /// Inequality comparison + bool operator!=(const IndexingIterator& other) const; + + // Bidirectional methods + + /// Pre-decrement operator + IndexingIterator& operator--(); + + /// Post-decrement operator + IndexingIterator operator--(int); + + // Random-access methods + + /// Addition of offset in place + IndexingIterator& operator+=(difference_type offset); + + /// Addition of offset + IndexingIterator operator+(difference_type offset) const; + + /// Subtraction of offset in place + IndexingIterator& operator-=(difference_type offset); + + /// Subtraction of offset + IndexingIterator operator-(difference_type offset) const; + + /// Subtraction of two iterators + difference_type operator-(const IndexingIterator& other) const; + + /// Indexing into iterator. Even though we type this as reference, remember + /// that we don't actually implement writing to our "references" and just + /// use the value type. + /// Result is undefined if iterators are to different collections. + reference operator[](difference_type offset) const; + + // Comparable iterator methods (TODO: Is there an STL concept name for this?) 
+ + /// Determine if this iterator is strictly before another. + /// Result is undefined if iterators are to different collections. + bool operator<(const IndexingIterator& other) const; + + /// Determine if this iterator is before or at another. + /// Result is undefined if iterators are to different collections. + bool operator<=(const IndexingIterator& other) const; + + /// Determine if this iterator is strictly after another. + /// Result is undefined if iterators are to different collections. + bool operator>(const IndexingIterator& other) const; + + /// Determine if this iterator is at or after another. + /// Result is undefined if iterators are to different collections. + bool operator>=(const IndexingIterator& other) const; + +private: + // Private constructor - only associated class can create iterators + IndexingIterator(const VectorLike* vec, size_t idx); + + const VectorLike* vec_ptr = nullptr; + size_t index = 0; + + // We're not allowed to use "class" when befriending a template parameter. 
+ // See + friend VectorLike; +}; + +template +IndexingIterator::IndexingIterator(const VectorLike* vec, size_t idx) + : vec_ptr(vec), index(idx) { + // Constructor +} + +template +IndexingIterator& IndexingIterator::operator++() { + ++index; + return *this; +} + +template +IndexingIterator IndexingIterator::operator++(int) { + IndexingIterator tmp = *this; + ++index; + return tmp; +} + +template +typename IndexingIterator::reference IndexingIterator::operator*() const { + return (*vec_ptr)[index]; +} + +template +bool IndexingIterator::operator==(const IndexingIterator& other) const { + return vec_ptr == other.vec_ptr && index == other.index; +} + +template +bool IndexingIterator::operator!=(const IndexingIterator& other) const { + return !(*this == other); +} + +template +IndexingIterator& IndexingIterator::operator--() { + --index; + return *this; +} + +template +IndexingIterator IndexingIterator::operator--(int) { + IndexingIterator tmp = *this; + --index; + return tmp; +} + +template +IndexingIterator& IndexingIterator::operator+=(difference_type offset) { + index += offset; + return *this; +} + +template +IndexingIterator IndexingIterator::operator+(difference_type offset) const { + return IndexingIterator(vec_ptr, index + offset); +} + +template +IndexingIterator& IndexingIterator::operator-=(difference_type offset) { + index -= offset; + return *this; +} + +template +IndexingIterator IndexingIterator::operator-(difference_type offset) const { + return IndexingIterator(vec_ptr, index - offset); +} + +template +typename IndexingIterator::difference_type IndexingIterator::operator-(const IndexingIterator& other) const { + // TODO: I don't know a way to subtract two unsigned values and get the + // signed difference in a single operation as long as that difference + // itself fits the signed type. So we cast and hope. 
+ return (difference_type) index - (difference_type) other.index; +} + +template +typename IndexingIterator::reference IndexingIterator::operator[](difference_type offset) const { + return *(*this + offset); +} + +template +bool IndexingIterator::operator<(const IndexingIterator& other) const { + return index < other.index; +} + +template +bool IndexingIterator::operator<=(const IndexingIterator& other) const { + return index <= other.index; +} + +template +bool IndexingIterator::operator>(const IndexingIterator& other) const { + return index > other.index; +} + +template +bool IndexingIterator::operator>=(const IndexingIterator& other) const { + return index >= other.index; +} +} + + +#endif diff --git a/bdsg/include/bdsg/internal/mapped_structs.hpp b/bdsg/include/bdsg/internal/mapped_structs.hpp index b3dc7881..80310273 100644 --- a/bdsg/include/bdsg/internal/mapped_structs.hpp +++ b/bdsg/include/bdsg/internal/mapped_structs.hpp @@ -19,6 +19,7 @@ #include #include #include +#include // TODO: We only target little-endian systems, like x86_64 and ARM64 Linux and // MacOS. Porting to big-endian systems will require wrapping all the numbers @@ -827,7 +828,7 @@ class UniqueMappedPointer { template> class CompatVector { public: - + CompatVector() = default; // Because we contain a pointer, we need a destructor and copy and move @@ -865,6 +866,7 @@ class CompatVector { * Empty out the vector and free any allocated memory. */ void clear(); + T& at(size_t index); const T& at(size_t index) const; @@ -937,6 +939,7 @@ using MappedVector = CompatVector>; template> class CompatIntVector { public: + using iterator = IndexingIterator; CompatIntVector() = default; @@ -996,6 +999,9 @@ class CompatIntVector { */ void clear(); + iterator begin() const; + iterator end() const; + /** * Return the width in bits of the entries. */ @@ -1108,7 +1114,7 @@ class CompatIntVector { * Get a proxy reference to read the given index. 
*/ ConstProxy operator[](size_t index) const; - + // Compatibility with SDSL-lite serialization /** @@ -1741,6 +1747,16 @@ void CompatIntVector::clear() { data.clear(); } +template +CompatIntVector::iterator CompatIntVector::begin() const { + return iterator(this, 0); +} + +template +CompatIntVector::iterator CompatIntVector::end() const { + return iterator(this, length); +} + template size_t CompatIntVector::width() const { return bit_width; @@ -1869,6 +1885,7 @@ auto CompatIntVector::at(size_t index) -> Proxy { template auto CompatIntVector::at(size_t index) const -> ConstProxy { if (index > size()) { + assert(false); throw std::out_of_range("Accessing index " + std::to_string(index) + " in integer vector of length " + std::to_string(size())); } diff --git a/bdsg/include/bdsg/internal/packed_structs.hpp b/bdsg/include/bdsg/internal/packed_structs.hpp index abca4d07..068657a8 100644 --- a/bdsg/include/bdsg/internal/packed_structs.hpp +++ b/bdsg/include/bdsg/internal/packed_structs.hpp @@ -16,6 +16,7 @@ #include #include +#include #include namespace bdsg { @@ -29,103 +30,6 @@ template inline void repack(IntVector& target, size_t new_width, size_t new_size); -/** - * A forward iterator for anything vector-like (PackedVector, PagedVector, - * etc.) that provides read-only access to elements and internally uses integer - * indexes. - * - * This iterator is invalidated if the backing object changes size or moves. 
- */ -template -class IndexingIterator { -public: - // Iterator traits for standard library compatibility - using iterator_category = std::random_access_iterator_tag; - using value_type = uint64_t; - using difference_type = typename std::make_signed::type; - using pointer = void; - using reference = uint64_t; - - // Standard iterator operations - IndexingIterator(const IndexingIterator& other) = default; - IndexingIterator& operator=(const IndexingIterator& other) = default; - ~IndexingIterator() = default; - - /// Pre-increment operator - IndexingIterator& operator++(); - - /// Post-increment operator - IndexingIterator operator++(int); - - /// Dereference operator - returns value at current position - uint64_t operator*() const; - - /// Equality comparison - bool operator==(const IndexingIterator& other) const; - - /// Inequality comparison - bool operator!=(const IndexingIterator& other) const; - - // Bidirectional methods - - /// Pre-decrement operator - IndexingIterator& operator--(); - - /// Post-decrement operator - IndexingIterator operator--(int); - - // Random-access methods - - /// Addition of offset in place - IndexingIterator& operator+=(difference_type offset); - - /// Addition of offset - IndexingIterator operator+(difference_type offset) const; - - /// Subtraction of offset in place - IndexingIterator& operator-=(difference_type offset); - - /// Subtraction of offset - IndexingIterator operator-(difference_type offset) const; - - /// Subtraction of two iterators - difference_type operator-(const IndexingIterator& other) const; - - /// Indexing into iterator. Even though we type this as reference, remember - /// that we don't actually implement writing to our "references" and just - /// use the value type. - /// Result is undefined if itrators are to different collecitons. - reference operator[](difference_type offset) const; - - // Comaprable iterator methods (TODO: Is there an STL concept name for this?) 
- - /// Determine if this iterator is strictly before another. - /// Result is undefined if iterators are to different collecitons. - bool operator<(const IndexingIterator& other) const; - - /// Determine if this iterator is before or at another. - /// Result is undefined if iterators are to different collecitons. - bool operator<=(const IndexingIterator& other) const; - - /// Determine if this iterator is strictly after another. - /// Result is undefined if iterators are to different collecitons. - bool operator>(const IndexingIterator& other) const; - - /// Determine if this iterator is at or after another. - /// Result is undefined if itrators are to different collecitons. - bool operator>=(const IndexingIterator& other) const; - -private: - // Private constructor - only associated class can create iterators - IndexingIterator(const VectorLike* vec, size_t idx); - - const VectorLike* vec_ptr = nullptr; - size_t index = 0; - - // We're not allowed to use "class" when befriending a template parameter. - // See - friend VectorLike; -}; /* * A dynamic integer vector that maintains integers in bit-compressed form. @@ -191,7 +95,10 @@ class PackedVector { /// Returns the i-th value inline uint64_t get(const size_t& i) const; - + + /// Returns the i-th value (vector-like access). + inline uint64_t operator[](size_t i) const { return get(i); } + /// Add a value to the end inline void push_back(const uint64_t& value); @@ -307,7 +214,10 @@ class PagedVector { /// Returns the i-th value inline uint64_t get(const size_t& i) const; - + + /// Returns the i-th value (vector-like access). + inline uint64_t operator[](size_t i) const { return get(i); } + /// Add a value to the end inline void push_back(const uint64_t& value); @@ -420,7 +330,10 @@ class RobustPagedVector { /// Returns the i-th value inline uint64_t get(const size_t& i) const; - + + /// Returns the i-th value (vector-like access). 
+ inline uint64_t operator[](size_t i) const { return get(i); } + /// Add a value to the end inline void push_back(const uint64_t& value); @@ -525,7 +438,10 @@ class PackedDeque { /// Returns the i-th value inline uint64_t get(const size_t& i) const; - + + /// Returns the i-th value (vector-like access). + inline uint64_t operator[](size_t i) const { return get(i); } + /// Add a value to the front inline void push_front(const uint64_t& value); @@ -745,111 +661,6 @@ inline void repack>(sdsl::int_vector<>& target, size_t new_wi target = std::move(tmp); } -///////////////////// -/// IndexingIterator -///////////////////// - -template -IndexingIterator::IndexingIterator(const VectorLike* vec, size_t idx) - : vec_ptr(vec), index(idx) { - // Constructor -} - -template -IndexingIterator& IndexingIterator::operator++() { - ++index; - return *this; -} - -template -IndexingIterator IndexingIterator::operator++(int) { - IndexingIterator tmp = *this; - ++index; - return tmp; -} - -template -uint64_t IndexingIterator::operator*() const { - return vec_ptr->get(index); -} - -template -bool IndexingIterator::operator==(const IndexingIterator& other) const { - return vec_ptr == other.vec_ptr && index == other.index; -} - -template -bool IndexingIterator::operator!=(const IndexingIterator& other) const { - return !(*this == other); -} - -template -IndexingIterator& IndexingIterator::operator--() { - --index; - return *this; -} - -template -IndexingIterator IndexingIterator::operator--(int) { - IndexingIterator tmp = *this; - --index; - return tmp; -} - -template -IndexingIterator& IndexingIterator::operator+=(difference_type offset) { - index += offset; - return *this; -} - -template -IndexingIterator IndexingIterator::operator+(difference_type offset) const { - return IndexingIterator(vec_ptr, index + offset); -} - -template -IndexingIterator& IndexingIterator::operator-=(difference_type offset) { - index -= offset; - return *this; -} - -template -IndexingIterator 
IndexingIterator::operator-(difference_type offset) const { - return IndexingIterator(vec_ptr, index - offset); -} - -template -typename IndexingIterator::difference_type IndexingIterator::operator-(const IndexingIterator& other) const { - // TODO: I don't know a way to subtract two unsigned values and get the - // signed difference in a single operation as long as that difference - // itself fits the signed type. So we cast and hope. - return (difference_type) index - (difference_type) other.index; -} - -template -typename IndexingIterator::reference IndexingIterator::operator[](difference_type offset) const { - return *(*this + offset); -} - -template -bool IndexingIterator::operator<(const IndexingIterator& other) const { - return index < other.index; -} - -template -bool IndexingIterator::operator<=(const IndexingIterator& other) const { - return index <= other.index; -} - -template -bool IndexingIterator::operator>(const IndexingIterator& other) const { - return index > other.index; -} - -template -bool IndexingIterator::operator>=(const IndexingIterator& other) const { - return index >= other.index; -} ///////////////////// diff --git a/bdsg/include/bdsg/overlays/vectorizable_overlays.hpp b/bdsg/include/bdsg/overlays/vectorizable_overlays.hpp index cf53cedc..1f86a6d7 100644 --- a/bdsg/include/bdsg/overlays/vectorizable_overlays.hpp +++ b/bdsg/include/bdsg/overlays/vectorizable_overlays.hpp @@ -191,6 +191,10 @@ class VectorizableOverlay : virtual public VectorizableHandleGraph, virtual publ sdsl::bit_vector s_bv; sdsl::rank_support_v<1> s_bv_rank; sdsl::bit_vector::select_1_type s_bv_select; + + /// When doing multithreaded overlay construction, what's the minimum + /// number of items per thread? This limits thread count on small graphs. 
+ static const size_t MIN_ITEMS_PER_THREAD; }; diff --git a/bdsg/include/bdsg/snarl_distance_index.hpp b/bdsg/include/bdsg/snarl_distance_index.hpp index 75c1db7d..b62e9b81 100644 --- a/bdsg/include/bdsg/snarl_distance_index.hpp +++ b/bdsg/include/bdsg/snarl_distance_index.hpp @@ -199,20 +199,24 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab void serialize_members(std::ostream& out) const; void deserialize_members(std::istream& in); - /// Call when loading a distance index; will error if wrong version + /* Call when loading a distance index; will error if wrong version + * + */ void check_version_on_load() const; virtual uint32_t get_magic_number() const; std::string get_prefix() const; - /// Allow for preloading the index for more accurate timing of algorithms - /// that use it, if it fits in memory. If blocking is true, waits for the - /// index to be paged in. Otherwise, just tells the OS that we will want to - /// use it. + /* Allow for preloading the index for more accurate timing of algorithms + * that use it, if it fits in memory. If blocking is true, waits for the + * index to be paged in. Otherwise, just tells the OS that we will want to + * use it. + * + */ void preload(bool blocking = false) const; -//////////////////////////////////// How we define different properties of a net handle +///////////////////////////////// How we define different properties of a net handle? public: @@ -221,8 +225,13 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab END_START, END_END, END_TIP, TIP_START, TIP_END, TIP_TIP}; - ///Type of a net_handle_t, which may not be the type of the record - ///This is to allow a node record to be seen as a chain from the perspective of a handle + /* Type of a net_handle_t, which may not be the type of the record + * This is to allow a node record to be seen as a chain from the perspective of a handle. 
+ * And to allow a simple snarl record to be seen as a node, a chain, or a snarl. + * + * TODO: What does that really mean? Why can that happen? + * + */ enum net_handle_record_t {ROOT_HANDLE=0, NODE_HANDLE, SNARL_HANDLE, CHAIN_HANDLE, SENTINEL_HANDLE}; ///////////////////////////// functions for distance calculations using net_handle_t's @@ -424,18 +433,20 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///edges are allowed bool is_simple_snarl(const net_handle_t& net) const; - ///Returns true if the given net handle refers to (a traversal of) a regular snarl - ///A regular snarl is the same as a simple snarl, except that the children may be - ///nested chains, rather than being restricted to nodes - // If the distance index doesn't store distances then this needs a graph to check edges - // If allow_internal_loops is true, then a snarl can still be considered a regular snarl if one of - // its children allows a path to change direction inside of it. For example, if a path leaving - // the start node can reach the start node again, but not by taking any edges contained within - // the netgraph of the snarl, then this snarl would be considered regular with allow_internal_loops=true, - // but irregular with allow_internal_loops=false - // If there are not distances in the distance index, then it is computationally very expensive to check for internal loops, - // so this will throw an error if allow_internal_loops is false and there are no distances - bool is_regular_snarl(const net_handle_t& net, bool allow_internal_loops=false, const handlegraph::HandleGraph* graph = nullptr) const; + /* Returns true if the given net handle refers to (a traversal of) a regular snarl. + * + * A regular snarl is the same as a simple snarl, except that the children may be + * nested chains, rather than being restricted to nodes, as long as the + * nested chains don't allow reversals. + * + * Simple and trivial snarls also count as regular snarls. 
+ * + */ + bool is_regular_snarl(const net_handle_t& net) const; + + ///Returns the number of direct children of a snarl (not counting boundary nodes). + ///O(1) — reads the stored count directly from the record without iterating. + size_t get_snarl_child_count(const net_handle_t& net) const; ///Returns true if the given net handle refers to (a traversal of) a chain. bool is_chain(const net_handle_t& net) const; @@ -545,9 +556,13 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///For 0 or 1, returns the sentinel facing in. Otherwise return the child as a chain going START_END net_handle_t get_snarl_child_from_rank(const net_handle_t& snarl, const size_t& rank) const; - /// Does this net handle store distances? + /* Does this net handle store distances? + * + */ bool has_distances(const net_handle_t& net) const; - /// Does the distance index in general store distances? + /* Does the distance index in general store distances? + * + */ bool has_distances() const; protected: @@ -582,9 +597,11 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///and end. net_handle_t get_parent_traversal(const net_handle_t& traversal_start, const net_handle_t& traversal_end) const; - private: - + + // TODO: This is apparently private because it does not actually work, and right now just prints a message to that effect. + // handlegraph::algorithms::for_each_handle_in_shortest_path() is available instead, but it doesn't use the index. + // ///Function to walk through the shortest path between the two nodes+orientations. Orientation is the same as for minimum_distance - ///traverses from the first node going forward to the second node going forward. 
///Calls iteratee on each node of the shortest path between the nodes and the distance to the start of that node @@ -610,51 +627,246 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ////////////////////////////// How to interpret net_handle_ts -// + public: ///A record_t is the type of structure that a record can be. - /// The actual distance index is stored as a series of "records" for each snarl/node/chain. - /// The record type defines what is stored in a record - /// + ///The actual distance index is stored as a series of "records" for each snarl/node/chain. + ///The record type defines what is stored in a record. + ///NODE, SNARL, and CHAIN indicate that they don't store distances. ///SIMPLE_SNARL is a snarl with all children connecting only to the boundary nodes in one direction (ie, a bubble). ///TRIVIAL_SNARL represents consecutive nodes in a chain. ///NODE represents a node that is a trivial chain. A node can only be the child of a snarl. - ///OVERSIZED_SNARL only stores distances to the boundaries. + ///OVERSIZED_SNARL stores hub labeling data to compute distances. ///ROOT_SNARL represents a connected component of the root. It has no start or end node so - /// its children technically belong to the root. + ///its children technically belong to the root. ///MULTICOMPONENT_CHAIN can represent a chain with snarls that are not start-end connected. - /// The chain is split up into components between these snarls, each node is tagged with - /// which component it belongs to. + ///The chain is split up into components between these snarls, each node is tagged with + ///which component it belongs to. /// + ///TODO: What is a CHILDREN record? Is it ever used? + /// + /// These MUST match the order in record_t_as_string! 
enum record_t {ROOT=1, NODE, DISTANCED_NODE, TRIVIAL_SNARL, DISTANCED_TRIVIAL_SNARL, SIMPLE_SNARL, DISTANCED_SIMPLE_SNARL, - SNARL, DISTANCED_SNARL, OVERSIZED_SNARL, + REGULAR_SNARL, DISTANCED_REGULAR_SNARL, OVERSIZED_REGULAR_SNARL, + SNARL, DISTANCED_SNARL, OVERSIZED_SNARL, ROOT_SNARL, DISTANCED_ROOT_SNARL, CHAIN, DISTANCED_CHAIN, MULTICOMPONENT_CHAIN, CHILDREN}; - const static bool has_distances(record_t type) { - return type == DISTANCED_NODE || type == DISTANCED_TRIVIAL_SNARL || type == DISTANCED_SIMPLE_SNARL - || type == DISTANCED_SNARL || type == OVERSIZED_SNARL || type == DISTANCED_ROOT_SNARL + + // Because the record_t encodes a complex taxonomy of snarls not *quite* + // decomposable to flags, we use these accessors to look at facets of it. + + /*Return true if records of the given type have stored distances. + * + */ + constexpr static bool has_distances(record_t type) { + return type == DISTANCED_NODE + || type == DISTANCED_TRIVIAL_SNARL || type == DISTANCED_SIMPLE_SNARL + || type == DISTANCED_REGULAR_SNARL || type == OVERSIZED_REGULAR_SNARL + || type == DISTANCED_SNARL || type == OVERSIZED_SNARL + || type == DISTANCED_ROOT_SNARL || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN; } + /*Return true if the given record type represents a root snarl. + * + */ + constexpr static bool is_root_snarl(record_t type) { + return type == ROOT_SNARL + || type == DISTANCED_ROOT_SNARL; + } + + /*Return true if the given record type represents a root or a root snarl. + * + */ + constexpr static bool is_any_root(record_t type) { + return is_root_snarl(type) + || type == ROOT; + } + + /*Return true if the given record type represents a node. + * + */ + constexpr static bool is_node(record_t type) { + return type == NODE + || type == DISTANCED_NODE; + } + + /* Return true if the given record type represents a chain. 
+ * + */ + constexpr static bool is_chain(record_t type) { + return type == CHAIN + || type == DISTANCED_CHAIN + || type == MULTICOMPONENT_CHAIN; + } + + /* Return true if the given record type represents a trivial snarl. + * + */ + constexpr static bool is_trivial_snarl(record_t type) { + return type == TRIVIAL_SNARL + || type == DISTANCED_TRIVIAL_SNARL; + } + + /* Return true if the given record type represents a simple (but not a + * trivial) snarl. + */ + constexpr static bool is_simple_snarl(record_t type) { + return type == SIMPLE_SNARL + || type == DISTANCED_SIMPLE_SNARL; + } + /* Return true if the given record type represents an oversized snarl. + * + */ + constexpr static bool is_oversized_snarl(record_t type) { + return type == OVERSIZED_SNARL + || type == OVERSIZED_REGULAR_SNARL; + } + + /* Determine if a record type is a regular, but not a simple (or + * trivial), snarl. Root snarls cannot be regular. + * + */ + constexpr static bool is_regular_nonsimple_snarl(record_t type) { + return type == REGULAR_SNARL + || type == DISTANCED_REGULAR_SNARL + || type == OVERSIZED_REGULAR_SNARL; + } + + /* Determine if a record type is a regular snarl. Root snarls cannot be + * regular. Counts simple and trivial snarls as regular. + * + */ + constexpr static bool is_regular_snarl(record_t type) { + return is_regular_nonsimple_snarl(type) + || is_simple_snarl(type) + || is_trivial_snarl(type); + } + + /* Determine if a record type is a snarl that isn't also a root or a + * simple (or trivial) snarl. A "nonsimple" snarl is implicitly + * nontrivial. + * + */ + constexpr static bool is_nonroot_nonsimple_snarl(record_t type) { + return is_regular_nonsimple_snarl(type) + || type == SNARL + || type == DISTANCED_SNARL + || type == OVERSIZED_SNARL; + } + + /* Return true if the given record type represents a snarl that is not + * simple or trivial. 
+ * + */ + constexpr static bool is_nonsimple_snarl(record_t type) { + return is_nonroot_nonsimple_snarl(type) + || is_root_snarl(type); + } + + /* Return true if the given record type represents a snarl that is not + * trivial, and also isn't a root snarl. + * + */ + constexpr static bool is_nonroot_nontrivial_snarl(record_t type) { + return is_nonroot_nonsimple_snarl(type) + || type == SIMPLE_SNARL + || type == DISTANCED_SIMPLE_SNARL; + } + + /* Return true if the given record type represents a snarl that is not + * trivial. + * + */ + constexpr static bool is_nontrivial_snarl(record_t type) { + return is_nonroot_nontrivial_snarl(type) + || is_root_snarl(type); + } + + /* Make sure a record_t is a known type other than CHILDREN + * + */ + constexpr static bool is_any_nonchildren(record_t type) { + return is_any_root(type) + || is_node(type) + || is_chain(type) + || is_nonroot_nontrivial_snarl(type) + || is_trivial_snarl(type); + } + /* Encode the type of a root snarl that may or may not have distances. + * + */ + constexpr static record_t encode_root_snarl(bool has_distances) { + return has_distances ? DISTANCED_ROOT_SNARL : ROOT_SNARL; + } + + /* Encode the type of a simple snarl that may or may not have distances. + * + */ + constexpr static record_t encode_simple_snarl(bool has_distances) { + return has_distances ? DISTANCED_SIMPLE_SNARL : SIMPLE_SNARL; + } + + /* Encode the type of a snarl that isn't a root snarl or a simple (or trivial) snarl. + * It may have distances, it may be regular, and it may be oversized. + * + */ + constexpr static record_t encode_nonroot_nonsimple_snarl(bool has_distances, bool is_regular, bool is_oversized) { + if (is_oversized) { + if (!has_distances) { + throw runtime_error("error: oversized snarl must have distances"); + } + if (is_regular) { + return OVERSIZED_REGULAR_SNARL; + } else { + return OVERSIZED_SNARL; + } + } else if (is_regular) { + return has_distances ? 
DISTANCED_REGULAR_SNARL : REGULAR_SNARL; + } else { + return has_distances ? DISTANCED_SNARL : SNARL; + } + } + + /* Encode the type of a node that may or may not have distances. + * + */ + constexpr static record_t encode_node(bool has_distances) { + return has_distances ? DISTANCED_NODE : NODE; + } + + /* Encode the type of a chain. + * It may have distances, and it may be a multicomponent chain. + */ + constexpr static record_t encode_chain(bool has_distances, bool is_multicomponent) { + if (is_multicomponent) { + if (!has_distances) { + throw runtime_error("error: multicomponent chain must have distances"); + } + return MULTICOMPONENT_CHAIN; + } else { + return has_distances ? DISTANCED_CHAIN : CHAIN; + } + } + ///Given the type of the record, return the handle type. Some record types can represent multiple things, ///for example a simple snarl record is used to represent a snarl, and the nodes/trivial chains in it. ///This will return whatever is higher on the snarl tree. A simple snarl will be considered a snarl, ///a root snarl will be considered a root, etc - const static net_handle_record_t get_record_handle_type(record_t type) { + constexpr static net_handle_record_t get_record_handle_type(record_t type) { if (type == ROOT || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { return ROOT_HANDLE; } else if (type == NODE || type == DISTANCED_NODE || type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { return NODE_HANDLE; - } else if (type == SNARL || type == DISTANCED_SNARL || type == SIMPLE_SNARL ||type == OVERSIZED_SNARL - || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL){ + } else if (is_nonroot_nontrivial_snarl(type)) { return SNARL_HANDLE; } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { return CHAIN_HANDLE; @@ -676,25 +888,25 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///The offset into records that this handle points to - const static size_t 
get_record_offset (const handlegraph::net_handle_t& net_handle) { + inline static size_t get_record_offset (const handlegraph::net_handle_t& net_handle) { return handlegraph::as_integer(net_handle) >> 15; } ///The offset of a node in a trivial snarl (0 if it isn't a node in a trivial snarl) - const static size_t get_node_record_offset (const handlegraph::net_handle_t& net_handle) { + inline static size_t get_node_record_offset (const handlegraph::net_handle_t& net_handle) { return (handlegraph::as_integer(net_handle) >> 7 ) & MAX_TRIVIAL_SNARL_NODE_COUNT; //Get 8 bits after last 7 } - const static connectivity_t get_connectivity (const handlegraph::net_handle_t& net_handle){ + inline static connectivity_t get_connectivity (const handlegraph::net_handle_t& net_handle){ size_t connectivity_as_int = (handlegraph::as_integer(net_handle)>>3) & 15; //Get 4 bits after last 3 assert (connectivity_as_int <= 9); return static_cast(connectivity_as_int); } - const static net_handle_record_t get_handle_type (const handlegraph::net_handle_t& net_handle) { + inline static net_handle_record_t get_handle_type (const handlegraph::net_handle_t& net_handle) { size_t record_type = handlegraph::as_integer(net_handle) & 7; //Get last 3 bits assert (record_type <= 4); return static_cast(record_type); } - const static handlegraph::net_handle_t get_net_handle_from_values(size_t pointer, connectivity_t connectivity, + inline static handlegraph::net_handle_t get_net_handle_from_values(size_t pointer, connectivity_t connectivity, net_handle_record_t type, size_t node_offset=0) { if (pointer > ((size_t)1 << (64-BITS_FOR_TRIVIAL_NODE_OFFSET-3-4))-1) { throw runtime_error("error: don't have space in net handle for record offset"); @@ -719,7 +931,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab ///Get the offset into snarl_tree_records for the pointer to a node record. 
- const static size_t get_node_pointer_offset (const handlegraph::nid_t& id, const handlegraph::nid_t& min_node_id, size_t component_count) { + inline static size_t get_node_pointer_offset (const handlegraph::nid_t& id, const handlegraph::nid_t& min_node_id, size_t component_count) { size_t node_records_offset = component_count + ROOT_RECORD_SIZE; size_t offset = (id-min_node_id)*2; return node_records_offset + offset; @@ -730,7 +942,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab // return ((offset-node_records_offset) / NODE_RECORD_SIZE) + min_node_id; //} - const static connectivity_t endpoints_to_connectivity(endpoint_t start, endpoint_t end) { + inline static connectivity_t endpoints_to_connectivity(endpoint_t start, endpoint_t end) { if (start == START && end == START) { return START_START; } else if (start == START && end == END) { @@ -753,7 +965,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab throw runtime_error("error: invalid endpoints"); } } - const static endpoint_t get_start_endpoint(connectivity_t connectivity) { + inline static endpoint_t get_start_endpoint(connectivity_t connectivity) { endpoint_t start_endpoint; if (connectivity == START_START || connectivity == START_END || connectivity == START_TIP){ start_endpoint = START; @@ -766,10 +978,10 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab } return start_endpoint; } - const static endpoint_t get_start_endpoint(net_handle_t net) { + inline static endpoint_t get_start_endpoint(net_handle_t net) { return get_start_endpoint(get_connectivity(net)); } - const static endpoint_t get_end_endpoint(connectivity_t connectivity) { + inline static endpoint_t get_end_endpoint(connectivity_t connectivity) { endpoint_t end_endpoint; if (connectivity == START_START || connectivity == END_START || connectivity == TIP_START){ end_endpoint = START; @@ -782,10 +994,10 @@ class SnarlDistanceIndex : public 
SnarlDecomposition, public TriviallySerializab } return end_endpoint; } - const static endpoint_t get_end_endpoint(const net_handle_t& net) { + inline static endpoint_t get_end_endpoint(const net_handle_t& net) { return get_end_endpoint(get_connectivity(net)); } - const static pair<endpoint_t, endpoint_t> connectivity_to_endpoints(const connectivity_t& connectivity) { + inline static pair<endpoint_t, endpoint_t> connectivity_to_endpoints(const connectivity_t& connectivity) { return make_pair(get_start_endpoint(connectivity), get_end_endpoint(connectivity)); } @@ -822,13 +1034,15 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab const static size_t MIN_NODE_ID_OFFSET = 4; const static size_t MAX_TREE_DEPTH_OFFSET = 5; - // While the version number is 4, store it in a bit masked way + // We store the version number in a bit masked way // to avoid getting confused with old indexes without version numbers // that start with component count - const static size_t CURRENT_VERSION_NUMBER = 4; - // A verion to allow though but warn about - const static size_t WARN_VERSION_NUMBER = 3; - /// Arbitrary large number which doens't overflow the number of bits we give + const static size_t CURRENT_VERSION_NUMBER = 5; + // A version to allow through but warn about + const static size_t WARN_VERSION_NUMBER = 9999; //placeholder value + /* Arbitrary large number which doesn't overflow the number of bits we give + * + */ const static size_t VERSION_NUMBER_SENTINEL = (1 << 10) - 1; /*Node record @@ -945,17 +1159,44 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab private: /*Give each of the enum types a name for printing */ - vector record_t_as_string = {"ROOT", "NODE", "DISTANCED_NODE", - "TRIVIAL_SNARL", "DISTANCED_TRIVIAL_SNARL", - "SNARL", "DISTANCED_SNARL", "SIMPLE_SNARL", "OVERSIZED_SNARL", - "ROOT_SNARL", "DISTANCED_ROOT_SNARL", - "CHAIN", "DISTANCED_CHAIN", "MULTICOMPONENT_CHAIN", - "CHILDREN"}; - vector connectivity_t_as_string = { "START_START", 
"START_END", "START_TIP", - "END_START", "END_END", "END_TIP", - "TIP_START", "TIP_END", "TIP_TIP"}; - vector net_handle_record_t_string = {"ROOT_HANDLE", "NODE_HANDLE", "SNARL_HANDLE", - "CHAIN_HANDLE", "SENTINEL_HANDLE"}; + // TODO: The names can't be here unless we give up using them in static methods. + const static vector record_t_as_string; // Note that the enum for this one is 1-based but the names are still 0-based + const static vector connectivity_t_as_string; // Note that the enum for this one is 1-based but the names are still 0-based + const static vector net_handle_record_t_string; + + // To deal with different offsets for the different types we use accessors. + // TODO: Should we just make std::to_string overloads instead? + + /** + * Convert a record_t to a string. + */ + inline static std::string stringify(const record_t& v) { + if ((int)v > 0 && v - 1 < record_t_as_string.size()) { + return record_t_as_string[v - 1]; + } + return ""; + } + + /** + * Convert a connectivity_t to a string. + */ + inline static std::string stringify(const connectivity_t& v) { + if ((int)v > 0 && v - 1 < connectivity_t_as_string.size()) { + return connectivity_t_as_string[v - 1]; + } + return ""; + } + + /** + * Convert a net_handle_record_t to a string. + */ + inline static std::string stringify(const net_handle_record_t& v) { + // For this one, 0 is an allowed value. + if ((int)v >= 0 && v < net_handle_record_t_string.size()) { + return net_handle_record_t_string[v]; + } + return ""; + } /* If this is 0, then don't store distances. 
@@ -1009,9 +1250,8 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab const static bool is_externally_end_end_connected(const size_t tag) {return tag & 256;} -///////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////// SnarlTreeRecord class for interpreting the records in a distance index -// +//////////////////////////////// SnarlTreeRecord class for interpreting the records in a distance index + /* Define a struct for interpreting each type of snarl tree node record (For node, snarl, chain) * * This is meant to be a layer in between snarl_tree_records and the public interface. @@ -1020,6 +1260,13 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab * * SnarlTreeRecordWriter does the same thing but for writing values to the index. * + * Note that each SnarlTreeRecord class (expecially ChainRecord) sometimes + * knows how to parse/interpret *other* actual record types, to support the + * system where a node can "pretend" to be a chain, or a simple snarl can + * pretend to be either a node (TODO: why?) or a chain. We end up parsing the + * record with the class appropriate to the thing we want to interpret it as, + * not the one you would pick from its stored record type. + * */ struct SnarlTreeRecord { @@ -1089,7 +1336,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab bool get_start_orientation() const; handlegraph::nid_t get_end_id() const; //Return true if the end node is traversed backwards to leave the snarl - handlegraph::nid_t get_end_orientation() const; + bool get_end_orientation() const; }; @@ -1268,7 +1515,11 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab //How big is the entire snarl record? 
static size_t distance_vector_size(record_t type, size_t node_count); - static size_t record_size (record_t type, size_t node_count) ; + /* vec_size parameter only needed for oversized snarls + * represents size of hub labeling-related data + * the value needed should be the first entry after the fixed-size record data + */ + static size_t record_size (record_t type, size_t node_count, size_t vec_size) ; size_t record_size() ; //Get the index into the distance vector for the calculating distance between the given node sides @@ -1302,9 +1553,32 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab SnarlRecordWriter(); - SnarlRecordWriter (size_t node_count, bdsg::yomo::UniqueMappedPointer* records, record_t type); + SnarlRecordWriter (size_t node_count, bdsg::yomo::UniqueMappedPointer* records, record_t type, size_t vec_size); SnarlRecordWriter(bdsg::yomo::UniqueMappedPointer* records, size_t pointer); + /** + * Set size of hub labels flat vector (hub_labels). + * Only used for oversized snarls. + * + * May only be called once. + * + * + * Putting vec_size in the SNARL_RECORD_SIZE'th slot due to it being the first one after the header + */ + void set_vec_size(size_t vec_size); + + /** + * Set an entry in the vector holding the hub label data. + * Only used for oversized snarls. + * + * set_vec_size() must be called first. + */ + void set_vec_entry(size_t index, size_t value); + + /** + * Set a distance matrix entry. + * Not used for oversized snarls. 
+ */ void set_distance(size_t rank1, bool right_side1, size_t rank2, bool right_side2, size_t distance); void set_distance_start_start(size_t value); @@ -1474,7 +1748,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab */ //Add a snarl to the end of the chain and return a SnarlRecordWriter pointing to it - SnarlRecordWriter add_snarl(size_t snarl_size, record_t type, size_t previous_child_offset); + SnarlRecordWriter add_snarl(size_t snarl_size, record_t type, size_t vec_size, size_t previous_child_offset); SimpleSnarlRecordWriter add_simple_snarl(size_t snarl_size, record_t type, size_t previous_child_offset); //Add a node to the end of a chain and return the offset of the record it got added to //If new_record is true, make a new trivial snarl record for the node @@ -1487,6 +1761,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab private: ////////////////////// More methods for dealing with net_handle_ts + SnarlTreeRecord get_snarl_tree_record(const handlegraph::net_handle_t& net_handle) const { return SnarlTreeRecord(get_record_offset(net_handle), &snarl_tree_records); } @@ -1552,13 +1827,26 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab */ enum temp_record_t {TEMP_CHAIN=0, TEMP_SNARL, TEMP_NODE, TEMP_ROOT}; + /** + * Type for referring to some temporary index (for a node, chain, etc.) in + * a TemporaryDistanceIndex. Holds a tag for the type of object being + * indexed, and then a number used to look it up. + * + * For a node, the number is the node ID. + * + * For anything else, it's the position in the corresponding vector of + * temporary indexes in the TemporaryDistanceIndex where the thing's + * temporary index is stored. 
+ */ + using temp_record_ref_t = std::pair; + class TemporaryDistanceIndex{ public: TemporaryDistanceIndex(); ~TemporaryDistanceIndex(); //Get a string of the start and end of a structure - std::string structure_start_end_as_string(pair index) const; + std::string structure_start_end_as_string(temp_record_ref_t index) const; handlegraph::nid_t min_node_id=0; handlegraph::nid_t max_node_id=0; @@ -1583,7 +1871,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab size_t tree_depth=0; //TODO: This isn't used but I left it because I couldn't get the python bindings to build when I changed it //Type of the parent and offset into the appropriate vector //(TEMP_ROOT, 0) if this is a root level chain - pair parent; + temp_record_ref_t parent; size_t min_length=0;//Including boundary nodes size_t max_length = 0; @@ -1605,13 +1893,37 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab bool is_tip = false; bool loopable = true; //If this is a looping snarl, this is false if the last snarl is not start-end connected - vector> children; //All children, both nodes and snarls, in order + vector children; //All children, both nodes and snarls, in order //Distances for the chain, one entry per node //TODO This would probably be more efficient as a vector of a struct of five ints vector prefix_sum; vector max_prefix_sum; + /* Forward looping distances. If no loop is possible, an entry + * + */ + /* will be std::numeric_limits::max(). If any loop is + * + */ + /* possible anywhere along the chain, the first entry will contain + * + */ + /* a possible loop distance. + * + */ vector forward_loops; - vector backward_loops; + /* Backward lopping distances. If no loop is possible, an entry + * + */ + /* will be std::numeric_limits::max(). If any lopp is + * + */ + /* possible anywhere along the chain, the last entry will contain + * + */ + /* a possible loop distance. 
+ * + */ + vector backward_loops; vector chain_components;//Which component does each node belong to, usually all 0s @@ -1619,18 +1931,36 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab size_t get_max_record_length(bool include_distances) const; }; struct TemporarySnarlRecord : TemporaryRecord{ - pair parent; + temp_record_ref_t parent; handlegraph::nid_t start_node_id; size_t start_node_length=0; handlegraph::nid_t end_node_id; size_t end_node_length=0; size_t node_count=0; - size_t min_length = std::numeric_limits::max(); //Not including boundary nodes + /* Minimum distance across the snarl from start to end, not + * + */ + /* including boundary nodes. + * + */ + size_t min_length = std::numeric_limits::max(); size_t max_length = 0; size_t max_distance = 0; size_t tree_depth = 0; //TODO: This isn't used but I left it because I couldn't get the python bindings to build when I changed it + /* Minimum distance from the start back to itself within the + * + */ + /* snarl, not including boundary nodes. + * + */ size_t distance_start_start = std::numeric_limits::max(); + /* Minimum distance from the end back to itself within the snarl, + * + */ + /* not including boundary nodes. + * + */ size_t distance_end_end = std::numeric_limits::max(); size_t rank_in_parent=0; @@ -1642,19 +1972,28 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab bool reversed_in_parent; bool start_node_rev; + // The end node is reverse if it points back into the snarl, not along it. bool end_node_rev; bool is_trivial; bool is_simple; + /* Set to true if the snarl is regular (see SnarlDistanceIndex::is_regular_snarl()). + * + */ + /* If is_simple is true, this must also be set to true when filling in the TemporarySnarlRecord. 
+ * + */ + bool is_regular = false; bool is_tip = false; bool is_root_snarl = false; bool include_distances = true; - //All children, nodes and chains, in arbitrary order - vector> children; + vector children; //All children, nodes and chains, in arbitrary order //The ranks & orientations of children that are tips - unordered_map tippy_child_ranks; + unordered_map tippy_child_ranks; //vector, pair, size_t>> distances; unordered_map, pair>, size_t> distances; - + //linearized hub labels (if not empty, this is an oversized snarl) + vector hub_labels; + //How long is the record going to be in the distance index? size_t get_max_record_length() const ; }; @@ -1664,7 +2003,7 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab rank_in_parent(0), reversed_in_parent(false){ } handlegraph::nid_t node_id; - pair parent; + temp_record_ref_t parent; size_t node_length=0; size_t rank_in_parent=0; size_t root_snarl_index = std::numeric_limits::max(); @@ -1683,12 +2022,130 @@ class SnarlDistanceIndex : public SnarlDecomposition, public TriviallySerializab return NODE_RECORD_SIZE;} }; - - vector> components; - vector> root_snarl_components; + vector components; + vector root_snarl_components; vector temp_chain_records; vector temp_snarl_records; + /* Holds temporary indexes for all the nodes. + * + */ + /// + /* While temporary snarl and chain records are stored at more or less + * + */ + /* arbitrary indexes, temporary node records are laid out by node ID, + * + */ + /* with the one for the node with ID min_node_id at index 0. This means + * + */ + /* you can look up the TemporaryNodeRecord for a node by its ID, and + * + */ + /* that some positions in the vector are empty temporary indexes for + * + */ + /* nonexistent nodes. + * + */ vector temp_node_records; + + /* Look up a chain from a temporary record reference. + * + */ + /* Throws an error if the reference is not to a chain or is out of bounds. 
+ * + */ + inline TemporaryChainRecord& get_chain(const temp_record_ref_t& ref) { + // Delegate to the const version and un-const the result. See + // + return const_cast(std::as_const(*this).get_chain(ref)); + } + + /* Look up a chain from a temporary record reference. + * + */ + /* Throws an error if the reference is not to a chain or is out of bounds. + * + */ + /* This version can be used when the object is const. + * + */ + inline const TemporaryChainRecord& get_chain(const temp_record_ref_t& ref) const { + if (ref.first != TEMP_CHAIN) { + throw std::invalid_argument("Trying to look up a non-chain as a chain"); + } + if (ref.second >= temp_chain_records.size()) { + throw std::out_of_range("Trying to look up chain " + std::to_string(ref.second) + " but temporary index only has " + std::to_string(temp_chain_records.size()) + " chains"); + } + return temp_chain_records[ref.second]; + } + + /* Look up a snarl from a temporary record reference. + * + */ + /* Throws an error if the reference is not to a snarl or is out of bounds. + * + */ + inline TemporarySnarlRecord& get_snarl(const temp_record_ref_t& ref) { + return const_cast(std::as_const(*this).get_snarl(ref)); + } + + /* Look up a snarl from a temporary record reference. + * + */ + /* Throws an error if the reference is not to a snarl or is out of bounds. + * + */ + /* This version can be used when the object is const. + * + */ + inline const TemporarySnarlRecord& get_snarl(const temp_record_ref_t& ref) const { + if (ref.first != TEMP_SNARL) { + throw std::invalid_argument("Trying to look up a non-snarl as a snarl"); + } + if (ref.second >= temp_snarl_records.size()) { + throw std::out_of_range("Trying to look up snarl " + std::to_string(ref.second) + " but temporary index only has " + std::to_string(temp_snarl_records.size()) + " snarls"); + } + return temp_snarl_records[ref.second]; + } + + /* Look up a node from a temporary record reference. 
+ * + */ + /* Throws an error if the reference is not to a node or is out of bounds. + * + */ + inline TemporaryNodeRecord& get_node(const temp_record_ref_t& ref) { + return const_cast(std::as_const(*this).get_node(ref)); + } + + /* Look up a node from a temporary record reference. + * + */ + /* Throws an error if the reference is not to a node or is out of bounds. + * + */ + /* This version can be used when the object is const. + * + */ + inline const TemporaryNodeRecord& get_node(const temp_record_ref_t& ref) const { + if (ref.first != TEMP_NODE) { + throw std::invalid_argument("Trying to look up a non-node as a node"); + } + if (ref.second < min_node_id) { + throw std::out_of_range("Trying to look up node " + std::to_string(ref.second) + " but temporary index starts at node " + std::to_string(min_node_id)); + } + if (ref.second >= temp_node_records.size() + min_node_id) { + throw std::out_of_range("Trying to look up node " + std::to_string(ref.second) + " but temporary index only goes up until node " + std::to_string(temp_node_records.size() + min_node_id)); + } + // Nodes use a node ID in the ref, not an index. + return temp_node_records[ref.second - min_node_id]; + } + + // Roots never need to be looked up. + + //whether the entire index has any oversized snarls or not bool use_oversized_snarls = false; size_t most_oversized_snarl_size = 0; friend class SnarlDistanceIndex; diff --git a/bdsg/src/ch.cpp b/bdsg/src/ch.cpp new file mode 100644 index 00000000..e53269d3 --- /dev/null +++ b/bdsg/src/ch.cpp @@ -0,0 +1,1035 @@ +/* +Hub labeling with contraction hierarchy node ordering. +*/ +#include "bdsg/ch.hpp" +#include + +//#define debug_boost_graph +//#define debug_create + +namespace bdsg { + +NODE_UINT bgid(const handle_t& h, const bdsg::HashGraph& hg) { + auto nid = hg.get_id(h); + return hg.get_is_reverse(h) ? 
(nid-1)*2+1 : (nid-1)*2; +} + +NODE_UINT bgid(size_t net_rank, bool is_reverse, bool is_source) { + // The diagram is: + // 1<-3 (reverse traversal: start_rev (source) <- end_rev (not source)) + // 0->2 (forward traversal: start_fwd (not source) -> end_fwd (source)) + return net_rank * 4 + ((is_source ^ is_reverse) ? 2 : 0) + (is_reverse ? 1 : 0); +} + +NODE_UINT rev_bgid(NODE_UINT n) { + return n ^ 1; +} + +std::ostream& operator<<(std::ostream& out, const CHOverlay& ov) { + out << "Vertices: " << num_vertices(ov) << ", Edges: " << num_edges(ov) << std::endl; + out << "--- Nodes ---" << std::endl; + for (auto v : boost::make_iterator_range(vertices(ov))) { + const NodeProp& np = ov[v]; + out << "Node " << v << ": seqlen=" << np.seqlen + << " max_out=" << np.max_out + << " contracted_neighbors=" << np.contracted_neighbors + << " level=" << np.level + << " arc_cover=" << np.arc_cover + << " contracted=" << (np.contracted ? "true" : "false") + // Skip new_id since it is not always initialized; it's only + // initialized when make_contraction_hierarchy is run. + << std::endl; + } + out << "--- Edges ---"; + for (auto e : boost::make_iterator_range(edges(ov))) { + const EdgeProp& ep = ov[e]; + out << std::endl << "Edge " << source(e, ov) << " -> " << target(e, ov) + << ": contracted=" << (ep.contracted ? "true" : "false") + << " weight=" << ep.weight + << " arc_cover=" << ep.arc_cover + << " ori=" << (ep.ori ? "true" : "false"); + // Make sure not to end with a newline. 
+ } + return out; +} + +CHOverlay make_boost_graph(const bdsg::HashGraph& hg) { + NODE_UINT node_count = hg.get_node_count(); + CHOverlay g(node_count*2); /* NOTE(review): two vertices per node; bgid() maps node id N to (N-1)*2 and (N-1)*2+1, which presumably requires node ids to be exactly 1..node_count - confirm against callers */ + hg.for_each_edge([&](const edge_t& edge_h) { + auto& [h1, h2] = edge_h; + auto i1 = bgid(h1, hg), i2 = bgid(h2, hg); + add_edge(i1, i2, g); + if (i1 != rev_bgid(i2)) { + add_edge(rev_bgid(i2), rev_bgid(i1), g); + } + }); + + hg.for_each_handle([&](const handle_t& h) { + auto nid = bgid(h, hg); + // Initialize all the seqlen fields + g[nid].seqlen = hg.get_length(h); + g[rev_bgid(nid)].seqlen = g[nid].seqlen; + }); + + + return g; +} + +CHOverlay make_boost_graph(const SnarlDistanceIndex::TemporaryDistanceIndex& temp_index, const SnarlDistanceIndex::temp_record_ref_t& snarl_index, const SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record, const vector& all_children, const HandleGraph* hgraph) { + + // Every net graph element (start node at rank 0, end node at rank 1, each + // child node/chain) needs to have 4 Boost graph nodes. We need separate + // representations for forward and reverse orientations, and within each + // orientation we need an in "port" and an out "port" so that we can draw the + // subgraphs describing internal reversals inside chains. We use the in ports + // to represent each element as a destination, and the out ports to represent + // each element as a source. + // + // We wouldn't really need all 4 vertices to represent node children, or the + // start and end, but we need O(1) mapping from net graph rank. + // + // TODO: Can we reassign the net graph ranks so all the snarls are first and + // store a number of snarl children to let us throw out a bunch of + // never-queried labels? 
+ + size_t total_vertices = all_children.size() * 4; + +#ifdef debug_boost_graph + cerr << "=== make_boost_graph: Building net graph ===" << endl; + cerr << "Number of children: " << all_children.size() << endl; + cerr << "Allocating " << total_vertices << " Boost vertices" << endl; +#endif + + CHOverlay ov(total_vertices); + + // Maps inward-facing handle to Boost graph vertex ID. + // Doesn't include outward-facing handles. + unordered_map handle_bgnid_map; + +#ifdef debug_boost_graph + cerr << "--- Phase 1: Creating Boost vertices for each net graph child ---" << endl; +#endif + + for (auto& child : all_children) { + // Ignore index in all_children and use whether the ID matches the + // start/end or else use the stored rank to determine the child number. + + // Depending on the child type we need to load these different ways and some might be INF_INT. + // The start and end handles point forward, not inward. + size_t child_net_rank; + handle_t start_handle; + handle_t end_handle; + DIST_UINT start_end_distance; + DIST_UINT start_start_distance; + DIST_UINT end_end_distance; + + if (child.first == bdsg::SnarlDistanceIndex::TEMP_CHAIN) { + // This is a child chain + auto& record = temp_index.get_chain(child); + + // A child chain can never be the start or end boundary + child_net_rank = record.rank_in_parent; + + start_handle = hgraph->get_handle(record.start_node_id, record.start_node_rev); + end_handle = hgraph->get_handle(record.end_node_id, record.end_node_rev); + + // Fetch straight-through distance. + // Will be std::numeric_limits::max() if unconnected. + start_end_distance = demote_distance(record.min_length); + + // Fetch looping distances (thanks Xian!) + // If no loop is actually possible, the loop value will be std::numeric_limits::max() + if (!record.forward_loops.empty() && record.forward_loops[0] != std::numeric_limits::max()) { + // We know a chain always has a first child that's a node, so we can + // get the start node length. 
+ auto& first_child = record.children.front(); + assert(first_child.first == bdsg::SnarlDistanceIndex::TEMP_NODE); + DIST_UINT start_node_length = demote_distance(temp_index.get_node(first_child).node_length); + // We know nothing can be infinite-distance here. + // TODO: Check for overflow? + start_start_distance = demote_distance(record.forward_loops[0]) + 2 * start_node_length; + } else { + start_start_distance = INF_INT; + } + if (!record.backward_loops.empty() && record.backward_loops.back() != std::numeric_limits::max()) { + // The end node length is already helpfully stored for us. + // We know nothing can be infinite-distance here. + // TODO: Check for overflow? + end_end_distance = record.backward_loops.back() + 2 * demote_distance(record.end_node_length); /* NOTE(review): unlike forward_loops[0] above, backward_loops.back() is not passed through demote_distance() before the addition - confirm the implicit narrowing to DIST_UINT is intended */ + } else { + end_end_distance = INF_INT; + } + + if (record.reversed_in_parent) { + // Fix up everything so we're thinking of the orientation of the chain + // in its parent, rather than its local forward orientation. + auto temp = start_handle; + start_handle = hgraph->flip(end_handle); + end_handle = hgraph->flip(temp); + std::swap(start_start_distance, end_end_distance); + } + +#ifdef debug_boost_graph + cerr << "Child " << child_net_rank << " is CHAIN: start_node=" << record.start_node_id + << " (rev=" << record.start_node_rev << "), end_node=" << record.end_node_id + << " (rev=" << record.end_node_rev << "), min_length=" << record.min_length << endl; + cerr << " start_handle: id=" << hgraph->get_id(start_handle) << " rev=" << hgraph->get_is_reverse(start_handle) << endl; + cerr << " end_handle: id=" << hgraph->get_id(end_handle) << " rev=" << hgraph->get_is_reverse(end_handle) << endl; + cerr << " (reversed_in_parent=" << record.reversed_in_parent << ")" << endl; + cerr << " Boost vertices: " << bgid(child_net_rank, false, false) << " (start_fwd), " + << bgid(child_net_rank, true, true) << " (start_rev), " + << bgid(child_net_rank, false, true) << " (end_fwd), " + << bgid(child_net_rank, true, 
false) << " (end_rev)" << endl; +#endif + + } else if (child.first == bdsg::SnarlDistanceIndex::TEMP_NODE) { + // This is a child node + auto& record = temp_index.get_node(child); + + // The rank may need to be 0 or 1 if we are a start or end bound. + if (record.node_id == temp_snarl_record.start_node_id) { + // TODO: Don't we need to handle having the same node as a start and an end bound??? + child_net_rank = 0; + // Handles need to point along snarl + start_handle = hgraph->get_handle(temp_snarl_record.start_node_id, temp_snarl_record.start_node_rev); + } else if (record.node_id == temp_snarl_record.end_node_id) { + child_net_rank = 1; + // Handles need to point along snarl + start_handle = hgraph->get_handle(temp_snarl_record.end_node_id, temp_snarl_record.end_node_rev); + } else { + child_net_rank = record.rank_in_parent; + // Handle needs to represent the thing in the orientation we have it in in the snarl. + start_handle = hgraph->get_handle(record.node_id, record.reversed_in_parent); + } + + // Node is potentially reachable in both directions (though we only want to index one of these for bounds) + end_handle = start_handle; + + start_end_distance = record.node_length; + start_start_distance = INF_INT; + end_end_distance = INF_INT; + +#ifdef debug_boost_graph + cerr << "Child " << child_net_rank << " is NODE: node_id=" << record.node_id + << " (reversed_in_parent=" << record.reversed_in_parent << "), length=" << record.node_length << endl; + cerr << " id=" << hgraph->get_id(start_handle) << " rev=" << hgraph->get_is_reverse(start_handle) << endl; +#endif + } else { + throw std::runtime_error("unexpected rec_type: " + std::to_string(child.first)); + } + + // Initialize all the seqlen fields of the vertices to 0; we only use edge + // weights in this mode, but we're still responsible for them. + // TODO: Is it worth doing this just as a separate scan in order instead? 
+ for (bool is_reverse : {false, true}) { + for (bool is_source : {false, true}) { + ov[bgid(child_net_rank, is_reverse, is_source)].seqlen = 0; + } + } + + // Map inward orientations of start and end handles + if (child_net_rank != 0) { + // We can arrive at the start of everything but our own start. + handle_bgnid_map[start_handle] = bgid(child_net_rank, false, false); + } + if (child_net_rank != 1) { + // We can arrive at the end of everything but our own end. + handle_bgnid_map[hgraph->flip(end_handle)] = bgid(child_net_rank, true, false); + } + +#ifdef debug_boost_graph + cerr << " Mapping start_handle (inward) -> Boost " << (handle_bgnid_map.count(start_handle) ? std::to_string(handle_bgnid_map.at(start_handle)) : std::string("(unmapped)")) << endl; /* Look up only: operator[] would default-insert the boundary handle that was deliberately skipped above, and Phase 2 iterates this map. */ + cerr << " Mapping flip(end_handle) (inward) -> Boost " << (handle_bgnid_map.count(hgraph->flip(end_handle)) ? std::to_string(handle_bgnid_map.at(hgraph->flip(end_handle))) : std::string("(unmapped)")) << endl; +#endif + + if (start_end_distance != INF_INT) { + // Add edges representing distance across chain + auto new_edge = add_edge(bgid(child_net_rank, false, false), bgid(child_net_rank, false, true), ov); + ov[new_edge.first].weight = start_end_distance; +#ifdef debug_boost_graph + cerr << " Edge " << bgid(child_net_rank, false, false) << " -> " << bgid(child_net_rank, false, true) << " (fwd traversal, weight=" << start_end_distance << ")" << endl; +#endif + + new_edge = add_edge(bgid(child_net_rank, true, false), bgid(child_net_rank, true, true), ov); + ov[new_edge.first].weight = start_end_distance; +#ifdef debug_boost_graph + cerr << " Edge " << bgid(child_net_rank, true, false) << " -> " << bgid(child_net_rank, true, true) << " (rev traversal, weight=" << start_end_distance << ")" << endl; +#endif + } + +#ifdef debug_boost_graph + cerr << " Loop distances: start_start=" << start_start_distance << ", end_end=" << end_end_distance << endl; +#endif + + if (end_end_distance != INF_INT) { + // Loops are edges between different orientations of the same endpoint + auto new_loop_edge = add_edge(bgid(child_net_rank, true, false), bgid(child_net_rank, false, true), ov); + 
ov[new_loop_edge.first].weight = end_end_distance; +#ifdef debug_boost_graph + cerr << " Edge " << bgid(child_net_rank, true, false) << " -> " << bgid(child_net_rank, false, true) << " (end loop, weight=" << end_end_distance << ")" << endl; +#endif + } + + if (start_start_distance != INF_INT) { + auto new_loop_edge = add_edge(bgid(child_net_rank, false, false), bgid(child_net_rank, true, true), ov); + ov[new_loop_edge.first].weight = start_start_distance; +#ifdef debug_boost_graph + cerr << " Edge " << bgid(child_net_rank, false, false) << " -> " << bgid(child_net_rank, true, true) << " (start loop, weight=" << start_start_distance << ")" << endl; +#endif + } + } + +#ifdef debug_boost_graph + cerr << "--- Phase 2: Adding edges between children based on handle graph edges ---" << endl; +#endif + + for (auto [handle_in, vertex_id_in] : handle_bgnid_map) { + // The map contains inward-facing orientations of every handle. + // So get the outward-facing version. + handle_t handle = hgraph->flip(handle_in); + NODE_UINT vertex_id = rev_bgid(vertex_id_in); + +#ifdef debug_boost_graph + cerr << " handle(id=" << hgraph->get_id(handle) << ", rev=" << hgraph->get_is_reverse(handle) << ") -> Boost " << vertex_id << endl; +#endif + + // We need to get all the edges off the right side of this outward-facing + // handle and create them if they don't already exist. + hgraph->follow_edges(handle, false, [&] (const handle_t& next) { + auto found = handle_bgnid_map.find(next); + if (found == handle_bgnid_map.end()) { + // We're looking outside our net graph, or have reached something + // not inward-facing (like across the inside of a chain). + // Don't add the edge. 
+ return; + } + NODE_UINT next_id = found->second; + +#ifdef debug_boost_graph + cerr << " Connects to handle(id=" << hgraph->get_id(next) << ", rev=" << hgraph->get_is_reverse(next) << ") -> Boost " << next_id << endl; +#endif + + auto edge_info = edge(vertex_id, next_id, ov); + if (!edge_info.second) { +#ifdef debug_boost_graph + cerr << " Adding edge " << vertex_id << " -> " << next_id << endl; + cerr << " Adding reverse edge " << rev_bgid(next_id) << " -> " << rev_bgid(vertex_id) << endl; +#endif + add_edge(vertex_id, next_id, ov); + add_edge(rev_bgid(next_id), rev_bgid(vertex_id), ov); + } else { +#ifdef debug_boost_graph + cerr << " Edge already exists" << endl; +#endif + } + }); + } + + +#ifdef debug_boost_graph + cerr << "=== make_boost_graph complete ===" << endl; +#endif + return ov; +} + +/* Gets edge difference term for node priority calculation in contraction hierarchy build. + * See Geisberger et al. for more details: https://turing.iem.thm.de/routeplanning/hwy/contract.pdf + * Also updates the arc cover term for a node. + * Arc cover = H(node) term of Abraham et al: https://www.microsoft.com/en-us/research/wp-content/uploads/2011/05/hl-sea.pdf + */ +int edge_diff(ContractedGraph::vertex_descriptor nid, ContractedGraph& ch, CHOverlay& ov, vector& node_dists, int hop_limit = 2) { + auto [out_start, out_end] = out_edges(nid, ch); + auto [in_start, in_end] = in_edges(nid, ch); + + // Reset the arc_cover accumulator. 
arc_cover tracks how many original + // (pre-contraction) edges are represented by each shortcut + ov[nid].arc_cover = 0; + int eadd = 0; + + // For each in-neighbor V of U + //thanks https://theboostcpplibraries.com/boost.graph-vertices-and-edges for iteration code + std::for_each(in_start, in_end, [&](ContractedGraph::edge_descriptor eid) { + auto in_node = source(eid, ch); + DIST_UINT in_w = ch[eid].weight; + + // --- Witness search --- + // We need to find (approximately) how many shortcut edges will be required to preserve distances + // upon deleting U. + // First, we do Dijkstra at this in-neighbor V. + // Second, we check if the path V->U->W, where W is an out-neighbor of U + // has distance V->W, if so it is counted as a necessary shortcut. + std::priority_queue, vector>, greater>> q; + for (auto edge : boost::make_iterator_range(out_edges(in_node, ch))) { q.emplace(ch[edge].weight, target(edge, ch)); } + int num_iter = 0; + vector to_reset; + + // The furthest out-neighbor W we'd ever need a shortcut to is bounded by + // in_w + seqlen(U) + max_out(U), where max_out is the heaviest out-edge + // weight from U. Any Dijkstra path longer than this can't possibly be a + // witness for *any* V->U->W shortcut, so we can stop early. 
+ DIST_UINT stop_dist = in_w + ov[nid].seqlen + ov[nid].max_out; + + //hop limit idea from https://turing.iem.thm.de/routeplanning/hwy/contract.pdf + // Explore paths from V, stop condition is any one of: + // 1) hop_limit steps (limit needed to avoid long runtimes) + // 2) reaching stop_dist + // 3) all nodes visited + while ((!q.empty()) && (num_iter < hop_limit)) { + auto [cur_dist, cur_node] = q.top(); + to_reset.push_back(cur_node); + if (cur_dist > stop_dist) { break; } + q.pop(); + + for (auto edge : boost::make_iterator_range(out_edges(cur_node, ch))) { + DIST_UINT new_dist = ch[edge].weight + cur_dist + ov[cur_node].seqlen; + auto t = target(edge,ch); + if (new_dist < node_dists[t]) { + q.emplace(new_dist, t); + node_dists[t] = new_dist; + } + } + num_iter += 1; + } + + // --- Shortcut necessity check --- + // For each out-neighbor W of U (i.e. edge U -> W exists), check whether + // the path V->U->W is necessary as a shortcut. + std::for_each(out_start, out_end, [&](ContractedGraph::edge_descriptor eid2) { + // Cost of the path through U: dist(V,U) + seqlen(U) + dist(U,W) + // If no witness path V->...->W was found with equal or shorter distance + // (node_dists[W] would reflect that), then this shortcut is required. + if (in_w+ch[nid].seqlen+ch[eid2].weight <= node_dists[target(eid2, ch)]) { + eadd += 1; + // Accumulate arc coverage: the shortcut "inherits" the coverage of + // both the incoming and outgoing original edges it replaces. + ov[nid].arc_cover += (ov[eid].arc_cover + ov[eid2].arc_cover); + } + }); + + // Reset node_dists for the next in-neighbor's search. + for (auto n: to_reset) { node_dists[n] = INF_INT; } + + while (!q.empty()) { node_dists[get<1>(q.top())] = INF_INT; q.pop(); } + }); + + // edges_removed = every in-edge + every out-edge of U (all get deleted on contraction). + int edel = out_degree(nid, ch) + in_degree(nid, ch); + int ediff = eadd - edel; + + return ediff; +} + +/* Contract node `nid` out of the graph by: + * 1. 
Running a "witness search" for each in-neighbor to find whether a + * shortcut edge is actually needed to preserve shortest paths. + * 2. Adding shortcut edges u → v for every (u → nid → v) pair where no + * alternative path exists. + * 3. Marking all edges incident to `nid` as contracted (so the filtered + * ContractedGraph view stops seeing them) and updating bookkeeping on + * neighbouring nodes. + * + * The key invariant of contraction hierarchies: after we remove nid from the + * graph, every shortest path that *used* to pass through nid must still be + * reachable via a direct shortcut edge, so that later distance queries never + * need to consider nid again. + */ +void contract(CHOverlay::vertex_descriptor nid, ContractedGraph& ch, CHOverlay& ov, vector& node_dists, vector& should_not_contract, int hop_limit = 2) { + auto [out_start, out_end] = out_edges(nid, ch); + auto [in_start, in_end] = in_edges(nid, ch); + + //TODO: this part is similar to edge_diff, refactor to eliminate redundancy? 
+ //thanks https://theboostcpplibraries.com/boost.graph-vertices-and-edges for iteration code + std::for_each(in_start, in_end, [&](ContractedGraph::edge_descriptor eid) { + auto in_node = source(eid, ch); + if (in_node == nid) { return; } + DIST_UINT in_w = ch[eid].weight; + DIST_UINT stop_dist = in_w + ov[nid].seqlen + ov[nid].max_out; + + + std::priority_queue, vector>, greater>> q; + for (auto edge : boost::make_iterator_range(out_edges(in_node, ch))) { + q.emplace(ch[edge].weight, target(edge, ch)); + } + + int num_iter = 0; + vector to_reset; + while ((!q.empty()) && (num_iter < hop_limit)) { + auto [cur_dist, cur_node] = q.top(); + to_reset.push_back(cur_node); + if (cur_dist > stop_dist) { break; } + q.pop(); + + for (auto edge : boost::make_iterator_range(out_edges(cur_node, ch))) { + DIST_UINT new_dist = ch[edge].weight + cur_dist + ov[cur_node].seqlen; + auto t = target(edge,ch); + if (new_dist < node_dists[t]) { + q.emplace(new_dist, t); + node_dists[t] = new_dist; + } + } + + num_iter += 1; + } + + std::for_each(out_start, out_end, [&](ContractedGraph::edge_descriptor out_e) { + DIST_UINT new_len = in_w + ch[nid].seqlen + ch[out_e].weight; + auto out_node = target(out_e, ch); + if (new_len <= node_dists[out_node] && (out_node != nid)) { + auto edge_info = edge(in_node, out_node, ov); + if (!edge_info.second){ + auto new_edge = add_edge(in_node, out_node, ov).first; + ov[new_edge].weight = new_len; + ov[new_edge].arc_cover = ov[out_e].arc_cover + ov[eid].arc_cover; + ov[new_edge].ori = false; + ov[in_node].max_out = max(ov[in_node].max_out, new_len); + } else { + if (new_len < ov[edge_info.first].weight) { + ov[edge_info.first].weight = new_len; + ov[edge_info.first].arc_cover = ov[out_e].arc_cover + ov[eid].arc_cover; + ov[in_node].max_out = max(ov[in_node].max_out, new_len); + + } + ov[edge_info.first].contracted = false; + } + } + }); + + for (auto n: to_reset) { node_dists[n] = INF_INT; } + + while (!q.empty()) { node_dists[get<1>(q.top())] = 
INF_INT; q.pop(); } + }); + + // ── Mark nid's edges as contracted and update neighbour metadata ── + // + // The ContractedGraph filter uses the `contracted` flag to hide edges, so + // setting it here effectively removes nid from future witness searches and + // edge-difference calculations. + // + // contracted_neighbors and level are part of the priority formula used when + // choosing which node to contract next: nodes adjacent to many already- + // contracted nodes (or near high-level nodes) are deprioritised so we build + // a good hierarchy. + // + // should_not_contract[neighbour] = true prevents a neighbour from being + // contracted in the same "round", ensuring the independence heuristic that + // keeps the contraction order sensible. + std::for_each(in_start, in_end, [&](ContractedGraph::edge_descriptor eid) { + auto in_node = source(eid, ch); + ov[in_node].contracted_neighbors += 1; + ov[in_node].level = max(ov[in_node].level, ov[nid].level); + ch[eid].contracted = true; + should_not_contract[in_node] = true; + }); + + std::for_each(out_start, out_end, [&](ContractedGraph::edge_descriptor eid) { + auto out_node = target(eid, ch); + ov[out_node].contracted_neighbors += 1; + ov[out_node].level = max(ov[out_node].level, ov[nid].level); + ch[eid].contracted = true; + should_not_contract[out_node] = true; + }); + + ov[nid].contracted = true; +} + +/* Builds the contraction hierarchy and assigns the hub ordering. 
+ * kinda does the staggered hop limit idea from https://www.microsoft.com/en-us/research/wp-content/uploads/2011/05/hl-sea.pdf + * but simpler (one hop limit for initial round(s), a higher one for most of the ones after those) + */ +void make_contraction_hierarchy(CHOverlay& ov) { +#ifdef debug_create + cerr << "starting degree: " << (double)num_edges(ov)/num_vertices(ov) << endl; +#endif + + //thanks https://stackoverflow.com/questions/53490593/boostget-with-boostfiltered-graph-on-adjacency-list-with-netsed-properties for filtered_graph code + auto contracted_filter = [&](CHOverlay::edge_descriptor eid) { return !(ov[eid].contracted); }; + + ContractedGraph contracted_g(ov, contracted_filter); + vector skip(num_vertices(ov), false); + int num_contractions = 0; + + vector node_dists(num_vertices(ov), INF_INT); + + // Do initial round(s). + // These round(s) do not use a priority queue. + // This helps speed things up as the initial number of nodes can be very large. + // A sufficiently 'stringy' graph should have about half of the nodes + // eliminated in the first round. 
+ uint8_t num_starting_rounds = 1; + for (uint8_t rnd = 0; rnd < num_starting_rounds; rnd++) { + std::fill(skip.begin(), skip.end(), false); + + for (NODE_UINT i = 0; i < num_vertices(ov); i+=1) { + if (ov[i].contracted || skip[i]) { continue; } + int edif = edge_diff(i, contracted_g, ov, node_dists, 250); + + int min_priority = INF_INT; + + auto [out_start, out_end] = out_edges(i, contracted_g); + auto [in_start, in_end] = in_edges(i, contracted_g); + std::for_each(out_start, out_end, [&] (auto out_edge) { + auto neigh = target(out_edge, ov); + if (skip[neigh]) {return;} + int neigh_edge_difference = edge_diff(neigh, contracted_g, ov, node_dists, 250); + + int neigh_priority = (2*neigh_edge_difference) + (1*ov[neigh].contracted_neighbors) + (5*(ov[neigh].level+1)) + ov[neigh].arc_cover; + + if (neigh_priority < min_priority) { min_priority = neigh_priority; } + + }); + + std::for_each(in_start, in_end, [&] (auto in_edge) { + auto neigh = source(in_edge, ov); + if (skip[neigh]) {return;} + int neigh_edge_difference = edge_diff(neigh, contracted_g, ov, node_dists, 250); + + int neigh_priority = (2*neigh_edge_difference) + (1*ov[neigh].contracted_neighbors) + (5*(ov[neigh].level+1)) + ov[neigh].arc_cover; + + if (neigh_priority < min_priority) { min_priority = neigh_priority; } + + }); + + int priority = (2*edif) + (1*ov[i].contracted_neighbors) + (5*(ov[i].level+1)) + ov[i].arc_cover; + + if ((priority <= min_priority)) { + + std::for_each(out_start, out_end, [&] (auto out_edge) { + auto neigh = target(out_edge, ov); + skip[neigh] = true; + }); + + std::for_each(in_start, in_end, [&] (auto in_edge) { + auto neigh = source(in_edge, ov); + skip[neigh] = true; + }); + + } else { + skip[i] = true; + } + } + + for (auto i = 0u; i < num_vertices(ov); i+=1) { + + if (ov[i].contracted) { continue; } + if ((!skip[i])) { + ov[i].new_id = num_vertices(ov)-1-num_contractions; + contract(i, contracted_g, ov, node_dists, skip, 250); + skip[i] = true; + num_contractions += 1; + } 
+ } + +#ifdef debug_create + cerr << "num contr: " << num_con << endl; + cerr << "after round " << rnd+1 << " degree: " << (double)num_edges(ov)/num_vertices(ov) << endl; +#endif + } + + +#ifdef debug_create + cerr << "left over: " << num_vertices(ov) - num_con << endl; +#endif + + // We maintain a priority queue that lets us find the smallest-priority item. + // + // We keep all but the last item heap-ified, and the smallest-priority item + // last, as our invariant. + vector> queue_objs; queue_objs.reserve(num_vertices(ov)/2); + for (int i = 0; i < num_vertices(ov); i+=1) { + if (ov[i].contracted) { continue; } + + int edge_difference = edge_diff(i, contracted_g, ov, node_dists, 1000); + + //priority formula based off that given in https://www.microsoft.com/en-us/research/wp-content/uploads/2011/05/hl-sea.pdf + int priority = (2*edge_difference) + (1*ov[i].contracted_neighbors) + (5*(ov[i].level+1)) + ov[i].arc_cover; + queue_objs.emplace_back(priority, i); + } + make_heap(queue_objs.begin(), queue_objs.end(), greater>()); + pop_heap(queue_objs.begin(), queue_objs.end(), greater>()); + + // Priority-queue-using hub order assignment. + // Stop when a certain number of nodes are left + // as they may take an enormous amount of time to finish otherwise. + uint8_t early_stop_threshold = 2; + while (queue_objs.size() > early_stop_threshold) { + auto [pri, node] = queue_objs.back(); queue_objs.pop_back(); + //preparing for next pop + pop_heap(queue_objs.begin(), queue_objs.end(), greater>()); + + int hop_limit = 1000; + int edge_difference = edge_diff(node, contracted_g, ov, node_dists, hop_limit); + + int recomputed_priority = ((2*edge_difference)+ (1*ov[node].contracted_neighbors)) + (5*(ov[node].level+1)) + ov[node].arc_cover; + + if (recomputed_priority > get<0>(queue_objs.back())) { + // After recomputing priority, the priority is actually greater than the next-lowest-priority entry. + // Put this back so we can get that one instead. 
+ // First we need to put what's the current last item back in its proper place. + push_heap(queue_objs.begin(), queue_objs.end(), greater>()); + // Then we put this item back and heapify everything + queue_objs.emplace_back(recomputed_priority, node); + push_heap(queue_objs.begin(), queue_objs.end(), greater>()); + // Then we find the new smallest-priority item. + pop_heap(queue_objs.begin(), queue_objs.end(), greater>()); + continue; + } + ov[node].level += 1; + +#ifdef debug_create + if (queue_objs.size() % 100 == 1) { + cerr << "remaining: " << queue_objs.size() << ", deg: " << (double)num_edges(ov)/num_vertices(ov) << endl; + cerr << "lv: " << ov[node].level << endl; + } +#endif + + + ov[node].new_id = num_vertices(ov)-1-num_contractions; + contract(node, contracted_g, ov, node_dists, skip, hop_limit); num_contractions += 1; + } + + // Pop the remaining nodes off the queue, assign hub ordering accordingly. + // So long as these are few in number, performance should not be hurt too badly. + while (!queue_objs.empty()) { + auto [pri, node] = queue_objs.back(); queue_objs.pop_back(); + //preparing for next pop + pop_heap(queue_objs.begin(), queue_objs.end(), greater>()); + + ov[node].new_id = num_vertices(ov)-1-num_contractions; + num_contractions += 1; + } + + auto ori_filter = [&](CHOverlay::edge_descriptor eid) { return !(ov[eid].ori); }; + remove_edge_if(ori_filter, ov); +} + +DIST_UINT binary_intersection_ch(vector& v1, vector& v2) { + vector& key_vec = v1.size() < v2.size() ? v1 : v2; + vector& search_vec = v1.size() < v2.size() ? 
v2 : v1; + + auto start_itr = search_vec.begin(); + auto end_itr = search_vec.end(); + DIST_UINT min_dist = INF_INT; + for (auto k: key_vec) { + start_itr = lower_bound(start_itr, end_itr, k); + if (start_itr == end_itr) { + return min_dist; + } + if (start_itr->hub == k.hub) { + DIST_UINT d = start_itr->dist + k.dist; + min_dist = min(min_dist, d); + } + } + return min_dist; +} + + +void down_dijk(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_back) { + auto in_node = node; + + std::priority_queue, vector>, greater>> q; + + for (auto edge : boost::make_iterator_range(out_edges(in_node, ov))) { + auto t = target(edge, ov); + + if (!ov[edge].ori) { return; } + q.emplace(ov[edge].weight, t); + node_dists[t] = ov[edge].weight; + } + + vector to_reset; + + while ((!q.empty())) { + auto [cur_dist, cur_node] = q.top(); q.pop(); + to_reset.push_back(cur_node); + + if (node_dists[cur_node] < cur_dist) { continue; } + DIST_UINT check_dist = binary_intersection_ch(labels_back[cur_node], labels[node]); + + if (check_dist <= cur_dist) {continue;} + labels_back[cur_node].emplace_back(ov[node].new_id, cur_dist); + + + for (auto edge : boost::make_iterator_range(out_edges(cur_node, ov))) { + auto t = target(edge, ov); + + if (!ov[edge].ori) { return; } + DIST_UINT new_dist = ov[edge].weight + cur_dist + ov[cur_node].seqlen; + if (new_dist < node_dists[t]) { + q.emplace(new_dist, t); + node_dists[t] = new_dist; + } + } + } + + node_dists[node] = INF_INT; + for (auto n: to_reset) { node_dists[n] = INF_INT; } + + while (!q.empty()) { node_dists[get<1>(q.top())] = INF_INT; q.pop(); } +} + +void down_dijk_back(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_back) { + auto in_node = node; + labels[node].emplace_back(ov[node].new_id, 0); + node_dists[node] = 0; + + //std::priority_queue, vector>, greater>> q; + vector> q; if (ov[node].new_id < 100) { q.reserve(num_vertices(ov)/2); } + + for (auto edge : 
boost::make_iterator_range(in_edges(in_node, ov))) { + auto s = source(edge, ov); + + if (!ov[edge].ori) { return; } + q.emplace_back(ov[edge].weight, s); + node_dists[s] = ov[edge].weight; + } + make_heap(q.begin(), q.end(), greater>()); + + vector to_reset; if (ov[node].new_id < 100) { to_reset.reserve(num_vertices(ov)/2); } + while ((!q.empty())) { + pop_heap(q.begin(), q.end(), greater>()); + auto [cur_dist, cur_node] = q.back(); q.pop_back(); + + to_reset.push_back(cur_node); + if (node_dists[cur_node] < cur_dist) { + continue; + } + + DIST_UINT check_dist = binary_intersection_ch(labels[cur_node], labels_back[node]); + if (check_dist <= cur_dist) {continue;} + labels[cur_node].emplace_back(ov[node].new_id, cur_dist+ov[node].seqlen); + + for (auto edge : boost::make_iterator_range(in_edges(cur_node, ov))) { + auto t = source(edge, ov); + + if (!ov[edge].ori) { return; } + DIST_UINT new_dist = ov[edge].weight + cur_dist + ov[cur_node].seqlen; + if (new_dist < node_dists[t]) { + q.emplace_back(new_dist, t); + push_heap(q.begin(), q.end(), greater>()); + node_dists[t] = new_dist; + } + } + + } + + node_dists[node] = INF_INT; + for (auto n: to_reset) { node_dists[n] = INF_INT; } + + for (auto& t: q) { node_dists[get<1>(t)] = INF_INT; } + q.clear(); +} + +void test_dijk(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_back) { + auto in_node = node; //node_dists[node] = 0; + + std::priority_queue, vector>, greater>> q; + for (auto edge : boost::make_iterator_range(out_edges(in_node, ov))) { + auto t = target(edge, ov); + + if (!ov[edge].ori) { return; } + q.emplace(ov[edge].weight, t); + node_dists[t] = ov[edge].weight; + } + + vector to_reset; + + while ((!q.empty())) { + auto [cur_dist, cur_node] = q.top(); q.pop(); + to_reset.push_back(cur_node); + + if (cur_dist > node_dists[cur_node]) { continue; } + + for (auto edge : boost::make_iterator_range(out_edges(cur_node, ov))) { + auto t = target(edge, ov); + + if (!ov[edge].ori) { 
return; } + DIST_UINT new_dist = ov[edge].weight + cur_dist + ov[cur_node].seqlen; + if (new_dist < node_dists[t]) { + q.emplace(new_dist, t); + node_dists[t] = new_dist; + } + } + } + + for (int cur_node = 0; cur_node < static_cast(num_vertices(ov)); cur_node++) { + + DIST_UINT check_dist = binary_intersection_ch(labels_back[cur_node], labels[node]); + if (cur_node == node) { + check_dist = min(check_dist, labels_back[cur_node].back().dist + labels[node].back().dist); + } + + if (check_dist != node_dists[cur_node]) { + cerr << "node " << cur_node << " mismatch: " << check_dist << ", actual: " << node_dists[cur_node] << endl; + } + } + + node_dists[node] = INF_INT; + for (auto n: to_reset) { node_dists[n] = INF_INT; } + + while (!q.empty()) { node_dists[get<1>(q.top())] = INF_INT; q.pop(); } +} + +void test_dijk_back(int node, CHOverlay& ov, vector& node_dists, vector>& labels, vector>& labels_back) { + auto in_node = node; + + std::priority_queue, vector>, greater>> q; + for (auto edge : boost::make_iterator_range(in_edges(in_node, ov))) { + auto s = source(edge, ov); + + if (!ov[edge].ori) { return; } + q.emplace(ov[edge].weight, s); + node_dists[s] = ov[edge].weight; + } + + vector to_reset; + + while ((!q.empty())) { + auto [cur_dist, cur_node] = q.top(); q.pop(); + to_reset.push_back(cur_node); + + if (cur_dist > node_dists[cur_node]) { continue; } + + for (auto edge : boost::make_iterator_range(in_edges(cur_node, ov))) { + auto s = source(edge, ov); + //if (ov[t].new_id <= ov[node].new_id) { return; } + if (!ov[edge].ori) { return; } + DIST_UINT new_dist = ov[edge].weight + cur_dist + ov[cur_node].seqlen; + if (new_dist < node_dists[s]) { + q.emplace(new_dist, s); + node_dists[s] = new_dist; + } + } + } + for (auto cur_node = 0u; cur_node < num_vertices(ov); cur_node++) { + DIST_UINT check_dist = binary_intersection_ch(labels[cur_node], labels_back[node]); + if (cur_node == node) { + check_dist = min(check_dist, labels[cur_node].back().dist + 
labels_back[node].back().dist); + } + + if (check_dist != node_dists[cur_node]) { + cerr << "node " << cur_node << " mismatch: " << check_dist << ", actual: " << node_dists[cur_node] << endl; + } + + } + + node_dists[node] = INF_INT; + for (auto n: to_reset) { node_dists[n] = INF_INT; } + + while (!q.empty()) { node_dists[get<1>(q.top())] = INF_INT; q.pop(); } +} + +void create_labels(vector>& labels, vector>& labels_back, CHOverlay& ov) { +#ifdef debug_create + cerr << "start create labels" << endl; +#endif + vector node_dists(num_vertices(ov), INF_INT); + vector v; v.resize(num_vertices(ov)); + for (auto i = 0u; i < num_vertices(ov); i++) { + v[ov[i].new_id] = i; + } + + for (auto j = 0u; j < num_vertices(ov); j++) { + +#ifdef debug_create + cerr << j << "th node, " << v[j] << endl; +#endif + + down_dijk_back(v[j], ov, node_dists, labels, labels_back); + + down_dijk(v[j], ov, node_dists, labels, labels_back); + } +} + +vector pack_labels(const vector>& labels, const vector>& labels_back) { + auto label_count = labels.size(); + //label_count+1 is so we can look at the next offset to determine end of range + size_t flat_vec_size = 2*(label_count+1)+1; + vector flat_label_vec; + flat_label_vec.resize(flat_vec_size); + flat_label_vec[0] = label_count; + + //fill in fwd offsets + size_t current_offset = flat_vec_size; + for (size_t i = 0; i < labels.size(); i++) { + auto& label = labels[i]; + size_t label_size = label.size(); + flat_label_vec[i+1] = current_offset; + current_offset += label_size; + + flat_vec_size += (label_size*2); + } + flat_label_vec[label_count+1] = current_offset; + + //fill in back offsets + current_offset = flat_vec_size; + for (size_t i = 0; i < labels_back.size(); i++) { + auto& label = labels_back[i]; + size_t label_size = label.size(); + flat_label_vec[i+1+label_count+1] = current_offset; + current_offset += label_size; + + flat_vec_size += (label_size*2); + } + flat_label_vec[(label_count+1)+label_count+1] = current_offset; + + 
flat_label_vec.resize(flat_vec_size); + + //copy forward label information into flat_label_vec + size_t jump_to_dist = flat_label_vec[label_count+1] - flat_label_vec[1]; + for (size_t i = 0; i < labels.size(); i++) { + auto& label = labels[i]; + size_t hub_offset = flat_label_vec[i+1]; + for (size_t j = 0; j < label.size(); j++) { + flat_label_vec[hub_offset+j] = label[j].hub; + flat_label_vec[hub_offset+j+jump_to_dist] = label[j].dist; + } + } + + //copy backward label information into flat_label_vec + jump_to_dist = flat_label_vec[(label_count+1)+label_count+1] - flat_label_vec[label_count+1+1]; + for (size_t i = 0; i < labels_back.size(); i++) { + auto& label = labels_back[i]; + size_t hub_offset = flat_label_vec[1+label_count+1+i]; + for (size_t j = 0; j < label.size(); j++) { + flat_label_vec[hub_offset+j] = label[j].hub; + flat_label_vec[hub_offset+j+jump_to_dist] = label[j].dist; + } + } + + return flat_label_vec; +} + +void write_to_csv(CHOverlay& ov, string out_path) { + ofstream out(out_path); + out << "source,target" << endl; + auto [edge_start, edge_end] = edges(ov); + + std::for_each(edge_start, edge_end, [&](const auto& e) { + out << source(e, ov) << "," << target(e, ov) << endl; + }); +} + +void write_to_gr(CHOverlay& ov, string out_path) { + ofstream out(out_path); + stringstream out_sstr; + out_sstr << "p tww " << num_vertices(ov) << " " << num_edges(ov) << endl; + auto [edge_start, edge_end] = edges(ov); + + std::for_each(edge_start, edge_end, [&](const auto& e) { + out_sstr << source(e, ov)+1 << " " << target(e, ov)+1 << endl; + }); + out << out_sstr.str(); +} + +vector read_node_order(string in_path) { + vector ordered_nodes; + ifstream in(in_path); + string node_string; + while (getline(in, node_string)) { + ordered_nodes.push_back(stoi(node_string)); + } + + return ordered_nodes; +} +} diff --git a/bdsg/src/snarl_distance_index.cpp b/bdsg/src/snarl_distance_index.cpp index 9a27592d..42df7fb4 100644 --- a/bdsg/src/snarl_distance_index.cpp +++
b/bdsg/src/snarl_distance_index.cpp @@ -1,9 +1,11 @@ //#define debug_distance_indexing //#define debug_snarl_traversal //#define debug_distances +//#define debug_parent //#define debug_distance_paths #include "bdsg/snarl_distance_index.hpp" +#include "bdsg/ch.hpp" #include #include @@ -11,6 +13,25 @@ using namespace std; using namespace handlegraph; namespace bdsg { +const vector SnarlDistanceIndex::record_t_as_string = { + "ROOT", "NODE", "DISTANCED_NODE", + "TRIVIAL_SNARL", "DISTANCED_TRIVIAL_SNARL", + "SIMPLE_SNARL", "DISTANCED_SIMPLE_SNARL", + "REGULAR_SNARL", "DISTANCED_REGULAR_SNARL", "OVERSIZED_REGULAR_SNARL", + "SNARL", "DISTANCED_SNARL", "OVERSIZED_SNARL", + "ROOT_SNARL", "DISTANCED_ROOT_SNARL", + "CHAIN", "DISTANCED_CHAIN", "MULTICOMPONENT_CHAIN", + "CHILDREN" +}; +const vector SnarlDistanceIndex::connectivity_t_as_string = { + "START_START", "START_END", "START_TIP", + "END_START", "END_END", "END_TIP", + "TIP_START", "TIP_END", "TIP_TIP" +}; +const vector SnarlDistanceIndex::net_handle_record_t_string = { + "ROOT_HANDLE", "NODE_HANDLE", "SNARL_HANDLE", + "CHAIN_HANDLE", "SENTINEL_HANDLE" +}; /////////////////////////////////////////////////////////////////////////////////////////////////// //Constructor @@ -28,26 +49,31 @@ SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryDistanceIndex(){} SnarlDistanceIndex::TemporaryDistanceIndex::~TemporaryDistanceIndex(){} -string SnarlDistanceIndex::TemporaryDistanceIndex::structure_start_end_as_string(pair index) const { - if (index.first == TEMP_NODE) { - assert(index.second == temp_node_records[index.second-min_node_id].node_id); - return "node " + std::to_string(temp_node_records[index.second-min_node_id].node_id); - } else if (index.first == TEMP_SNARL) { - const TemporarySnarlRecord& temp_snarl_record = temp_snarl_records[index.second]; - return "snarl " + std::to_string( temp_snarl_record.start_node_id) - + (temp_snarl_record.start_node_rev ? 
" rev" : " fd") - + " -> " + std::to_string( temp_snarl_record.end_node_id) - + (temp_snarl_record.end_node_rev ? " rev" : " fd"); - } else if (index.first == TEMP_CHAIN) { - const TemporaryChainRecord& temp_chain_record = temp_chain_records[index.second]; - return "chain " + std::to_string( temp_chain_record.start_node_id) - + (temp_chain_record.start_node_rev ? " rev" : " fd") - + " -> " + std::to_string( temp_chain_record.end_node_id) - + (temp_chain_record.end_node_rev ? " rev" : " fd"); - } else if (index.first == TEMP_ROOT) { - return (string) "root"; - } else { - return (string)"???" + std::to_string(index.first) + "???"; +string SnarlDistanceIndex::TemporaryDistanceIndex::structure_start_end_as_string(temp_record_ref_t index) const { + try { + if (index.first == TEMP_NODE) { + const TemporaryNodeRecord& temp_node_record = get_node(index); + assert(index.second == temp_node_record.node_id); + return "node " + std::to_string(temp_node_record.node_id); + } else if (index.first == TEMP_SNARL) { + const TemporarySnarlRecord& temp_snarl_record = get_snarl(index); + return "snarl " + std::to_string(temp_snarl_record.start_node_id) + + (temp_snarl_record.start_node_rev ? " rev" : " fd") + + " -> " + std::to_string(temp_snarl_record.end_node_id) + + (temp_snarl_record.end_node_rev ? " rev" : " fd"); + } else if (index.first == TEMP_CHAIN) { + const TemporaryChainRecord& temp_chain_record = get_chain(index); + return "chain " + std::to_string(temp_chain_record.start_node_id) + + (temp_chain_record.start_node_rev ? " rev" : " fd") + + " -> " + std::to_string(temp_chain_record.end_node_id) + + (temp_chain_record.end_node_rev ? " rev" : " fd"); + } else if (index.first == TEMP_ROOT) { + return (string) "root"; + } else { + return (string)"???" 
+ std::to_string(index.first) + "???"; + } + } catch (std::out_of_range& e) { + throw std::out_of_range("Unable to look up (" + std::to_string(index.first) + ", " + std::to_string(index.second) + ") in temporary distance index due to out of range error: " + e.what()); } } //The max record length of this chain @@ -60,7 +86,7 @@ size_t SnarlDistanceIndex::TemporaryDistanceIndex::TemporaryChainRecord::get_max size_t last_node_count = 0; // How many nodes have we seen in a row? size_t nontrivial_snarl_count = 0; size_t total_node_count = 0; - for (const pair& child : children) { + for (const temp_record_ref_t& child : children) { if (child.first == TEMP_NODE) { if (total_node_count==0 || child != children.front()) { //If this is the last node in the chain, don't do anything @@ -100,9 +126,13 @@ size_t SnarlDistanceIndex::TemporaryDistanceIndex::TemporarySnarlRecord::get_max return SimpleSnarlRecord::record_size(node_count, include_distances); } else { if (parent.first == TEMP_ROOT) { - return SnarlRecord::record_size(include_distances ? DISTANCED_ROOT_SNARL : ROOT_SNARL, node_count) + node_count; + //TODO: Why is node_count being added? + return SnarlRecord::record_size(encode_root_snarl(include_distances), node_count, 0) + node_count; + } else if (!(hub_labels.empty())) { + return SnarlRecord::record_size(encode_nonroot_nonsimple_snarl(true, is_regular, true), node_count, hub_labels.size()); } else { - return SnarlRecord::record_size(include_distances ? DISTANCED_SNARL : SNARL, node_count) + node_count; + //TODO: Why is node_count being added? 
+ return SnarlRecord::record_size(encode_nonroot_nonsimple_snarl(include_distances, is_regular, false), node_count, 0) + node_count; } } } @@ -128,8 +158,7 @@ bool SnarlDistanceIndex::is_root(const net_handle_t& net) const { bool SnarlDistanceIndex::is_root_snarl(const net_handle_t& net) const { #ifdef debug_distances if (get_handle_type(net) == ROOT_HANDLE && get_record_offset(net) != 0) { - assert(SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == ROOT_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_ROOT_SNARL); + assert(is_root_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())); } #endif @@ -141,8 +170,7 @@ bool SnarlDistanceIndex::is_snarl(const net_handle_t& net) const { #ifdef debug_distances if(get_handle_type(net) == SNARL_HANDLE){ assert(SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == SNARL_HANDLE || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == ROOT_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_ROOT_SNARL); + is_root_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type()); assert(get_node_record_offset(net) == 0 || get_node_record_offset(net) == 1); } #endif @@ -150,19 +178,20 @@ if(get_handle_type(net) == SNARL_HANDLE){ } bool SnarlDistanceIndex::is_oversized_snarl(const net_handle_t& net) const { - return SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == OVERSIZED_SNARL; + return is_oversized_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type()); } bool SnarlDistanceIndex::is_dag(const net_handle_t& snarl) const { record_t record_type = SnarlTreeRecord(snarl, &snarl_tree_records).get_record_type(); - if ( record_type == SNARL || record_type == ROOT_SNARL ) { + if ( is_nontrivial_snarl(record_type) && !has_distances(record_type) ) { //If this is a snarl but didn't store distances cerr << "warning: checking if a snarl is a dag in an index without distances. 
Returning true" << endl; return true; - } else if (record_type == DISTANCED_SNARL || record_type == OVERSIZED_SNARL || record_type == DISTANCED_ROOT_SNARL) { + } else if (is_nonsimple_snarl(record_type)) { //If this is any kind of non-simple snarl + //(We already ruled out not having distances) - if (record_type != DISTANCED_ROOT_SNARL) { + if (!is_root_snarl(record_type)) { //If there were boundary nodes, check for loops on the bounds //The bounds of the snarl facing in @@ -203,149 +232,21 @@ bool SnarlDistanceIndex::is_simple_snarl(const net_handle_t& net) const { #ifdef debug_distances if(get_handle_type(net) == SNARL_HANDLE){ assert(SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == SNARL_HANDLE || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == ROOT_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_ROOT_SNARL); + is_root_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())); } #endif return get_handle_type(net) == SNARL_HANDLE && get_node_record_offset(net) == 1; } -bool SnarlDistanceIndex::is_regular_snarl(const net_handle_t& net, bool allow_internal_loops, const handlegraph::HandleGraph* graph) const { +bool SnarlDistanceIndex::is_regular_snarl(const net_handle_t& net) const { #ifdef debug_distances if(get_handle_type(net) == SNARL_HANDLE){ assert(SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == SNARL_HANDLE || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == ROOT_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_ROOT_SNARL); + is_root_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())); } #endif - - // Helper function to check if an edge exists based on the distance in the distance index - // Depends on if we allow internal distances or not - auto has_edge = [&] (const net_handle_t& n1, const net_handle_t& n2) { - size_t dist = distance_in_parent(net, n1, n2); - if (allow_internal_loops) { 
- // If we allow internal loops, then we only check if the edge in the snarl netgraph exists - return dist == 0; - } else { - // If we don't allow internal loops, then check the distance itself - return dist != std::numeric_limits::max(); - } - }; - - record_t record_type = SnarlTreeRecord(net, &snarl_tree_records).get_record_type(); - if (record_type == ROOT_SNARL || record_type == DISTANCED_ROOT_SNARL) { - // Root snarls are not regular - return false; - } else if (record_type == SIMPLE_SNARL || record_type == DISTANCED_SIMPLE_SNARL) { - // All simple snarls are regular - return true; - } - if ((record_type == SNARL || record_type == OVERSIZED_SNARL) && graph == nullptr) { - throw runtime_error("error: is_regular_snarl requires a graph if the distance index doesn't contain distances"); - } - if (record_type == SNARL && !allow_internal_loops) { - throw runtime_error("error: is_regular_snarl requires distances in the distance index to verify that there are no internal loops"); - } - - //If there is any edge from the boundary nodes to themselves, then it cannot be regular - // How we check this depends on if we have distances or not - net_handle_t start_in = get_bound(net, false, true); - net_handle_t end_in = get_bound(net, true, true); - if (record_type == DISTANCED_SNARL || record_type == OVERSIZED_SNARL) { - if (has_edge(start_in, start_in) || - has_edge(end_in, end_in)) { - return false; - } - } else if (record_type != DISTANCED_SNARL && record_type != OVERSIZED_SNARL) { - if (graph->has_edge( get_handle(flip(start_in), graph), get_handle(flip(start_in), graph)) || - graph->has_edge( get_handle(flip(end_in), graph), get_handle(flip(end_in), graph))) { - return false; - } - } - bool is_regular = true; - - - for_each_child(net, [&](const net_handle_t& child) { - //If there isn't a path through the snarl that passes through the child - //or there's an extra path through the child then it is irregular - - // Graph handles for the left/right sides of the child, 
filled in if necessary - handlegraph::handle_t child_start_in; - handlegraph::handle_t child_end_in; - - // First check that each child is connected to the two bounds by one possible traversal - bool start_right; - bool start_left; - bool end_right; - bool end_left; - - if (record_type == DISTANCED_SNARL || record_type == OVERSIZED_SNARL) { - // If the distance index has distances, then check the distances - start_right = has_edge(start_in, child); - start_left = has_edge(start_in, flip(child)); - end_right = has_edge(end_in, child); - end_left = has_edge(end_in, flip(child)); - } else { - // If the snarl doesn't store distances then check the edges in the graph - child_start_in = is_node(child) ? get_handle(child, graph) : get_handle(get_bound(child, false, true), graph); - child_end_in = is_node(child) ? get_handle(flip(child), graph) : get_handle(get_bound(child, true, true), graph); - start_left = graph->has_edge(get_handle(start_in, graph), child_start_in); - start_right = graph->has_edge(get_handle(start_in, graph), child_end_in); - end_left = graph->has_edge(get_handle(end_in, graph), child_start_in); - end_right = graph->has_edge(get_handle(end_in, graph), child_end_in); - } - - if (start_right && end_left) { - if (start_left || end_right) { - is_regular = false; - return false; - } - } else if (start_left && end_right) { - if (start_right || end_left) { - is_regular = false; - return false; - } - } else { - //There wasn't a path through this node so it is irregular - is_regular = false; - return false; - } - - - //Next, if there is an edge to any other child, then it is irregular - for_each_child(net, [&](const net_handle_t& child2) { - if (record_type == DISTANCED_SNARL) { - if (has_edge(child, child2) || - has_edge(child, flip(child2)) || - has_edge(flip(child), child2) || - has_edge(flip(child), flip(child2))) { - is_regular = false; - return false; - } - //Return true to continue traversing - return true; - } else { - - //This may not have been filled 
in for an oversized snarl - child_start_in = is_node(child) ? get_handle(child, graph) : get_handle(get_bound(child, false, true), graph); - child_end_in = is_node(child) ? get_handle(flip(child), graph) : get_handle(get_bound(child, true, true), graph); - handlegraph::handle_t child2_start_in = is_node(child2) ? get_handle(child2, graph) : get_handle(get_bound(child2, false, true), graph); - handlegraph::handle_t child2_end_in = is_node(child2) ? get_handle(flip(child2), graph) : get_handle(get_bound(child2, true, true), graph); - if (graph->has_edge(child_start_in, child2_start_in) || - graph->has_edge(child_start_in, child2_end_in) || - graph->has_edge(child_end_in, child2_start_in) || - graph->has_edge(child_end_in, child2_end_in)) { - is_regular = false; - } - return false; - } - }); - - //Return true to continue traversing - return true; - }); - return is_regular; + return is_regular_snarl(record_type); } bool SnarlDistanceIndex::is_chain(const net_handle_t& net) const { @@ -353,8 +254,7 @@ bool SnarlDistanceIndex::is_chain(const net_handle_t& net) const { if (get_handle_type(net) ==CHAIN_HANDLE) { assert(SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == CHAIN_HANDLE || SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == NODE_HANDLE || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == SIMPLE_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_SIMPLE_SNARL); + is_simple_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type()); } #endif return get_handle_type(net) == CHAIN_HANDLE; @@ -364,8 +264,7 @@ bool SnarlDistanceIndex::is_multicomponent_chain(const net_handle_t& net) const if (get_handle_type(net) ==CHAIN_HANDLE) { assert(SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == CHAIN_HANDLE || SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == NODE_HANDLE || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == 
SIMPLE_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_SIMPLE_SNARL); + is_simple_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())); } #endif return get_handle_type(net) == CHAIN_HANDLE @@ -402,8 +301,7 @@ bool SnarlDistanceIndex::is_ordered_in_chain(const net_handle_t& child1, const n bool SnarlDistanceIndex::is_trivial_chain(const net_handle_t& net) const { bool handle_is_chain =get_handle_type(net) == CHAIN_HANDLE; bool record_is_node = SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == NODE_HANDLE; - bool record_is_simple_snarl = SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == SIMPLE_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_SIMPLE_SNARL ; + bool record_is_simple_snarl = is_simple_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type()); bool handle_has_node_offset = get_node_record_offset(net) >= 2; return handle_is_chain && (record_is_node @@ -414,8 +312,7 @@ bool SnarlDistanceIndex::is_node(const net_handle_t& net) const { #ifdef debug_distances if(get_handle_type(net) == NODE_HANDLE){ assert( SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == NODE_HANDLE - || SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == SIMPLE_SNARL - || SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_SIMPLE_SNARL ); + || is_simple_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())); } #endif return get_handle_type(net) == NODE_HANDLE; @@ -424,8 +321,7 @@ bool SnarlDistanceIndex::is_sentinel(const net_handle_t& net) const { #ifdef debug_distances if(get_handle_type(net) == SENTINEL_HANDLE){ assert(SnarlTreeRecord(net, &snarl_tree_records).get_record_handle_type() == SNARL_HANDLE - || SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == SIMPLE_SNARL - || SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_SIMPLE_SNARL); + || 
is_simple_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())); } #endif return get_handle_type(net) == SENTINEL_HANDLE; @@ -439,7 +335,10 @@ net_handle_t SnarlDistanceIndex::get_net(const handle_t& handle, const handlegra } handle_t SnarlDistanceIndex::get_handle(const net_handle_t& net, const handlegraph::HandleGraph* graph) const{ if (get_handle_type(net) == SENTINEL_HANDLE) { - SnarlRecord snarl_record(net, &snarl_tree_records); + // We don't know if this is a trivial or nontrivial snarl, so we need + // to access it with the base class. + // TODO: Make a base class for any kind of snarl. + SnarlTreeRecord snarl_record(net, &snarl_tree_records); if (starts_at(net) == START) { return graph->get_handle(snarl_record.get_start_id(), ends_at(net) == START ? !snarl_record.get_start_orientation() //Going out @@ -480,25 +379,39 @@ net_handle_t SnarlDistanceIndex::get_parent(const net_handle_t& child) const { SNARL_HANDLE, get_node_record_offset(child)); } else if (get_handle_type(child) == ROOT_HANDLE) { throw runtime_error("error: trying to find the parent of the root"); - } else if (get_record_type(snarl_tree_records->at(get_record_offset(child))) == SIMPLE_SNARL || - get_record_type(snarl_tree_records->at(get_record_offset(child))) == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(get_record_type(snarl_tree_records->at(get_record_offset(child))))) { +#ifdef debug_parent + std::cerr << "Child " << net_handle_as_string(child) << " has simple snarl record type " << stringify(get_record_type(snarl_tree_records->at(get_record_offset(child)))) << " and current handle type " << stringify(get_handle_type(child)) << std::endl; +#endif + //If this is a simple snarl and a node or chain, then the parent offset doesn't change if (get_handle_type(child) == NODE_HANDLE) { - //If this is a node, then return it as a chain + // If this is a node, then return it as a chain +#ifdef debug_parent + std::cerr << "We were looking at a simple snarl as a node; project 
it as a chain." << std::endl; +#endif return get_net_handle_from_values(get_record_offset(child), child_connectivity, CHAIN_HANDLE, get_node_record_offset(child)); } else if (get_handle_type(child) == CHAIN_HANDLE) { //If this is a chain, then return the same thing as a snarl +#ifdef debug_parent + std::cerr << "We were looking at a simple snarl as a chain; project it as a snarl." << std::endl; +#endif return get_net_handle_from_values(get_record_offset(child), START_END, SNARL_HANDLE, 1); } } //Otherwise, we need to move up one level in the snarl tree + SnarlTreeRecord child_record(child, &snarl_tree_records); //Get the pointer to the parent to find its type - size_t parent_pointer = SnarlTreeRecord(child, &snarl_tree_records).get_parent_record_offset(); - net_handle_record_t parent_type = SnarlTreeRecord(parent_pointer, &snarl_tree_records).get_record_handle_type(); + size_t parent_pointer = child_record.get_parent_record_offset(); + SnarlTreeRecord parent_record(parent_pointer, &snarl_tree_records); + net_handle_record_t parent_type = parent_record.get_record_handle_type(); +#ifdef debug_parent + std::cerr << "Parent of " << net_handle_as_string(child) << " at " << parent_pointer << " has record type " << stringify(parent_record.get_record_type()) << std::endl; +#endif //The connectivity of the parent defaults to start-end connectivity_t parent_connectivity = START_END; @@ -517,8 +430,13 @@ net_handle_t SnarlDistanceIndex::get_parent(const net_handle_t& child) const { } if (get_handle_type(child) == NODE_HANDLE && parent_type != CHAIN_HANDLE) { //If this is a node and it's parent is not a chain, we want to pretend that its - //parent is a chain - return get_net_handle_from_values(get_record_offset(child), child_connectivity, CHAIN_HANDLE, get_node_record_offset(child)); + //parent is a chain version of the child + net_handle_t projected = get_net_handle_from_values(get_record_offset(child), child_connectivity, CHAIN_HANDLE, get_node_record_offset(child)); 
+#ifdef debug_parent + std::cerr << "Parent of " << net_handle_as_string(child) << " projected as " << net_handle_as_string(projected) << std::endl; +#endif + + return projected; } return get_net_handle(parent_pointer, parent_connectivity); @@ -526,6 +444,12 @@ net_handle_t SnarlDistanceIndex::get_parent(const net_handle_t& child) const { net_handle_t SnarlDistanceIndex::get_bound(const net_handle_t& snarl, bool get_end, bool face_in) const { if (get_handle_type(snarl) == CHAIN_HANDLE) { + // This could be a real chain, a node looking like a chain, or a simple + // snarl record looking like a chain (maybe because the node it was + // looking like needs to look like a chain now). ChainRecord promises + // to know how to interpret all of them. + // TODO: the concepts involved in things looking like other things + // should be documented somewhere. ChainRecord chain_record(snarl, &snarl_tree_records); size_t offset; size_t node_offset; @@ -611,7 +535,7 @@ net_handle_t SnarlDistanceIndex::flip(const net_handle_t& net) const { net_handle_t SnarlDistanceIndex::canonical(const net_handle_t& net) const { SnarlTreeRecord record(net, &snarl_tree_records); record_t type = record.get_record_type(); - if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + if (is_any_root(type)) { return get_root(); } @@ -649,11 +573,9 @@ SnarlDecomposition::endpoint_t SnarlDistanceIndex::ends_at(const net_handle_t& t size_t SnarlDistanceIndex::get_rank_in_parent(const net_handle_t& net) const { size_t tag = snarl_tree_records->at(get_record_offset(net)); - if (get_record_type(tag) == TRIVIAL_SNARL || - get_record_type(tag) == DISTANCED_TRIVIAL_SNARL) { + if (is_trivial_snarl(get_record_type(tag))) { return TrivialSnarlRecord(get_record_offset(net), &snarl_tree_records).get_rank_in_parent(get_node_record_offset(net)); - } else if (get_record_type(tag) == SIMPLE_SNARL || - get_record_type(tag) == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(get_record_type(tag))) { if 
(is_snarl(net)) { return get_record_offset(net); } else { @@ -715,6 +637,15 @@ bool SnarlDistanceIndex::has_distances() const { return has_distances(get_node_net_handle(root_record.get_min_node_id())); } +size_t SnarlDistanceIndex::get_snarl_child_count(const net_handle_t& net) const { + record_t specific_type = SnarlTreeRecord(net, &snarl_tree_records).get_record_type(); + if (is_simple_snarl(specific_type)) { + return SimpleSnarlRecord(net, &snarl_tree_records).get_node_count(); + } else { + return SnarlRecord(net, &snarl_tree_records).get_node_count(); + } +} + bool SnarlDistanceIndex::for_each_child_impl(const net_handle_t& traversal, const std::function& iteratee) const { #ifdef debug_snarl_traversal cerr << "Go through children of " << net_handle_as_string(traversal) << endl; @@ -727,8 +658,7 @@ bool SnarlDistanceIndex::for_each_child_impl(const net_handle_t& traversal, cons if (record_type == ROOT_HANDLE) { RootRecord root_record(get_root(), &snarl_tree_records); return root_record.for_each_child(iteratee); - } else if (SnarlTreeRecord(traversal, &snarl_tree_records).get_record_type() == SIMPLE_SNARL || - SnarlTreeRecord(traversal, &snarl_tree_records).get_record_type() == DISTANCED_SIMPLE_SNARL ) { + } else if (is_simple_snarl(SnarlTreeRecord(traversal, &snarl_tree_records).get_record_type())) { //If this is a simple snarl then it is a bit different if (handle_type == CHAIN_HANDLE) { //If the handle thinks it's a chain, then it is a trivial chain in the snarl so we do @@ -741,8 +671,15 @@ bool SnarlDistanceIndex::for_each_child_impl(const net_handle_t& traversal, cons throw runtime_error("error: Looking for children of a node or sentinel in a simple snarl"); } } else if (record_type == SNARL_HANDLE) { - SnarlRecord snarl_record(traversal, &snarl_tree_records); - return snarl_record.for_each_child(iteratee); + // This could be a simple or non-simple snarl + record_t specific_type = SnarlTreeRecord(traversal, &snarl_tree_records).get_record_type(); + if 
(is_simple_snarl(specific_type)) { + SimpleSnarlRecord snarl_record(traversal, &snarl_tree_records); + return snarl_record.for_each_child(iteratee); + } else { + SnarlRecord snarl_record(traversal, &snarl_tree_records); + return snarl_record.for_each_child(iteratee); + } } else if (record_type == CHAIN_HANDLE) { ChainRecord chain_record(traversal, &snarl_tree_records); return chain_record.for_each_child(iteratee); @@ -790,8 +727,8 @@ bool SnarlDistanceIndex::follow_net_edges_impl(const net_handle_t& here, const h SnarlTreeRecord parent_record (get_parent(here), &snarl_tree_records); if (parent_record.get_record_handle_type() == ROOT_HANDLE && - parent_record.get_record_type() != ROOT_SNARL && - parent_record.get_record_type() != DISTANCED_ROOT_SNARL) { + !is_root_snarl(parent_record.get_record_type())) { + // TODO: should we check for ROOT record type here? #ifdef debug_snarl_traversal cerr << "The parent is a root so just check self connectivity" << endl; #endif @@ -838,8 +775,7 @@ bool SnarlDistanceIndex::follow_net_edges_impl(const net_handle_t& here, const h //If this is a chain (or a node pretending to be a chain) and it is the child of a snarl //Or if it is the sentinel of a snarl, then we walk through edges in the snarl //It can either run into another chain (or node) or the boundary node - bool is_root_snarl = parent_record.get_record_type() == ROOT_SNARL - || parent_record.get_record_type() == DISTANCED_ROOT_SNARL; + bool is_root_snarl = SnarlDistanceIndex::is_root_snarl(parent_record.get_record_type()); //Get the graph handle for the end node of whatever this is, pointing in the right direction @@ -895,8 +831,7 @@ bool SnarlDistanceIndex::follow_net_edges_impl(const net_handle_t& here, const h node_net_handle = flip(node_net_handle); } - if (get_record_type(snarl_tree_records->at(get_record_offset(node_net_handle))) == NODE || - get_record_type(snarl_tree_records->at(get_record_offset(node_net_handle))) == DISTANCED_NODE ) { + if 
(is_node(get_record_type(snarl_tree_records->at(get_record_offset(node_net_handle))))) { //If this is a node make a net_handle_t of a node pretending to be a chain net_handle_t next_net = get_net_handle_from_values(get_record_offset(node_net_handle), graph->get_is_reverse(h) ? END_START : START_END, @@ -905,8 +840,7 @@ bool SnarlDistanceIndex::follow_net_edges_impl(const net_handle_t& here, const h cerr << " -> actual child node " << net_handle_as_string(next_net) << endl; #endif return iteratee(next_net); - } else if (get_record_type(snarl_tree_records->at(get_record_offset(node_net_handle))) == SIMPLE_SNARL || - get_record_type(snarl_tree_records->at(get_record_offset(node_net_handle))) == DISTANCED_SIMPLE_SNARL ) { + } else if (is_simple_snarl(get_record_type(snarl_tree_records->at(get_record_offset(node_net_handle))))) { //If the node is a node in a simple snarl net_handle_t next_net = get_net_handle_from_values(get_record_offset(node_net_handle), graph->get_is_reverse(h) ? END_START : START_END, @@ -1120,11 +1054,21 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, const net_handle_t& child1, const net_handle_t& child2, const HandleGraph* graph, size_t distance_limit) const { #ifdef debug_distances + auto child1_parent = get_parent(child1); + auto child2_parent = get_parent(child2); cerr << "\t\tFind distance between " << net_handle_as_string(child1) - << " and " << net_handle_as_string(child2) - << "\tin parent " << net_handle_as_string(parent) << endl; - assert(canonical(parent) == canonical(get_parent(child1))); - assert(canonical(parent) == canonical(get_parent(child2))); + << " and " << net_handle_as_string(child2) + << " facing back toward it in parent " << net_handle_as_string(canonical(parent)) << endl; + cerr << "\t\tChild parents are " << net_handle_as_string(canonical(child1_parent)) << " and " << net_handle_as_string(canonical(child2_parent)) << endl; + + if (canonical(parent) != canonical(child1_parent) || 
canonical(parent) != canonical(child2_parent)) { + std::stringstream ss; + ss << "Error: parent mismatch!" << std::endl; + ss << as_integer(canonical(parent)) << " = " << net_handle_as_string(canonical(parent)) << std::endl; + ss << as_integer(canonical(child1_parent)) << " = " << net_handle_as_string(canonical(child1_parent)) << std::endl; + ss << as_integer(canonical(child2_parent)) << " = " << net_handle_as_string(canonical(child2_parent)) << std::endl; + throw std::runtime_error(ss.str()); + } #endif //Get the orientation of the children. This only cares about the end endpoint, and assumes that things that end @@ -1172,7 +1116,8 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, cerr << "=>They are in a snarl, check distance in snarl" << endl; cerr << "\tsnarl at offset " << parent_record_offset1 << " with ranks " << get_rank_in_parent(child1) << " " << get_rank_in_parent(child2) << endl; #endif - //They are in the same root snarl, so find the distance between them + //They are in the same root snarl, so find the distance between them. + // We know this isn't a simple snarl. 
SnarlRecord snarl_record(parent_record_offset1, &snarl_tree_records); return snarl_record.get_distance(get_rank_in_parent(child1), !child_ends_at_start1, @@ -1183,6 +1128,9 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, } else if (is_chain(parent)) { if (get_record_handle_type(get_record_type(snarl_tree_records->at(get_record_offset(parent)))) == NODE_HANDLE || get_record_handle_type(get_record_type(snarl_tree_records->at(get_record_offset(parent)))) == SNARL_HANDLE) { +#ifdef debug_distances + std::cerr << "=>They are not reachable because this \"chain\" is really a node or snarl" << std::endl; +#endif return std::numeric_limits::max(); } ChainRecord chain_record(parent, &snarl_tree_records); @@ -1279,6 +1227,10 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, //If the snarls are adjacent (and not the same snarl) return node_length2;//return the node length } + +#ifdef debug_distances + std::cerr << "=>Measure chain distance between chain ranks " << rank_in_chain1 << " and " << rank_in_chain2 << std::endl; +#endif return sum(chain_record.get_distance(rank_in_chain1, go_left1, node_length1, prefix_sum1, @@ -1292,20 +1244,20 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, } else if (is_snarl(parent)) { bool snarl_is_root = is_root_snarl(parent); - size_t rank1, rank2; bool rev1, rev2; + size_t rank1, rank2; bool dir1, dir2; if (is_sentinel(child1)) { rank1 = starts_at(child1) == START ? 0 : 1; - rev1 = false; + dir1 = false; } else { rank1 = get_rank_in_parent(child1); - rev1 = !child_ends_at_start1; + dir1 = !child_ends_at_start1; } if (is_sentinel(child2)) { rank2 = starts_at(child2) == START ? 
0 : 1; - rev2 = false; + dir2 = false; } else { rank2 = get_rank_in_parent(child2); - rev2 = !child_ends_at_start2; + dir2 = !child_ends_at_start2; } if ((is_sentinel(child1) && starts_at(child1) == ends_at(child1)) || (is_sentinel(child2) && starts_at(child2) == ends_at(child2)) ) { @@ -1317,93 +1269,142 @@ size_t SnarlDistanceIndex::distance_in_parent(const net_handle_t& parent, } #ifdef debug_distances - cerr << " between ranks " << rank1 << " " << rev1 << " " << rank2 << " " << rev2 << endl; + cerr << " between ranks " << rank1 << " " << dir1 << " " << rank2 << " " << dir2 << endl; #endif if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == DISTANCED_SIMPLE_SNARL) { - return SimpleSnarlRecord(parent, &snarl_tree_records).get_distance(rank1, rev1, rank2, rev2); - } else if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == OVERSIZED_SNARL - && !(rank1 == 0 || rank1 == 1 || rank2 == 0 || rank2 == 1) ) { - //If this is an oversized snarl and we're looking for internal distances, then we didn't store the - //distance and we have to find it using dijkstra's algorithm - if (graph == nullptr) { - if (size_limit_warnings.load() < max_num_size_limit_warnings) { - int warning_num = const_cast(this)->size_limit_warnings++; - if (warning_num < max_num_size_limit_warnings) { - std::string msg = "warning: trying to find the distance in an oversized snarl without a graph. 
Returning inf\n"; - if (warning_num + 1 == max_num_size_limit_warnings) { - msg += "suppressing further warnings\n"; - } - std::cerr << msg; - } + auto result = SimpleSnarlRecord(parent, &snarl_tree_records).get_distance(rank1, dir1, rank2, dir2); +#ifdef debug_distances + std::cerr << " Retrieving simple snarl value: " << result << endl; +#endif + return result; + } else if (is_oversized_snarl(get_record_type(snarl_tree_records->at(get_record_offset(parent))))) { +#ifdef debug_distances + cerr << " Performing HHL query" << endl; +#endif + // We need to point at the hub labeling data, which lives after the fixed-size snarl record header and the length value + // This points to the whole record, including the fixed-size header + auto record_it = snarl_tree_records->begin() + get_record_offset(parent); + // This points to the length and the variable-sized data + auto length_data_it = record_it + SNARL_RECORD_SIZE; +#ifdef debug_hub_label_storage + std::cerr << " Hub label data length: " << *length_data_it << endl; + std::cerr << " Hub label data: "; + for (size_t i = 0; i < *length_data_it; i++) { + // Dump the hub label data as retrieved + if (i > 0) { + std::cerr << " | "; } - return std::numeric_limits::max(); + std::cerr << *(length_data_it + 1 + i); } - handle_t handle1 = is_node(child1) ? get_handle(child1, graph) : get_handle(get_bound(child1, !child_ends_at_start1, false), graph); - handle_t handle2 = is_node(child2) ? 
get_handle(child2, graph) : get_handle(get_bound(child2, !child_ends_at_start2, false), graph); - handle2 = graph->flip(handle2); - - size_t distance = std::numeric_limits::max(); - handlegraph::algorithms::dijkstra(graph, handle1, [&](const handle_t& reached, size_t dist) { - if (reached == handle2) { - distance = dist; - return false; - } else if (dist > distance_limit) { - distance = std::numeric_limits::max(); - return false; - } - return true; - }, false); - return distance; - + std::cerr << std::endl; +#endif + // Here we need to convert from child rank (where 0 and 1 represent + // snarl boundaries oriented along the snarl) and child orientation + // to HHL vertex ranks. + // + // The HHL index thinks a child being "reverse" means that we're + // thinking of the child in the opposite orientation from how it + // appears in us. (So even not-reversed won't mean local forward + // orientation if is_reversed_in_parent() is true for that child). + // + // Note that dir1 and dir2 aren't just normal is_reverse flags. + // + // For a sentinel rank 1 (end node) as rank1, dir1 false needs to mean into the snarl (so start of end node, reverse strand). + // For a sentinel rank 0 (start node) as rank1, dir1 false needs to mean into the snarl (so end of start node, forward strand). + // + // For a node as rank1, with its end connected to rank2, dir1 true needs to mean towards the thing attached to its end. If we're a source, that means it must be forward strand. + // + // For a node as rank2, with its end connected to rank1, dir2 true needs to mean towards the thing attached to its end. If we're not a source, that means it must be reverse strand. + size_t from_port = bgid(rank1, !dir1 ^ (rank1 == 0), true); +#ifdef debug_distances + std::cerr << " Query from vertex " << from_port << " = rank " << rank1 << " " << (dir1 ? "rev" : "fd") << " " << (is_sentinel(child1)?
"sentinel" : "non-sentinel") << ", source" << std::endl; +#endif + size_t to_port = bgid(rank2, dir2, false); +#ifdef debug_distances + std::cerr << " Query to vertex " << to_port << " = rank " << rank2 << " " << (dir2 ? "rev" : "fd") << " " << (is_sentinel(child2)? "sentinel" : "non-sentinel") << ", non-source" << std::endl; +#endif + + size_t distance = promote_distance(hhl_query(length_data_it + 1, from_port, to_port)); +#ifdef debug_distances + cerr << " Resulting distance: " << distance << endl; +#endif + return distance; + } else if (rank1 == 0 && rank2 == 0 && !snarl_is_root) { //Start to start is stored in the snarl - return SnarlRecord(parent, &snarl_tree_records).get_distance_start_start(); + auto result = SnarlRecord(parent, &snarl_tree_records).get_distance_start_start(); +#ifdef debug_distances + std::cerr << " Retrieving snarl start-start value: " << result << endl; +#endif + return result; } else if ((rank1 == 0 && rank2 == 1) || (rank1 == 1 && rank2 == 0) && !snarl_is_root) { //start to end / end to start is stored in the snarl - return SnarlRecord(parent, &snarl_tree_records).get_min_length(); + auto result = SnarlRecord(parent, &snarl_tree_records).get_min_length(); +#ifdef debug_distances + std::cerr << " Retrieving snarl min-length value: " << result << endl; +#endif + return result; } else if (rank1 == 1 && rank2 == 1 && !snarl_is_root) { //end to end is stored in the snarl - return SnarlRecord(parent, &snarl_tree_records).get_distance_end_end(); + auto result = SnarlRecord(parent, &snarl_tree_records).get_distance_end_end(); +#ifdef debug_distances + std::cerr << " Retrieving snarl end-end value: " << result << endl; +#endif + return result; } else if ((rank1 == 0 || rank1 == 1 || rank2 == 0 || rank2 == 1) && !snarl_is_root) { //If one node is a boundary and the other is a child size_t boundary_rank = (rank1 == 0 || rank1 == 1) ? rank1 : rank2; const net_handle_t& internal_child = (rank1 == 0 || rank1 == 1) ? 
child2 : child1; - bool internal_is_reversed = (rank1 == 0 || rank1 == 1) ? rev2 : rev1; + bool internal_is_reversed = (rank1 == 0 || rank1 == 1) ? dir2 : dir1; if (is_trivial_chain( internal_child) ) { //Child is just a node pretending to be a chain + size_t result; if (boundary_rank == 0 && !internal_is_reversed) { //Start to left of child - return NodeRecord(internal_child, &snarl_tree_records).get_distance_left_start(); + result = NodeRecord(internal_child, &snarl_tree_records).get_distance_left_start(); } else if (boundary_rank == 0 && internal_is_reversed) { //Start to right of child - return NodeRecord(internal_child, &snarl_tree_records).get_distance_right_start(); + result = NodeRecord(internal_child, &snarl_tree_records).get_distance_right_start(); } else if (boundary_rank == 1 && !internal_is_reversed) { //End to left of child - return NodeRecord(internal_child, &snarl_tree_records).get_distance_left_end(); + result = NodeRecord(internal_child, &snarl_tree_records).get_distance_left_end(); } else { //End to right of child - return NodeRecord(internal_child, &snarl_tree_records).get_distance_right_end(); + result = NodeRecord(internal_child, &snarl_tree_records).get_distance_right_end(); } +#ifdef debug_distances + std::cerr << " Retrieving node record value: " << result << endl; +#endif + return result; } else { //Child is an actual chain + size_t result; if (boundary_rank == 0 && !internal_is_reversed) { //Start to left of child - return ChainRecord(internal_child, &snarl_tree_records).get_distance_left_start(); + result = ChainRecord(internal_child, &snarl_tree_records).get_distance_left_start(); } else if (boundary_rank == 0 && internal_is_reversed) { //Start to right of child - return ChainRecord(internal_child, &snarl_tree_records).get_distance_right_start(); + result = ChainRecord(internal_child, &snarl_tree_records).get_distance_right_start(); } else if (boundary_rank == 1 && !internal_is_reversed) { //End to left of child - return 
ChainRecord(internal_child, &snarl_tree_records).get_distance_left_end(); + result = ChainRecord(internal_child, &snarl_tree_records).get_distance_left_end(); } else { //End to right of child - return ChainRecord(internal_child, &snarl_tree_records).get_distance_right_end(); + result = ChainRecord(internal_child, &snarl_tree_records).get_distance_right_end(); } +#ifdef debug_distances + std::cerr << " Retrieving chain record value: " << result << endl; +#endif + return result; } } else { - return SnarlRecord(parent, &snarl_tree_records).get_distance(rank1, rev1, rank2, rev2); + auto result = SnarlRecord(parent, &snarl_tree_records).get_distance(rank1, dir1, rank2, dir2); +#ifdef debug_distances + std::cerr << " Retrieving snarl record value: " << result << endl; +#endif + return result; } } else { throw runtime_error("error: Trying to find distance in the wrong type of handle"); @@ -1418,48 +1419,15 @@ size_t SnarlDistanceIndex::distance_in_snarl(const net_handle_t& parent, if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == DISTANCED_SIMPLE_SNARL) { return SimpleSnarlRecord(parent, &snarl_tree_records).get_distance(rank1, right_side1, rank2, right_side2); - } else if (get_record_type(snarl_tree_records->at(get_record_offset(parent))) == OVERSIZED_SNARL + } else if (is_oversized_snarl(get_record_type(snarl_tree_records->at(get_record_offset(parent)))) && !(rank1 == 0 || rank1 == 1 || rank2 == 0 || rank2 == 1) ) { - //If this is an oversized snarl and we're looking for internal distances, then we didn't store the - //distance and we have to find it using dijkstra's algorithm - if (graph == nullptr) { - if (size_limit_warnings.load() < max_num_size_limit_warnings) { - int warning_num = const_cast(this)->size_limit_warnings++; - if (warning_num < max_num_size_limit_warnings) { - std::string msg = "warning: Trying to find the distance in an oversized snarl with zip codes. 
Returning inf\n"; - if (warning_num + 1 == max_num_size_limit_warnings) { - msg += "suppressing further warnings\n"; - } - std::cerr << msg; - } - } - return std::numeric_limits::max(); - } else { - net_handle_t net1 = get_snarl_child_from_rank(parent, rank1); - if (!right_side1) { - net1 = flip(net1); - } - net_handle_t net2 = get_snarl_child_from_rank(parent, rank2); - if (right_side2) { - net2 = flip(net2); - } - handle_t handle1 = get_handle(net1, graph); - handle_t handle2 = get_handle(net2, graph); + //If this is an oversized snarl and we're looking for internal distances, use the hub labels. + auto record_it = snarl_tree_records->begin() + get_record_offset(parent); + auto length_data_it = record_it + SNARL_RECORD_SIZE; + size_t from_port = bgid(rank1, !right_side1 ^ (rank1 == 0), true); + size_t to_port = bgid(rank2, right_side2, false); + return promote_distance(hhl_query(length_data_it + 1, from_port, to_port)); - size_t distance = std::numeric_limits::max(); - handlegraph::algorithms::dijkstra(graph, handle1, [&](const handle_t& reached, size_t dist) { - if (reached == handle2) { - distance = dist; - return false; - } else if (dist > distance_limit) { - distance = std::numeric_limits::max(); - return false; - } - return true; - }, false); - return distance; - } - } else if (rank1 == 0 && rank2 == 0 && !snarl_is_root) { //Start to start is stored in the snarl return SnarlRecord(parent, &snarl_tree_records).get_distance_start_start(); @@ -1846,7 +1814,7 @@ size_t SnarlDistanceIndex::minimum_distance(const handlegraph::nid_t id1, const #ifdef debug_distances cerr << endl; - cerr << "Find the minimum distance between " << id1 << " " <at(get_record_offset(net))) == DISTANCED_NODE) { + auto record_type = get_record_type(snarl_tree_records->at(get_record_offset(net))); + if (record_type == DISTANCED_NODE) { return NodeRecord(net, &snarl_tree_records).get_node_length(); - } else if (get_record_type(snarl_tree_records->at(get_record_offset(net))) == 
DISTANCED_SIMPLE_SNARL) { + } else if (record_type == DISTANCED_SIMPLE_SNARL) { return SimpleSnarlRecord(net, &snarl_tree_records).get_node_length(); } else { - assert(get_record_type(snarl_tree_records->at(get_record_offset(net))) == DISTANCED_TRIVIAL_SNARL); + assert(record_type == DISTANCED_TRIVIAL_SNARL); return TrivialSnarlRecord(get_record_offset(net), &snarl_tree_records).get_node_length(get_node_record_offset(net)); } } else if (is_sentinel(net)) { @@ -3316,18 +3287,18 @@ size_t SnarlDistanceIndex::maximum_length(const net_handle_t& net) const { } nid_t SnarlDistanceIndex::node_id(const net_handle_t& net) const { if (is_node(net) || is_trivial_chain(net)) { - if (get_record_type(snarl_tree_records->at(get_record_offset(net))) == NODE - || get_record_type(snarl_tree_records->at(get_record_offset(net))) == DISTANCED_NODE) { + if (is_node(get_record_type(snarl_tree_records->at(get_record_offset(net))))) { return NodeRecord(net, &snarl_tree_records).get_node_id(); - } else if (get_record_type(snarl_tree_records->at(get_record_offset(net))) == SIMPLE_SNARL - || get_record_type(snarl_tree_records->at(get_record_offset(net))) == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(get_record_type(snarl_tree_records->at(get_record_offset(net))))) { return SimpleSnarlRecord(net, &snarl_tree_records).get_node_id(); } else { return TrivialSnarlRecord(get_record_offset(net), &snarl_tree_records).get_node_id(get_node_record_offset(net)); } } else if (is_sentinel(net)) { - SnarlRecord snarl_record(net, &snarl_tree_records); - NodeRecord node_record; + // We don't know if this is a trivial or nontrivial snarl, so we need + // to access it with the base class. + // TODO: Make a base class for any kind of snarl. 
+ SnarlTreeRecord snarl_record(net, &snarl_tree_records); if (get_start_endpoint(net) == START) { return snarl_record.get_start_id(); } else { @@ -3346,9 +3317,9 @@ bool SnarlDistanceIndex::has_node(const nid_t id) const { bool SnarlDistanceIndex::is_reversed_in_parent(const net_handle_t& net) const { SnarlTreeRecord record(net, &snarl_tree_records); - if (record.get_record_type() == TRIVIAL_SNARL || record.get_record_type() == DISTANCED_TRIVIAL_SNARL) { + if (is_trivial_snarl(record.get_record_type())) { return TrivialSnarlRecord(get_record_offset(net), &snarl_tree_records).get_is_reversed_in_parent(get_node_record_offset(net)); - } else if ((record.get_record_type() == SIMPLE_SNARL || record.get_record_type() == DISTANCED_SIMPLE_SNARL) && is_chain(net)) { + } else if (is_simple_snarl(record.get_record_type()) && is_chain(net)) { return SimpleSnarlRecord(net, &snarl_tree_records).get_node_is_reversed(); } else { return record.get_is_reversed_in_parent(); @@ -3369,8 +3340,7 @@ size_t SnarlDistanceIndex::get_max_tree_depth() const { size_t SnarlDistanceIndex::get_depth(const net_handle_t& net) const { if (is_root(net)) { return 0; - } else if (SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == SIMPLE_SNARL || - SnarlTreeRecord(net, &snarl_tree_records).get_record_type() == DISTANCED_SIMPLE_SNARL ){ + } else if (is_simple_snarl(SnarlTreeRecord(net, &snarl_tree_records).get_record_type())){ //If this is a simple snarl, then it can be a node, snarl, or chain //The depth of the snarl's parent chain @@ -3540,22 +3510,22 @@ size_t SnarlDistanceIndex::get_chain_component(const net_handle_t& net, bool get size_t SnarlDistanceIndex::SnarlTreeRecord::get_min_length() const { record_t type = get_record_type(); size_t val; - if (type == DISTANCED_NODE ) { + if (is_any_root(type)) { + throw runtime_error("error: trying to find the length of the root"); + } else if (!has_distances(type)) { + throw runtime_error("error: trying to access get distance in a distanceless 
index"); + } else if (is_node(type)) { return (*records)->at(record_offset + NODE_LENGTH_OFFSET); - } else if (type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { size_t last_node_offset = TrivialSnarlRecord(record_offset, records).get_node_count()-1; return (*records)->at(record_offset + DISTANCED_TRIVIAL_SNARL_RECORD_SIZE + (last_node_offset*2) + 1); - } else if (type == DISTANCED_SNARL || type == OVERSIZED_SNARL) { - val = (*records)->at(record_offset + SNARL_MIN_LENGTH_OFFSET); - } else if (type == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(type)) { size_t raw_val = (*records)->at(record_offset + SIMPLE_SNARL_NODE_COUNT_AND_LENGTHS_OFFSET); return (raw_val >> 11) & ((1 << 11) - 1); - } else if (type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_nonroot_nonsimple_snarl(type)) { + val = (*records)->at(record_offset + SNARL_MIN_LENGTH_OFFSET); + } else if (is_chain(type)) { val = (*records)->at(record_offset + CHAIN_MIN_LENGTH_OFFSET); - } else if (type == NODE || type == SNARL || type == CHAIN) { - throw runtime_error("error: trying to access get distance in a distanceless index"); - } else if (type == ROOT || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { - throw runtime_error("error: trying to find the length of the root"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); } @@ -3565,22 +3535,22 @@ size_t SnarlDistanceIndex::SnarlTreeRecord::get_min_length() const { size_t SnarlDistanceIndex::SnarlTreeRecord::get_max_length() const { record_t type = get_record_type(); size_t val; - if (type == DISTANCED_NODE ) { + if (is_any_root(type)) { + throw runtime_error("error: trying to find the length of the root"); + } else if (!has_distances(type)) { + throw runtime_error("error: trying to access get distance in a distanceless index"); + } else if (is_node(type)) { return (*records)->at(record_offset + NODE_LENGTH_OFFSET); - } else if (type == 
DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { size_t last_node_offset = TrivialSnarlRecord(record_offset, records).get_node_count()-1; return (*records)->at(record_offset + DISTANCED_TRIVIAL_SNARL_RECORD_SIZE + (last_node_offset*2) + 1); - } else if (type == DISTANCED_SNARL || type == OVERSIZED_SNARL) { - val = (*records)->at(record_offset + SNARL_MAX_LENGTH_OFFSET); - } else if (type == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(type)) { size_t raw_val = (*records)->at(record_offset + SIMPLE_SNARL_NODE_COUNT_AND_LENGTHS_OFFSET); return raw_val & ((1 << 11) - 1); - } else if (type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_nonroot_nonsimple_snarl(type)) { + val = (*records)->at(record_offset + SNARL_MAX_LENGTH_OFFSET); + } else if (is_chain(type)) { val = (*records)->at(record_offset + CHAIN_MAX_LENGTH_OFFSET); - } else if (type == NODE || type == SNARL || type == CHAIN) { - throw runtime_error("error: trying to access get distance in a distanceless index"); - } else if (type == ROOT || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { - throw runtime_error("error: trying to find the length of the root"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); } @@ -3590,18 +3560,16 @@ size_t SnarlDistanceIndex::SnarlTreeRecord::get_max_length() const { size_t SnarlDistanceIndex::SnarlTreeRecord::get_rank_in_parent() const { record_t type = get_record_type(); - if (type == NODE || type == DISTANCED_NODE) { + if (is_node(type)) { return (*records)->at(record_offset + NODE_RANK_OFFSET); - } else if (type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { throw runtime_error("error: node ranks need the node offsets"); - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_root_snarl(type)) { //For root snarls, the rank gets stored in the length slot return (*records)->at( record_offset + 
SNARL_MIN_LENGTH_OFFSET); - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == SIMPLE_SNARL - || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nonroot_nontrivial_snarl(type)) { return record_offset; - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { return (*records)->at(record_offset + CHAIN_RANK_OFFSET) >> 1; } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -3609,16 +3577,14 @@ size_t SnarlDistanceIndex::SnarlTreeRecord::get_rank_in_parent() const { }; bool SnarlDistanceIndex::SnarlTreeRecord::get_is_reversed_in_parent() const { record_t type = get_record_type(); - if (type == NODE || type == DISTANCED_NODE) { + if (is_node(type)) { return false; - } else if (type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { cerr << "warning: Getting orientation of a trivial snarl" << endl; return false; - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL - || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nontrivial_snarl(type)) { return false; - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { return (*records)->at(record_offset + CHAIN_RANK_OFFSET) & 1; } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -3629,22 +3595,21 @@ handlegraph::nid_t SnarlDistanceIndex::SnarlTreeRecord::get_start_id() const { record_t type = get_record_type(); if (type == ROOT) { throw runtime_error("error: trying to get the start node of the root"); - } else if (type == NODE || type == DISTANCED_NODE) { + } else if (is_node(type)) { //cerr << "warning: Looking for the start of a node" << endl; return (*records)->at(record_offset + NODE_ID_OFFSET); - } else if (type == 
TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { TrivialSnarlRecord trivial_snarl_record(record_offset, records); return trivial_snarl_record.get_node_id(0); - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nonroot_nontrivial_snarl(type)) { //To get the start node of a snarl, get the thing to the left of it in the chain ChainRecord parent_record (get_parent_record_offset(), records); - size_t node_offset = (type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) ? 1 : 0; + size_t node_offset = is_simple_snarl(type) ? 1 : 0; net_handle_t next_node_in_chain = parent_record.get_next_child(get_net_handle_from_values(record_offset, START_END, SNARL_HANDLE, node_offset), true); return TrivialSnarlRecord(get_record_offset(next_node_in_chain), records).get_node_id(get_node_record_offset(next_node_in_chain)); - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { return ((*records)->at(record_offset + CHAIN_START_NODE_OFFSET)) >> 1; - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_root_snarl(type)) { throw runtime_error("error: trying to find the start node of a root snarl"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -3654,20 +3619,19 @@ bool SnarlDistanceIndex::SnarlTreeRecord::get_start_orientation() const { record_t type = get_record_type(); if (type == ROOT) { throw runtime_error("error: trying to get the start node of the root"); - } else if (type == NODE || type == DISTANCED_NODE || type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_node(type) || is_trivial_snarl(type)) { //cerr << "warning: Looking for the start of a node" << endl; return false; - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == 
SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nonroot_nontrivial_snarl(type)) { ChainRecord parent_record (get_parent_record_offset(), records); - size_t node_offset = (type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) ? 1 : 0; + size_t node_offset = is_simple_snarl(type) ? 1 : 0; //Get the next node in the chain (going left) //The handle will be pointing in the direction we just moved, so if it is going START_END, then it is reversed net_handle_t next_node_in_chain = parent_record.get_next_child(get_net_handle_from_values(record_offset, START_END, SNARL_HANDLE, node_offset), true); return get_end_endpoint( get_connectivity(next_node_in_chain)) == END; - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { return ((*records)->at(record_offset + CHAIN_START_NODE_OFFSET)) & 1; - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_root_snarl(type)) { throw runtime_error("error: trying to find the start node of a root snarl"); }else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -3677,48 +3641,46 @@ handlegraph::nid_t SnarlDistanceIndex::SnarlTreeRecord::get_end_id() const { record_t type = get_record_type(); if (type == ROOT) { throw runtime_error("error: trying to get the end node of the root"); - } else if (type == NODE || type == DISTANCED_NODE ) { + } else if (is_node(type)) { //cerr << "warning: Looking for the end of a node" << endl; //Offset of the start of the node vector return (*records)->at(record_offset + NODE_ID_OFFSET); - } else if (type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { TrivialSnarlRecord trivial_snarl_record(record_offset, records); return trivial_snarl_record.get_node_id(trivial_snarl_record.get_node_count()-1); - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == SIMPLE_SNARL || type == 
DISTANCED_SIMPLE_SNARL) { + } else if (is_nonroot_nontrivial_snarl(type)) { //For a snarl, walk right in the chain ChainRecord parent_record (get_parent_record_offset(), records); - size_t node_offset = (type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) ? 1 : 0; + size_t node_offset = is_simple_snarl(type) ? 1 : 0; net_handle_t next_node_in_chain = parent_record.get_next_child(get_net_handle_from_values(record_offset, START_END, SNARL_HANDLE, node_offset), false); return TrivialSnarlRecord(get_record_offset(next_node_in_chain), records).get_node_id(get_node_record_offset(next_node_in_chain)); - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { return ((*records)->at(record_offset + CHAIN_END_NODE_OFFSET)) >> 1; - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_root_snarl(type)) { throw runtime_error("error: trying to find the end node of a root snarl"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); } } -handlegraph::nid_t SnarlDistanceIndex::SnarlTreeRecord::get_end_orientation() const { +bool SnarlDistanceIndex::SnarlTreeRecord::get_end_orientation() const { record_t type = get_record_type(); if (type == ROOT) { throw runtime_error("error: trying to get the end node of the root"); - } else if (type == NODE || type == DISTANCED_NODE || type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_node(type) || is_trivial_snarl(type)) { //cerr << "warning: Looking for the end of a node" << endl; //Offset of the start of the node vector return false; - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nonroot_nontrivial_snarl(type)) { ChainRecord parent_record (get_parent_record_offset(), records); - size_t node_offset = (type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) ? 
1 : 0; + size_t node_offset = is_simple_snarl(type) ? 1 : 0; //Get the next node in the chain (going right) net_handle_t next_node_in_chain = parent_record.get_next_child(get_net_handle_from_values(record_offset, START_END, SNARL_HANDLE, node_offset), false); //The handle will be pointing in the direction we just moved, so if it is going END_START, then it is reversed return get_end_endpoint( get_connectivity(next_node_in_chain)) == START; - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { return ((*records)->at(record_offset + CHAIN_END_NODE_OFFSET)) & 1; - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_root_snarl(type)) { throw runtime_error("error: trying to find the end node of a root snarl"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -3748,16 +3710,15 @@ size_t SnarlDistanceIndex::SnarlTreeRecord::get_parent_record_offset() const { record_t type = get_record_type(); if (type == ROOT) { return 0; - } else if (type == NODE || type == DISTANCED_NODE) { + } else if (is_node(type)) { return ((*records)->at(record_offset + NODE_PARENT_OFFSET)); - } else if (type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { return (*records)->at(record_offset + TRIVIAL_SNARL_PARENT_OFFSET); - } else if (type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(type)) { return (*records)->at(record_offset + SIMPLE_SNARL_PARENT_OFFSET); - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_nonsimple_snarl(type)) { return ((*records)->at(record_offset + SNARL_PARENT_OFFSET)); - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { return ((*records)->at(record_offset + CHAIN_PARENT_OFFSET)); } 
else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -3770,12 +3731,7 @@ SnarlDistanceIndex::SnarlTreeRecordWriter::SnarlTreeRecordWriter (size_t pointer #ifdef debug_distance_indexing record_t type = get_record_type(); - assert(type == ROOT || type == NODE || type == DISTANCED_NODE || - type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL || - type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL || - type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL || - type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL || type == CHAIN || - type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN); + assert(is_any_nonchildren(type)); #endif } @@ -3785,12 +3741,7 @@ SnarlDistanceIndex::SnarlTreeRecordWriter::SnarlTreeRecordWriter (const net_hand records = tree_records; #ifdef debug_distance_indexing record_t type = get_record_type(); - assert(type == ROOT || type == NODE || type == DISTANCED_NODE || - type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL || - type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL || - type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL || - type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL || type == CHAIN || - type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN); + assert(is_any_nonchildren(type)); #endif } @@ -3870,13 +3821,17 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_record_type(record_t type) { void SnarlDistanceIndex::SnarlTreeRecordWriter::set_min_length(size_t length) { record_t type = get_record_type(); size_t offset; - if (type == DISTANCED_NODE) { + if (!has_distances(type)) { + throw runtime_error("error: trying to access get distance in a distanceless index"); + } else if (is_any_root(type)) { + throw runtime_error("error: set the length of a root snarl"); + } else if (is_node(type)) { offset = record_offset + NODE_LENGTH_OFFSET; - } else if (type == DISTANCED_SNARL || type == OVERSIZED_SNARL) { + } else if 
(is_nonroot_nonsimple_snarl(type)) { offset = record_offset + SNARL_MIN_LENGTH_OFFSET; - } else if (type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { offset = record_offset + CHAIN_MIN_LENGTH_OFFSET; - } else if (type == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(type)) { if (length > (1 << 11)-1) { throw runtime_error("error: node length is too large"); } @@ -3884,10 +3839,6 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_min_length(size_t length) { size_t new_val = old_val | (length << 11); (*records)->at(record_offset + SIMPLE_SNARL_NODE_COUNT_AND_LENGTHS_OFFSET)= new_val; return; - } else if (type == NODE || type == SNARL || type == CHAIN ) { - throw runtime_error("error: trying to access get distance in a distanceless index"); - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { - throw runtime_error("error: set the length of a root snarl"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); } @@ -3901,11 +3852,15 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_min_length(size_t length) { void SnarlDistanceIndex::SnarlTreeRecordWriter::set_max_length(size_t length) { record_t type = get_record_type(); size_t offset; - if (type == DISTANCED_NODE) { + if (!has_distances(type)) { + throw runtime_error("error: trying to access get distance in a distanceless index"); + } else if (is_any_root(type)) { + throw runtime_error("error: set the length of a root snarl"); + } else if (is_node(type)) { throw runtime_error("error: set the max length of a node"); - } else if (type == DISTANCED_SNARL || type == OVERSIZED_SNARL) { + } else if (is_nonroot_nonsimple_snarl(type)) { offset = record_offset + SNARL_MAX_LENGTH_OFFSET; - } else if (type == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(type)) { if (length > (1 << 11)-1) { throw runtime_error("error: node length is too large"); } @@ -3913,12 +3868,8 @@ void 
SnarlDistanceIndex::SnarlTreeRecordWriter::set_max_length(size_t length) { size_t new_val = old_val | length; (*records)->at(record_offset + SIMPLE_SNARL_NODE_COUNT_AND_LENGTHS_OFFSET) = new_val; return; - } else if (type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { offset = record_offset + CHAIN_MAX_LENGTH_OFFSET; - } else if (type == DISTANCED_NODE || type == SNARL || type == CHAIN) { - throw runtime_error("error: trying to access get distance in a distanceless index"); - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { - throw runtime_error("error: set the length of a root snarl"); } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); } @@ -3933,20 +3884,18 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_max_length(size_t length) { void SnarlDistanceIndex::SnarlTreeRecordWriter::set_rank_in_parent(size_t rank) { record_t type = get_record_type(); size_t offset; - if (type == NODE || type == DISTANCED_NODE) { + if (is_node(type)) { offset = record_offset + NODE_RANK_OFFSET; - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_root_snarl(type)) { (*records)->at(record_offset + SNARL_MIN_LENGTH_OFFSET) = rank; return; - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == SIMPLE_SNARL - || type == DISTANCED_SIMPLE_SNARL) { - cerr << "SETTING THE RANK OF A SNARL WHICH I'M PRETTY SURE DOESN'T MEAN ANYTHING" << endl; + } else if (is_nonroot_nontrivial_snarl(type)) { + cerr << "SETTING THE RANK OF A " << stringify(type) << " SNARL WHICH I'M PRETTY SURE DOESN'T MEAN ANYTHING" << endl; return; - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { offset = record_offset + CHAIN_RANK_OFFSET; } else { - throw runtime_error("error: trying to access a snarl tree node of the wrong type"); + throw runtime_error("error: trying to set rank of a 
snarl tree node of the wrong type: " + stringify(type)); } #ifdef debug_distance_indexing cerr << offset << " set rank in parent to be " << rank << endl; @@ -3959,11 +3908,9 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_rank_in_parent(size_t rank) void SnarlDistanceIndex::SnarlTreeRecordWriter::set_is_reversed_in_parent(bool rev) { record_t type = get_record_type(); size_t offset; - if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL - || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + if (is_nontrivial_snarl(type)) { return; - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { offset = record_offset + CHAIN_RANK_OFFSET; } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -3977,22 +3924,21 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_is_reversed_in_parent(bool r void SnarlDistanceIndex::SnarlTreeRecordWriter::set_parent_record_offset(size_t pointer){ record_t type = get_record_type(); size_t offset; - if (type == NODE || type == DISTANCED_NODE) { + if (is_node(type)) { offset = record_offset + NODE_PARENT_OFFSET; - } else if (type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(type)) { offset = record_offset + TRIVIAL_SNARL_PARENT_OFFSET; - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL - || type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_nonsimple_snarl(type)) { #ifdef debug_distance_indexing - if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + if (is_root_snarl(type)) { assert(pointer == 0); } #endif offset = record_offset + SNARL_PARENT_OFFSET; - } else if (type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(type)) { offset = record_offset + SIMPLE_SNARL_PARENT_OFFSET; - } else if (type == CHAIN || type == 
DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { offset = record_offset + CHAIN_PARENT_OFFSET; } else { throw runtime_error("error: trying to access a snarl tree node of the wrong type"); @@ -4008,16 +3954,16 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_parent_record_offset(size_t void SnarlDistanceIndex::SnarlTreeRecordWriter::set_start_node(handlegraph::nid_t id, bool rev) { record_t type = get_record_type(); size_t offset; - if (type == ROOT || type == NODE || type == DISTANCED_NODE || type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + if (type == ROOT || is_node(type) || is_trivial_snarl(type)) { throw runtime_error("error: trying to set the start node id of a node or root"); - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nonroot_nontrivial_snarl(type)) { throw runtime_error("error: trying to set the start node id of a snarl"); - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { offset = record_offset + CHAIN_START_NODE_OFFSET; - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_root_snarl(type)) { throw runtime_error("error: set the start node of a root snarl"); } else { - throw runtime_error("error: trying to access a snarl tree node of the wrong type"); + throw runtime_error("error: trying to set start on a snarl tree node of the wrong type: " + stringify(type)); } #ifdef debug_distance_indexing cerr << offset << " set start node to be " << id << " facing " << (rev ? 
"rev" : "fd") << endl; @@ -4029,16 +3975,16 @@ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_start_node(handlegraph::nid_ void SnarlDistanceIndex::SnarlTreeRecordWriter::set_end_node(handlegraph::nid_t id, bool rev) const { record_t type = get_record_type(); size_t offset; - if (type == ROOT || type == NODE || type == DISTANCED_NODE || type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL) { + if (type == ROOT || is_node(type) || is_trivial_snarl(type)) { throw runtime_error("error: trying to set the node id of a node or root"); - } else if (type == SNARL || type == DISTANCED_SNARL || type == OVERSIZED_SNARL || type == SIMPLE_SNARL || type == DISTANCED_SIMPLE_SNARL) { + } else if (is_nonroot_nontrivial_snarl(type)) { throw runtime_error("error: trying to set the end node id of a snarl"); - } else if (type == CHAIN || type == DISTANCED_CHAIN || type == MULTICOMPONENT_CHAIN) { + } else if (is_chain(type)) { offset = record_offset + CHAIN_END_NODE_OFFSET; - } else if (type == ROOT_SNARL || type == DISTANCED_ROOT_SNARL) { + } else if (is_root_snarl(type)) { throw runtime_error("error: set the end node of a root snarl"); } else { - throw runtime_error("error: trying to access a snarl tree node of the wrong type"); + throw runtime_error("error: trying to set end on a snarl tree node of the wrong type: " + stringify(type)); } #ifdef debug_distance_indexing cerr << offset << " set end node to be " << id << " facing " << (rev ? 
"rev" : "fd") << endl; @@ -4077,7 +4023,7 @@ bool SnarlDistanceIndex::RootRecord::for_each_child(const std::function* tree_records){ - record_offset = get_record_offset(net); - records = tree_records; -#ifdef debug_distance_indexing - net_handle_record_t type = get_handle_type(net); - assert(type == SNARL_HANDLE || type == SENTINEL_HANDLE || type == ROOT_HANDLE); -#endif +SnarlDistanceIndex::SnarlRecord::SnarlRecord (net_handle_t net, const bdsg::yomo::UniqueMappedPointer* tree_records) : + SnarlRecord(get_record_offset(net), tree_records) { + // Nothing to do! } size_t SnarlDistanceIndex::SnarlRecord::distance_vector_size(record_t type, size_t node_count) { - if (type == SNARL || type == ROOT_SNARL){ - //For a normal snarl, its just the record size and the pointers to children - return 0; - } else if (type == DISTANCED_SNARL || type == DISTANCED_ROOT_SNARL) { - //For a normal min distance snarl just the distances between internal node sides - size_t node_side_count = node_count * 2; - size_t vector_size = (((node_side_count+1)*node_side_count) / 2); - return vector_size; - } else if (type == OVERSIZED_SNARL){ - //For a large min_distance snarl, all distances get stored in the children - return 0; + if (!is_nonsimple_snarl(type)) { + throw runtime_error("error: trying to get size of distance matrix for something other than a snarl that would have one"); + } + if (has_distances(type)) { + if (is_oversized_snarl(type)) { + // For oversized snarls, vec_size for the hub labeling data is accounted separately. + return 0; + } else { + // Must be a distanced non-simple snarl or root snarl. 
+ + //For a normal min distance snarl just the distances between internal node sides + size_t node_side_count = node_count * 2; + size_t vector_size = (((node_side_count+1)*node_side_count) / 2); + return vector_size; + } } else { - throw runtime_error ("error: this is not a snarl"); + //For a non-distanced snarl, its just the record size and the pointers to children + return 0; } } -size_t SnarlDistanceIndex::SnarlRecord::record_size (record_t type, size_t node_count) { - return SNARL_RECORD_SIZE + distance_vector_size(type, node_count); +size_t SnarlDistanceIndex::SnarlRecord::record_size (record_t type, size_t node_count, size_t vec_size) { + if (is_oversized_snarl(type)) { + // Oversized snarls need the fixed-size header, the slot for the length + // of the packed hub label vector, and the packed hub label vector + // itself. + + // TODO: Can we stop storing the packed hub label vector length? Do we + // ever use it??? + return SNARL_RECORD_SIZE + 1 + vec_size; + } else { + // Normal snarl records need the fixed-size header and the distance matrix + return SNARL_RECORD_SIZE + distance_vector_size(type, node_count); + } } size_t SnarlDistanceIndex::SnarlRecord::record_size() { - record_t type = get_record_type(); - return record_size(type, get_node_count()); + record_t type = get_record_type(); + //vec_size only for oversized snarls + size_t vec_size = (*records)->at(record_offset + SNARL_RECORD_SIZE); + return record_size(type, get_node_count(), vec_size); } size_t SnarlDistanceIndex::SnarlRecord::get_distance_start_start() const { @@ -4241,7 +4203,7 @@ size_t SnarlDistanceIndex::SnarlRecord::get_distance_end_end() const { return stored_value == 0 ? 
std::numeric_limits::max() : stored_value - 1; } -SnarlDistanceIndex::SnarlRecordWriter::SnarlRecordWriter (size_t node_count, bdsg::yomo::UniqueMappedPointer* records, record_t type){ +SnarlDistanceIndex::SnarlRecordWriter::SnarlRecordWriter (size_t node_count, bdsg::yomo::UniqueMappedPointer* records, record_t type, size_t vec_size){ //Constructor for making a new record, including allocating memory. //Assumes that this is the latest record being made, so pointer will be the end of //the array and we need to allocate extra memory past it @@ -4253,16 +4215,21 @@ SnarlDistanceIndex::SnarlRecordWriter::SnarlRecordWriter (size_t node_count, bds SnarlRecord::record_offset = (*records)->size(); SnarlRecord::records = records; - size_t extra_size = record_size(type, node_count); + //vec_size only used for oversized snarls + size_t extra_size = record_size(type, node_count, vec_size); #ifdef debug_distance_indexing - if (type == OVERSIZED_SNARL) { + if (is_oversized_snarl(type)) { cerr << "oversized" << endl; } cerr << " Resizing array to add snarl: length " << (*records)->size() << " -> " << (*records)->size() + extra_size << endl; -#endif +#endif (*records)->resize((*records)->size() + extra_size); set_node_count(node_count); set_record_type(type); + + if (is_oversized_snarl(type)) { + set_vec_size(vec_size); + } #ifdef count_allocations cerr << "new_snarl\t" << extra_size << "\t" << (*records)->size() << endl; @@ -4291,10 +4258,20 @@ void SnarlDistanceIndex::SnarlRecordWriter::set_distance_end_end(size_t value) { size_t SnarlDistanceIndex::SnarlRecord::get_distance_vector_offset(size_t rank1, bool right_side1, size_t rank2, bool right_side2, size_t node_count, record_t type) { + if (!has_distances(type)) { + throw runtime_error("error: trying to access distance in a distanceless snarl tree"); + } + if (is_oversized_snarl(type)) { + throw runtime_error("error: trying to access distance matrix in an oversized snarl"); + } + if (!is_nonsimple_snarl(type)) { + throw 
runtime_error("error: trying to access distance matrix in something other than a snarl that would have one"); + } + //how many node sides in this snarl size_t node_side_count = node_count * 2; - if (type == DISTANCED_SNARL) { + if (!is_root_snarl(type)) { //For distances snarls, the ranks 0 and 1 are for the start and end nodes. The distance //matrix in the snarl record is only for distances between internal nodes, so 0 and 1 are //never stored and we decrement the ranks by 2 @@ -4320,15 +4297,9 @@ size_t SnarlDistanceIndex::SnarlRecord::get_distance_vector_offset(size_t rank1, rank2 = tmp; } - if (type == SNARL || type == ROOT_SNARL || type == OVERSIZED_SNARL) { - throw runtime_error("error: trying to access distance in a distanceless snarl tree"); - } else if (type == DISTANCED_SNARL || type == DISTANCED_ROOT_SNARL) { - //normal distance index - size_t k = node_side_count-rank1; - return (((node_side_count+1) * node_side_count)/2) - (((k+1)*k) / 2) + rank2 - rank1; - } else { - throw runtime_error("error: trying to distance from something that isn't a snarl"); - } + //normal distance index + size_t k = node_side_count-rank1; + return (((node_side_count+1) * node_side_count)/2) - (((k+1)*k) / 2) + rank2 - rank1; } size_t SnarlDistanceIndex::SnarlRecord::get_distance_vector_offset(size_t rank1, bool right_side1, @@ -4364,7 +4335,7 @@ void SnarlDistanceIndex::SnarlRecordWriter::set_distance(size_t rank1, bool righ get_distance(rank1, right_side1, rank2, right_side2) == distance)); #endif //Don't save internal distances for oversized snarls - if (get_record_type() == OVERSIZED_SNARL && !(rank1 == 0 || rank1 == 1 || rank2 == 0 || rank2 == 1)){ + if (is_oversized_snarl(get_record_type()) && !(rank1 == 0 || rank1 == 1 || rank2 == 0 || rank2 == 1)){ return; } @@ -4382,7 +4353,7 @@ size_t SnarlDistanceIndex::SnarlRecord::get_distance(size_t rank1, bool right_si if (!has_distances(get_record_type())) { throw runtime_error("error: trying to access get distance in a 
distanceless index"); } - if (get_record_type() == OVERSIZED_SNARL) { + if (is_oversized_snarl(get_record_type())) { throw runtime_error("error: trying to distance from an oversized snarl"); } @@ -4409,6 +4380,27 @@ void SnarlDistanceIndex::SnarlRecordWriter::set_node_count(size_t node_count) { (*records)->at(record_offset + SNARL_NODE_COUNT_OFFSET) = node_count; } +void SnarlDistanceIndex::SnarlRecordWriter::set_vec_size(size_t vec_size) { +#ifdef debug_distance_indexing + cerr << record_offset + SNARL_RECORD_SIZE << " set vec_size " << vec_size << endl; + assert(vec_size > 0); + assert((*records)->at(record_offset + SNARL_RECORD_SIZE) == 0); +#endif + + (*records)->at(record_offset + SNARL_RECORD_SIZE) = vec_size; +} + +void SnarlDistanceIndex::SnarlRecordWriter::set_vec_entry(size_t index, size_t value) { +#ifdef debug_distance_indexing + cerr << record_offset + SNARL_RECORD_SIZE + 1 + index << " set vec entry " << value << endl; + assert(index < (*records)->at(record_offset + SNARL_RECORD_SIZE)); + assert((*records)->at(record_offset + SNARL_RECORD_SIZE + 1 + index) == 0); +#endif + // The hub label data sits right after its size, after the end of the fixed-size header. 
+ (*records)->at(record_offset + SNARL_RECORD_SIZE + 1 + index) = value; +} + + size_t SnarlDistanceIndex::SnarlRecord::get_child_record_pointer() const { return (*records)->at(record_offset+SNARL_CHILD_RECORD_OFFSET) ; } @@ -4444,7 +4436,7 @@ SnarlDistanceIndex::SimpleSnarlRecord::SimpleSnarlRecord (size_t pointer, const node_rank = node; #ifdef debug_distance_indexing assert (node_rank >=2); - assert(get_record_type() == SIMPLE_SNARL || get_record_type() == DISTANCED_SIMPLE_SNARL); + assert(is_simple_snarl(get_record_type())); #endif } @@ -4455,7 +4447,7 @@ SnarlDistanceIndex::SimpleSnarlRecord::SimpleSnarlRecord (net_handle_t net, cons #ifdef debug_distance_indexing assert (node_rank >=2); - assert(get_record_type() == SIMPLE_SNARL || get_record_type() == DISTANCED_SIMPLE_SNARL); + assert(is_simple_snarl(get_record_type())); #endif } @@ -4617,7 +4609,7 @@ SnarlDistanceIndex::NodeRecord::NodeRecord (size_t pointer, size_t node_offset, records = tree_records; #ifdef debug_distance_indexing - assert(get_record_type() == NODE || get_record_type() == DISTANCED_NODE); + assert(is_node(get_record_type())); #endif } @@ -4627,7 +4619,7 @@ SnarlDistanceIndex::NodeRecord::NodeRecord (net_handle_t net, const bdsg::yomo:: #ifdef debug_distance_indexing assert(get_handle_type(net) == NODE_HANDLE || get_handle_type(net) == CHAIN_HANDLE); - assert(get_record_type() == NODE || get_record_type() == DISTANCED_NODE); + assert(is_node(get_record_type())); assert(get_connectivity(net) == START_END || get_connectivity(net) == END_START || get_connectivity(net) == START_START || get_connectivity(net) == END_END); #endif @@ -4687,7 +4679,7 @@ SnarlDistanceIndex::TrivialSnarlRecord::TrivialSnarlRecord (size_t offset, const record_offset = offset; #ifdef debug_distance_indexing - assert(get_record_type() == TRIVIAL_SNARL || get_record_type() == DISTANCED_TRIVIAL_SNARL); + assert(is_trivial_snarl(get_record_type())); //assert(get_connectivity(net) == START_END || get_connectivity(net) == 
END_START // || get_connectivity(net) == START_START || get_connectivity(net) == END_END); #endif @@ -4703,7 +4695,7 @@ tuple SnarlDistanceIndex::TrivialSnarlRecord::ge throw runtime_error("error: trying to access get distance in a distanceless index"); } #ifdef debug_distances - assert(get_record_type() == TRIVIAL_SNARL || get_record_type() == DISTANCED_TRIVIAL_SNARL); + assert(is_trivial_snarl(get_record_type())); #endif size_t prefix_sum = (*records)->at(record_offset+TRIVIAL_SNARL_PREFIX_SUM_OFFSET); size_t forward_loop = (*records)->at(record_offset+TRIVIAL_SNARL_FORWARD_LOOP_OFFSET); @@ -4753,7 +4745,7 @@ size_t SnarlDistanceIndex::TrivialSnarlRecord::get_max_prefix_sum(size_t node_ra throw runtime_error("error: trying to access get distance in a distanceless index"); } #ifdef debug_distances - assert(get_record_type() == TRIVIAL_SNARL || get_record_type() == DISTANCED_TRIVIAL_SNARL); + assert(is_trivial_snarl(get_record_type())); #endif size_t prefix_sum = (*records)->at(record_offset+TRIVIAL_SNARL_MAX_PREFIX_SUM_OFFSET); @@ -4772,7 +4764,7 @@ size_t SnarlDistanceIndex::TrivialSnarlRecord::get_prefix_sum(size_t node_rank) throw runtime_error("error: trying to access get distance in a distanceless index"); } #ifdef debug_distances - assert(get_record_type() == TRIVIAL_SNARL || get_record_type() == DISTANCED_TRIVIAL_SNARL); + assert(is_trivial_snarl(get_record_type())); #endif size_t prefix_sum = (*records)->at(record_offset+TRIVIAL_SNARL_PREFIX_SUM_OFFSET); prefix_sum = prefix_sum == 0 ? 
std::numeric_limits::max() : prefix_sum - 1; @@ -4788,7 +4780,7 @@ size_t SnarlDistanceIndex::TrivialSnarlRecord::get_forward_loop(size_t node_rank throw runtime_error("error: trying to access get distance in a distanceless index"); } #ifdef debug_distances - assert(get_record_type()== TRIVIAL_SNARL || get_record_type() == DISTANCED_TRIVIAL_SNARL); + assert(is_trivial_snarl(get_record_type())); #endif size_t forward_loop = (*records)->at(record_offset+TRIVIAL_SNARL_FORWARD_LOOP_OFFSET); forward_loop = forward_loop == 0 ? std::numeric_limits::max() : forward_loop - 1; @@ -4809,7 +4801,7 @@ size_t SnarlDistanceIndex::TrivialSnarlRecord::get_reverse_loop(size_t node_rank throw runtime_error("error: trying to access get distance in a distanceless index"); } #ifdef debug_distances - assert(get_record_type() == TRIVIAL_SNARL || get_record_type() == DISTANCED_TRIVIAL_SNARL); + assert(is_trivial_snarl(get_record_type())); #endif size_t reverse_loop = (*records)->at(record_offset+TRIVIAL_SNARL_REVERSE_LOOP_OFFSET); reverse_loop = reverse_loop == 0 ? 
std::numeric_limits::max() : reverse_loop - 1; @@ -4874,7 +4866,7 @@ SnarlDistanceIndex::NodeRecordWriter::NodeRecordWriter (size_t pointer, size_t n //Set the pointer for the node to this record #ifdef debug_distance_indexinging - assert (type == NODE || type == DISTANCED_NODE); + assert (is_node(type)); cerr << get_node_pointer_offset(node_id, (*records)->at(MIN_NODE_ID_OFFSET), @@ -4949,7 +4941,7 @@ SnarlDistanceIndex::TrivialSnarlRecordWriter::TrivialSnarlRecordWriter (size_t p TrivialSnarlRecord::record_offset = pointer; TrivialSnarlRecord::records = records; - assert (type == TRIVIAL_SNARL || type == DISTANCED_TRIVIAL_SNARL); + assert (is_trivial_snarl(type)); #ifdef debug_distance_indexing cerr << " Resizing array to add trivial snarl: length " << (*records)->size() << " -> " << @@ -5037,36 +5029,35 @@ SnarlDistanceIndex::ChainRecord::ChainRecord (size_t pointer, const bdsg::yomo:: record_offset = pointer; records = tree_records; - net_handle_record_t record_type= get_record_handle_type(); - if (record_type == NODE_HANDLE) { - net_handle_record_t parent_type = SnarlTreeRecord( - NodeRecord(pointer, 0, records).get_parent_record_offset(), records - ).get_record_handle_type(); -#ifdef debug_distance_indexing - assert(parent_type == ROOT_HANDLE || parent_type == SNARL_HANDLE); -#endif - } else { -#ifdef debug_distance_indexing - assert(get_record_handle_type() == CHAIN_HANDLE); -#endif - } -} -SnarlDistanceIndex::ChainRecord::ChainRecord (net_handle_t net, const bdsg::yomo::UniqueMappedPointer* tree_records){ - record_offset = get_record_offset(net); - records = tree_records; - - net_handle_record_t record_type = get_record_handle_type(); #ifdef debug_distance_indexing - if (record_type == NODE_HANDLE) { + net_handle_record_t type = get_record_handle_type(); + if (type == NODE_HANDLE) { net_handle_record_t parent_type = SnarlTreeRecord( NodeRecord(record_offset, 0, records).get_parent_record_offset(), records ).get_record_handle_type(); + assert(parent_type == 
ROOT_HANDLE || parent_type == SNARL_HANDLE); - } else { - assert(get_record_handle_type() == CHAIN_HANDLE); + return; + } + record_t record_type = get_record_type(); + if (type == SNARL_HANDLE) { + // Simple snarls are also able to be looked at as chains, and ChainRecord knows how to parse them. + if (is_simple_snarl(record_type)) { + // This is allowed + return; + } + } else if (type == CHAIN_HANDLE) { + // Chain records as stored are allowed. + return; } + throw std::runtime_error("ChainRecord with handle type " + std::to_string(type) + " " + stringify(type) + " and record type " + std::to_string(record_type) + " " + stringify(record_type) + " at offset " + std::to_string(record_offset) + " is not a node or a chain or a simple snarl"); #endif +} + +SnarlDistanceIndex::ChainRecord::ChainRecord (net_handle_t net, const bdsg::yomo::UniqueMappedPointer* tree_records) : + ChainRecord(get_record_offset(net), tree_records) { + // Nothing to do! } SnarlDistanceIndex::ChainRecord::ChainRecord (net_handle_t net, const bdsg::yomo::UniqueMappedPointer* tree_records, size_t tag){ @@ -5074,15 +5065,27 @@ SnarlDistanceIndex::ChainRecord::ChainRecord (net_handle_t net, const bdsg::yomo records = tree_records; #ifdef debug_distance_indexing - net_handle_record_t record_type= SnarlDistanceIndex::get_record_handle_type(SnarlDistanceIndex::get_record_type(tag )); - if (record_type == NODE_HANDLE) { + net_handle_record_t type = SnarlDistanceIndex::get_record_handle_type(SnarlDistanceIndex::get_record_type(tag )); + if (type == NODE_HANDLE) { net_handle_record_t parent_type = SnarlTreeRecord( NodeRecord(record_offset, 0, records).get_parent_record_offset(), records ).get_record_handle_type(); + assert(parent_type == ROOT_HANDLE || parent_type == SNARL_HANDLE); - } else { - assert(get_record_handle_type() == CHAIN_HANDLE); - } + return; + } + record_t record_type = get_record_type(); + if (type == SNARL_HANDLE) { + // Simple snarls are also able to be looked at as chains, and 
ChainRecord knows how to parse them. + if (is_simple_snarl(record_type)) { + // This is allowed + return; + } + } else if (type == CHAIN_HANDLE) { + // Chain records as stored are allowed. + return; + } + throw std::runtime_error("ChainRecord with handle type " + std::to_string(type) + " " + stringify(type) + " and record type " + std::to_string(record_type) + " " + stringify(record_type) + " at offset " + std::to_string(record_offset) + " is not a node or a chain or a simple snarl"); #endif } @@ -5141,10 +5144,18 @@ size_t SnarlDistanceIndex::ChainRecord::get_distance(size_t rank1, bool left_sid throw runtime_error("error: Trying to get chain distances from a node"); } #endif - - if (get_record_type() == MULTICOMPONENT_CHAIN) { + + record_t record_type = get_record_type(); + if (record_type == MULTICOMPONENT_CHAIN) { if (component1 != component2) { +#ifdef debug_distances + std::cerr << "Ranks " << rank1 << " and " << rank2 << " are in different multicomponent chain components." << std::endl; +#endif if (is_looping_chain) { +#ifdef debug_distances + std::cerr << "Chain is looping." << std::endl; +#endif + //If this is a looping chain, then the first/last node could be in two //components return get_distance_taking_chain_loop(rank1, left_side1, node_length1, @@ -5152,9 +5163,14 @@ size_t SnarlDistanceIndex::ChainRecord::get_distance(size_t rank1, bool left_sid rank2, left_side2, node_length2, prefix_sum2, forward_loop2, reverse_loop2, end_component2); } else { +#ifdef debug_distances + std::cerr << "Chain is not looping." 
<< std::endl; +#endif return std::numeric_limits::max(); } } + } else if (!is_chain(record_type)) { + std::cerr << "Warning: weird record type for chain: " << stringify(record_type) << std::endl; } @@ -5165,30 +5181,50 @@ size_t SnarlDistanceIndex::ChainRecord::get_distance(size_t rank1, bool left_sid if (rank1 == rank2) { //If these are the same node, then the path would need to go around the node distance = sum(sum(forward_loop1,reverse_loop2),node_length1); +#ifdef debug_distances + std::cerr << "Distance around shared node is " << distance << std::endl; +#endif } else { distance = minus(prefix_sum2 - prefix_sum1, node_length1); +#ifdef debug_distances + std::cerr << "Distance forward along chain is " << distance << std::endl; +#endif } } else if (!left_side1 && !left_side2) { //Right side of 1 and right side of 2 if (rank1 == rank2) { distance = forward_loop2; - +#ifdef debug_distances + std::cerr << "Distance on right self loop is " << distance << std::endl; +#endif } else { distance = minus( sum(sum(prefix_sum2 - prefix_sum1, node_length2), forward_loop2), node_length1); +#ifdef debug_distances + std::cerr << "Distance from right to other right is " << distance << std::endl; +#endif } } else if (left_side1 && left_side2) { //Left side of 1 and left side of 2 if (rank1 == rank2) { distance = reverse_loop1; +#ifdef debug_distances + std::cerr << "Distance on left self loop is " << distance << std::endl; +#endif } else { distance = sum(prefix_sum2 - prefix_sum1, reverse_loop1); +#ifdef debug_distances + std::cerr << "Distance from left to other left " << distance << std::endl; +#endif } } else { //Left side of 1 and right side of 2 distance = sum(sum(sum(prefix_sum2 - prefix_sum1, reverse_loop1), forward_loop2), node_length2); +#ifdef debug_distances + std::cerr << "Distance back along chain is " << distance << std::endl; +#endif } if (is_looping_chain) { @@ -5196,6 +5232,9 @@ size_t SnarlDistanceIndex::ChainRecord::get_distance(size_t rank1, bool left_sid 
prefix_sum1, forward_loop1, reverse_loop1, end_component1, rank2, left_side2, node_length2, prefix_sum2, forward_loop2, reverse_loop2, end_component2)); +#ifdef debug_distances + std::cerr << "After handling looping, distance is " << distance << std::endl; +#endif } return distance; } @@ -5335,7 +5374,7 @@ net_handle_t SnarlDistanceIndex::ChainRecord::get_next_child(const net_handle_t& if (get_handle_type(net_handle) == NODE_HANDLE) { //If this is a node in a trivial snarl #ifdef debug_snarl_traversal - cerr << "GEt next in chain after " << TrivialSnarlRecord(get_record_offset(net_handle), records).get_node_id(get_node_record_offset(net_handle)) << endl; + cerr << "Get next in chain after " << TrivialSnarlRecord(get_record_offset(net_handle), records).get_node_id(get_node_record_offset(net_handle)) << endl; #endif if (go_left && get_node_record_offset(net_handle) != 0) { //If we are going left and this is not the first node in the trivial snarl, @@ -5426,13 +5465,10 @@ net_handle_t SnarlDistanceIndex::ChainRecord::get_next_child(const net_handle_t& size_t next_pointer = get_record_offset(net_handle) + (go_left ? -(*records)->at(get_record_offset(net_handle)-2)-2 : (*records)->at(get_record_offset(net_handle)-1)+2); - if (SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)) == SNARL || - SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)) == DISTANCED_SNARL|| - SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)) == OVERSIZED_SNARL) { + if (SnarlDistanceIndex::is_nonroot_nonsimple_snarl(SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)))) { //If the next thing is a snarl, then just return the snarl going in the direction we just moved in return get_net_handle_from_values(next_pointer, (go_left ? 
END_START : START_END), SNARL_HANDLE); - } else if (SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)) == SIMPLE_SNARL || - SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)) == DISTANCED_SIMPLE_SNARL) { + } else if (SnarlDistanceIndex::is_simple_snarl(SnarlDistanceIndex::get_record_type((*records)->at(next_pointer)))) { //If the next thing is a snarl, then just return the snarl going in the direction we just moved in return get_net_handle_from_values(next_pointer, (go_left ? END_START : START_END), SNARL_HANDLE, 1); } else{ @@ -5492,9 +5528,7 @@ bool SnarlDistanceIndex::ChainRecord::for_each_child(const std::function* records){ #ifdef debug_distance_indexing - assert(type == CHAIN || - type == DISTANCED_CHAIN || - type == MULTICOMPONENT_CHAIN); + assert(is_chain(type)); #endif record_offset = pointer; records = records; @@ -5570,13 +5604,13 @@ void SnarlDistanceIndex::ChainRecordWriter::set_distance_right_end(size_t distan } //Add a snarl to the end of the chain and return a SnarlRecordWriter pointing to it -SnarlDistanceIndex::SnarlRecordWriter SnarlDistanceIndex::ChainRecordWriter::add_snarl(size_t snarl_size, record_t type, size_t previous_child_offset) { +SnarlDistanceIndex::SnarlRecordWriter SnarlDistanceIndex::ChainRecordWriter::add_snarl(size_t snarl_size, record_t type, size_t vec_size, size_t previous_child_offset) { - size_t snarl_record_size = SnarlRecord::record_size(type, snarl_size); + size_t snarl_record_size = SnarlRecord::record_size(type, snarl_size, vec_size); #ifdef debug_distance_indexing cerr << (*records)->size() << " Adding child snarl length to the end of the array " << endl; cerr << "Previous child was at " << previous_child_offset << endl; - assert(SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset))== DISTANCED_TRIVIAL_SNARL || SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset))== TRIVIAL_SNARL); + 
assert(SnarlDistanceIndex::is_trivial_snarl(SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)))); #endif @@ -5590,7 +5624,7 @@ SnarlDistanceIndex::SnarlRecordWriter SnarlDistanceIndex::ChainRecordWriter::add (*records)->resize(start_i+1); (*records)->at(start_i) = snarl_record_size; (*records)->reserve(start_i + snarl_record_size); - SnarlRecordWriter snarl_record(snarl_size, records, type); + SnarlRecordWriter snarl_record(snarl_size, records, type, vec_size); snarl_record.set_parent_record_offset(get_offset()); #ifdef debug_distance_indexing cerr << (*records)->size() << " Adding child snarl length to the end of the array " << endl; @@ -5609,7 +5643,7 @@ SnarlDistanceIndex::SimpleSnarlRecordWriter SnarlDistanceIndex::ChainRecordWrite size_t snarl_record_size = SIMPLE_SNARL_RECORD_SIZE + 2*snarl_size; #ifdef debug_distance_indexing cerr << (*records)->size() << " Adding simple snarl to the end of the array " << endl; - assert(SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset))== DISTANCED_TRIVIAL_SNARL || SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset))== TRIVIAL_SNARL); + assert(SnarlDistanceIndex::is_trivial_snarl(SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)))); #endif @@ -5645,8 +5679,7 @@ size_t SnarlDistanceIndex::ChainRecordWriter::add_node(nid_t node_id, size_t nod #ifdef debug_distance_indexing cerr << "Adding new node to chain, with previous child at offset " << previous_child_offset << endl; #endif - if ((SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) == DISTANCED_TRIVIAL_SNARL || - SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) == TRIVIAL_SNARL) + if ((SnarlDistanceIndex::is_trivial_snarl(SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)))) && (TrivialSnarlRecord(previous_child_offset, records).get_node_count() == MAX_TRIVIAL_SNARL_NODE_COUNT || new_record || reverse_loop == 
0)) { //If the last thing was a trivial snarl and it is full, then finish it off @@ -5657,11 +5690,7 @@ size_t SnarlDistanceIndex::ChainRecordWriter::add_node(nid_t node_id, size_t nod } if (previous_child_offset == 0 - || SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) == SNARL - || SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) == DISTANCED_SNARL - || SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) == OVERSIZED_SNARL - || SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) == SIMPLE_SNARL - || SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset)) == DISTANCED_SIMPLE_SNARL + || SnarlDistanceIndex::is_nonroot_nontrivial_snarl(SnarlDistanceIndex::get_record_type((*records)->at(previous_child_offset))) || TrivialSnarlRecord(previous_child_offset, records).get_node_count() == MAX_TRIVIAL_SNARL_NODE_COUNT || reverse_loop == 0 || new_record) { //If the last thing was a snarl or nothing (previous_child_offset == 0, meaning that this is the @@ -5766,21 +5795,26 @@ string SnarlDistanceIndex::net_handle_as_string(const net_handle_t& net) const { net_handle_record_t type = get_handle_type(net); SnarlTreeRecord record (net, &snarl_tree_records); net_handle_record_t record_type = record.get_record_handle_type(); - string result; + string result = stringify(type) + " "; if (type == ROOT_HANDLE) { - if (record.get_record_type() == ROOT_SNARL || record.get_record_type() == DISTANCED_ROOT_SNARL) { - return "root snarl"; + if (is_root_snarl(record.get_record_type())) { + result += "root snarl"; + return result; } else { - return "root"; + result += "root"; + return result; } } else if (type == NODE_HANDLE) { if (ends_at(net) == starts_at(net)) { - return "node" + std::to_string( node_id(net)) + (ends_at(net) == START ? "rev" : "fd") + " that is the end node of a looping chain"; + result += "node" + std::to_string( node_id(net)) + (ends_at(net) == START ? 
"rev" : "fd") + " that is the end node of a looping chain"; + return result; } - return "node " + std::to_string( node_id(net)) + (ends_at(net) == START ? "rev" : "fd"); + result += "node " + std::to_string( node_id(net)) + (ends_at(net) == START ? "rev" : "fd"); + return result; } else if (type == SNARL_HANDLE) { if (record.get_record_type() == ROOT) { - return "root snarl"; + result += "root snarl"; + return result; } if (get_node_record_offset(net) == 1) { result += "simple snarl "; @@ -5791,11 +5825,13 @@ string SnarlDistanceIndex::net_handle_as_string(const net_handle_t& net) const { result += "snarl "; } } else if (type == CHAIN_HANDLE && record_type == NODE_HANDLE) { - return "node " + std::to_string( NodeRecord(net, &snarl_tree_records).get_node_id()) + result += "node " + std::to_string( NodeRecord(net, &snarl_tree_records).get_node_id()) + (ends_at(net) == START ? "rev" : "fd") + " pretending to be a chain"; + return result; } else if (type == CHAIN_HANDLE && record_type == SNARL_HANDLE) { - return "node " + std::to_string( SimpleSnarlRecord(net, &snarl_tree_records).get_node_id()) + result += "node " + std::to_string( SimpleSnarlRecord(net, &snarl_tree_records).get_node_id()) + (ends_at(net) == START ? "rev" : "fd") + " pretending to be a chain in a simple snarl"; + return result; }else if (type == CHAIN_HANDLE) { result += "chain "; } else if (type == SENTINEL_HANDLE) { @@ -5808,7 +5844,7 @@ string SnarlDistanceIndex::net_handle_as_string(const net_handle_t& net) const { + "->" + std::to_string(record.get_end_id()) + (record.get_end_orientation() ? "rev" : "fd")); - result += "traversing "; + result += " traversing "; result += (starts_at(net) == START ? "start" : (starts_at(net) == END ? "end" : "tip")); result += "->"; result += (ends_at(net) == START ? "start" : (ends_at(net) == END ? 
"end" : "tip")); @@ -5879,18 +5915,13 @@ void SnarlDistanceIndex::print_descendants_of(const net_handle_t net) const { } else { parent = net_handle_as_string(get_parent(net)); if (record_type == CHAIN_HANDLE) { - child_count = ChainRecord(net, &snarl_tree_records).get_node_count(); - } else if (record.get_record_type() == SNARL || - record.get_record_type() == DISTANCED_SNARL|| - record.get_record_type() == OVERSIZED_SNARL - ){ + child_count = ChainRecord(net, &snarl_tree_records).get_node_count(); + } else if (is_nonroot_nonsimple_snarl(record.get_record_type())) { child_count = SnarlRecord(net, &snarl_tree_records).get_node_count(); - } else if (record.get_record_type() == TRIVIAL_SNARL || - record.get_record_type() == DISTANCED_TRIVIAL_SNARL) { + } else if (is_trivial_snarl(record.get_record_type())) { child_count = TrivialSnarlRecord(get_record_offset(net), &snarl_tree_records).get_node_count(); - }else if (record.get_record_type() == SIMPLE_SNARL || - record.get_record_type() == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(record.get_record_type())) { child_count = SimpleSnarlRecord(net, &snarl_tree_records).get_node_count(); } else { throw runtime_error("error: printing the wrong kind of record"); @@ -5915,14 +5946,10 @@ void SnarlDistanceIndex::print_snarl_stats() const { //Get the number of children depending on the type of record size_t child_count; - if (record.get_record_type() == SNARL || - record.get_record_type() == DISTANCED_SNARL|| - record.get_record_type() == OVERSIZED_SNARL - ){ - + if (is_nonroot_nonsimple_snarl(record.get_record_type())) { + child_count = SnarlRecord(snarl_child, &snarl_tree_records).get_node_count(); - } else if (record.get_record_type() == SIMPLE_SNARL || - record.get_record_type() == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(record.get_record_type())) { child_count = SimpleSnarlRecord(snarl_child, &snarl_tree_records).get_node_count(); } else { throw runtime_error("error: getting the snarl child count 
of the wrong type of record"); @@ -5985,14 +6012,11 @@ void SnarlDistanceIndex::write_snarls_to_json() const { json_object_set_new(out_json, "parent", parent_json); //Get the number of children depending on the type of record - if (record.get_record_type() == SNARL || - record.get_record_type() == DISTANCED_SNARL|| - record.get_record_type() == OVERSIZED_SNARL - ){ + if (is_nonroot_nonsimple_snarl(record.get_record_type())) { + size_t child_count = SnarlRecord(snarl_child, &snarl_tree_records).get_node_count(); json_object_set_new(out_json, "child_count", json_integer(child_count)); - } else if (record.get_record_type() == SIMPLE_SNARL || - record.get_record_type() == DISTANCED_SIMPLE_SNARL) { + } else if (is_simple_snarl(record.get_record_type())) { size_t child_count = SimpleSnarlRecord(snarl_child, &snarl_tree_records).get_node_count(); json_object_set_new(out_json, "child_count", json_integer(child_count)); } else { @@ -6000,8 +6024,8 @@ void SnarlDistanceIndex::write_snarls_to_json() const { } //Set the min and max length, if applicable - if (record.get_record_type() == DISTANCED_SNARL || - record.get_record_type() == DISTANCED_SIMPLE_SNARL){ + if (has_distances(record.get_record_type()) && is_nonroot_nontrivial_snarl(record.get_record_type()) && !is_oversized_snarl(record.get_record_type())){ + // TODO: May need a new accessor to match just snarls and simple snarls that have distances. 
json_object_set_new(out_json, "minimum_length", json_integer(minimum_length(snarl_child))); json_object_set_new(out_json, "maximum_length", json_integer(maximum_length(snarl_child))); } @@ -6230,7 +6254,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector> to new offset - unordered_map>, size_t> record_to_offset; + unordered_map, size_t> record_to_offset; //Set the root index for (size_t temp_index_i = 0 ; temp_index_i < temporary_indexes.size() ; temp_index_i++) { //Any root will point to the same root @@ -6242,10 +6266,10 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector> temp_record_stack = temp_index->components; + vector temp_record_stack = temp_index->components; while (!temp_record_stack.empty()) { - pair current_record_index = temp_record_stack.back(); + temp_record_ref_t current_record_index = temp_record_stack.back(); temp_record_stack.pop_back(); #ifdef debug_distance_indexing @@ -6256,8 +6280,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_chain_records[current_record_index.second]; + const TemporaryDistanceIndex::TemporaryChainRecord& temp_chain_record = temp_index->get_chain(current_record_index); if (!temp_chain_record.is_trivial) { //If this chain contains at least two nodes #ifdef debug_distance_indexing @@ -6271,8 +6294,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_snarl_records[temp_chain_record.parent.second]; + const TemporaryDistanceIndex::TemporarySnarlRecord& temp_parent_record = temp_index->get_snarl(temp_chain_record.parent); if (temp_parent_record.is_root_snarl) { is_child_of_root_snarl = true; } @@ -6286,12 +6308,12 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectorsize(), record_type, temp_chain_record.prefix_sum.size(), &snarl_tree_records); chain_record_constructor.set_start_end_connected(); } else { - chain_record_constructor = ChainRecordWriter(snarl_tree_records->size(), MULTICOMPONENT_CHAIN, + chain_record_constructor = 
ChainRecordWriter(snarl_tree_records->size(), encode_chain(!ignore_distances, true), temp_chain_record.prefix_sum.size(), &snarl_tree_records); } chain_record_constructor.set_parent_record_offset( @@ -6333,7 +6355,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector& child_record_index = temp_chain_record.children[child_record_index_i]; + const temp_record_ref_t& child_record_index = temp_chain_record.children[child_record_index_i]; //Go through each node and snarl in the chain and add them to the index #ifdef debug_distance_indexing cerr << " Adding child of the chain: " << temp_index->structure_start_end_as_string(child_record_index) << endl; @@ -6345,9 +6367,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_node_records[child_record_index.second-min_node_id]; - + const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index->get_node(child_record_index); //Make a new node record size_t new_offset = chain_record_constructor.add_node( @@ -6383,8 +6403,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_snarl_records[child_record_index.second]; + const TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index->get_snarl(child_record_index); if (!temp_snarl_record.is_trivial && !temp_snarl_record.is_simple) { //If this is an actual snarl that we need to make @@ -6392,11 +6411,11 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector snarl_size_limit); - record_t record_type = ignore_distances ? SNARL : - (temp_snarl_record.node_count <= snarl_size_limit ? 
DISTANCED_SNARL : OVERSIZED_SNARL); SnarlRecordWriter snarl_record_constructor = - chain_record_constructor.add_snarl(temp_snarl_record.node_count, record_type, last_child_offset.first); + chain_record_constructor.add_snarl(temp_snarl_record.node_count, record_type, temp_snarl_record.hub_labels.size(), last_child_offset.first); //Record how to find the new snarl record record_to_offset.emplace(make_pair(temp_index_i, child_record_index), snarl_record_constructor.record_offset); @@ -6410,26 +6429,38 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector node_rank1 = it.first.first; - pair node_rank2 = it.first.second; - const size_t distance = it.second; - - if (!ignore_distances) { - //If we are keeping track of distances - //If the distance exceeded the limit, then it wasn't found in the first place - snarl_record_constructor.set_distance(node_rank1.first, node_rank1.second, - node_rank2.first, node_rank2.second, distance); - - if (temp_snarl_record.tippy_child_ranks.count(node_rank1.first) - && temp_snarl_record.tippy_child_ranks.count(node_rank2.first)) { - snarl_record_constructor.set_tip_tip_connected(); - } + if (is_oversized_snarl(record_type)) { + // We need to copy the packed hub label vector into place. + for (size_t i = 0; i < temp_snarl_record.hub_labels.size(); i++) { + // TODO: Make this an std::copy or something. + snarl_record_constructor.set_vec_entry(i, temp_snarl_record.hub_labels.at(i)); + } + // TODO: When should we call + // snarl_record_constructor.set_tip_tip_connected()? + // Add code to determine that somewhere! + } else { + // Store individual distance entries. 
+ for (const auto& it : temp_snarl_record.distances) { + pair node_rank1 = it.first.first; + pair node_rank2 = it.first.second; + const size_t distance = it.second; + + if (!ignore_distances) { + //If we are keeping track of distances + //If the distance exceeded the limit, then it wasn't found in the first place + snarl_record_constructor.set_distance(node_rank1.first, node_rank1.second, + node_rank2.first, node_rank2.second, distance); + + if (temp_snarl_record.tippy_child_ranks.count(node_rank1.first) + && temp_snarl_record.tippy_child_ranks.count(node_rank2.first)) { + snarl_record_constructor.set_tip_tip_connected(); + } #ifdef debug_distance_indexing - assert(distance <= temp_snarl_record.max_distance); - assert(snarl_record_constructor.get_distance(node_rank1.first, node_rank1.second, - node_rank2.first, node_rank2.second) == distance); + assert(distance <= temp_snarl_record.max_distance); + assert(snarl_record_constructor.get_distance(node_rank1.first, node_rank1.second, + node_rank2.first, node_rank2.second) == distance); #endif + } } } //Now set the connectivity of this snarl @@ -6447,7 +6478,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector& child : temp_snarl_record.children) { + for (const temp_record_ref_t& child : temp_snarl_record.children) { temp_record_stack.emplace_back(child); #ifdef debug_distance_indexing cerr << " " << temp_index->structure_start_end_as_string(child) << endl; @@ -6494,7 +6525,8 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector& child_index = temp_snarl_record.children[i]; + const temp_record_ref_t& child_index = temp_snarl_record.children[i]; if( child_index.first == TEMP_CHAIN) { - assert(temp_index->temp_chain_records[child_index.second].children.size() == 1); - const pair& node_index = temp_index->temp_chain_records[child_index.second].children.front(); - const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = - temp_index->temp_node_records[node_index.second-min_node_id]; + if 
(temp_index->get_chain(child_index).children.size() != 1) { + throw runtime_error("size of children != 1, child index: "+to_string(child_index.second)+", bounding nodes: "+to_string(temp_snarl_record.start_node_id)+" "+to_string(temp_snarl_record.end_node_id)); + } + const temp_record_ref_t& node_index = temp_index->get_chain(child_index).children.front(); + const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index->get_node(node_index); //If there is a way to go from the node forward to the start node, //then it is reversed - size_t rank =temp_index->temp_chain_records[child_index.second].rank_in_parent; + size_t rank = temp_index->get_chain(child_index).rank_in_parent; snarl_record_constructor.add_child(i+2, temp_node_record.node_id, temp_node_record.node_length, temp_node_record.reversed_in_parent); } else { assert(child_index.first == TEMP_NODE); - const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = - temp_index->temp_node_records[child_index.second-min_node_id]; + const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index->get_node(child_index); size_t rank =temp_node_record.rank_in_parent; snarl_record_constructor.add_child(i+2, temp_node_record.node_id, temp_node_record.node_length, temp_node_record.reversed_in_parent); @@ -6552,13 +6584,12 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_node_records[temp_chain_record.children[0].second-min_node_id]; + const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index->get_node(temp_chain_record.children[0]); bool ignore_distances = (snarl_size_limit == 0) || only_top_level_chain_distances; - record_t record_type = ignore_distances ? 
NODE : DISTANCED_NODE; + record_t record_type = encode_node(!ignore_distances); NodeRecordWriter node_record(snarl_tree_records->size(), 0, record_type, &snarl_tree_records, temp_node_record.node_id); node_record.set_node_id(temp_node_record.node_id); node_record.set_rank_in_parent(temp_chain_record.rank_in_parent); @@ -6582,12 +6613,12 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_snarl_records[current_record_index.second]; + const TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index->get_snarl(current_record_index); record_to_offset.emplace(make_pair(temp_index_i,current_record_index), snarl_tree_records->size()); - SnarlRecordWriter snarl_record_constructor (temp_snarl_record.node_count, &snarl_tree_records, record_type); + SnarlRecordWriter snarl_record_constructor (temp_snarl_record.node_count, &snarl_tree_records, record_type, temp_snarl_record.hub_labels.size()); //Fill in snarl info snarl_record_constructor.set_parent_record_offset(0); @@ -6621,7 +6652,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vector& child : temp_snarl_record.children) { + for (const temp_record_ref_t& child : temp_snarl_record.children) { temp_record_stack.emplace_back(child); } @@ -6632,11 +6663,10 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectorstructure_start_end_as_string(current_record_index) << endl; #endif - const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = - temp_index->temp_node_records[current_record_index.second-min_node_id]; + const TemporaryDistanceIndex::TemporaryNodeRecord& temp_node_record = temp_index->get_node(current_record_index); bool ignore_distances = (snarl_size_limit == 0) || only_top_level_chain_distances; - record_t record_type = ignore_distances ? 
NODE : DISTANCED_NODE; + record_t record_type = encode_node(!ignore_distances); NodeRecordWriter node_record(snarl_tree_records->size(), 0, record_type, &snarl_tree_records, temp_node_record.node_id); node_record.set_node_id(temp_node_record.node_id); node_record.set_rank_in_parent(temp_node_record.rank_in_parent); @@ -6661,7 +6691,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectorcomponents.size() ; component_num++){ - const pair& component_index = temp_index->components[component_num]; + const temp_record_ref_t& component_index = temp_index->components[component_num]; //Let the root record know that it has another root root_record.add_component(component_num,record_to_offset[make_pair(temp_index_i,component_index)]); @@ -6673,7 +6703,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectorget_handle(record.get_start_id(), !record.get_start_orientation()); handle_t end_out = graph->get_handle(record.get_end_id(), record.get_end_orientation()); @@ -6718,20 +6748,21 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_snarl_records.size() ; temp_snarl_i ++) { + temp_record_ref_t temp_snarl_ref = make_pair(TEMP_SNARL, temp_snarl_i); //Get the temporary index for this snarl - const TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index->temp_snarl_records[temp_snarl_i]; + const TemporaryDistanceIndex::TemporarySnarlRecord& temp_snarl_record = temp_index->get_snarl(temp_snarl_ref); if (!temp_snarl_record.is_trivial && !temp_snarl_record.is_simple) { //And a constructor for the permanent record, which we've already created SnarlRecordWriter snarl_record_constructor (&snarl_tree_records, - record_to_offset[make_pair(temp_index_i, make_pair(TEMP_SNARL, temp_snarl_i))]); + record_to_offset[make_pair(temp_index_i, temp_snarl_ref)]); //Now add the children and tell the record where to find them snarl_record_constructor.set_child_record_pointer(snarl_tree_records->size()); - for (pair child : 
temp_snarl_record.children) { + for (temp_record_ref_t child : temp_snarl_record.children) { snarl_record_constructor.add_child(record_to_offset[make_pair(temp_index_i, child)]); //Check if the child is a tip, and if so set start/end_tip connectivity of parent snarl if (child.first == TEMP_NODE) { - auto temp_node_record = temp_index->temp_node_records[child.second-min_node_id]; + auto& temp_node_record = temp_index->get_node(child); if (temp_node_record.is_tip) { if (temp_node_record.distance_left_start != std::numeric_limits::max() || temp_node_record.distance_right_start != std::numeric_limits::max()){ @@ -6743,7 +6774,7 @@ void SnarlDistanceIndex::get_snarl_tree_records(const vectortemp_chain_records[child.second]; + auto& temp_chain_record = temp_index->get_chain(child); if (temp_chain_record.is_tip) { if (temp_chain_record.distance_left_start != std::numeric_limits::max() || temp_chain_record.distance_right_start != std::numeric_limits::max()){ diff --git a/bdsg/src/test_libbdsg.cpp b/bdsg/src/test_libbdsg.cpp index 94c936d0..95490451 100644 --- a/bdsg/src/test_libbdsg.cpp +++ b/bdsg/src/test_libbdsg.cpp @@ -1,5094 +1,5466 @@ // // test_libbdsg.cpp -// +// // Contains tests for the data structures in libbdsg // -#include -#include -#include +#include #include -#include -#include -#include -#include #include #include +#include +#include +#include #include -#include +#include +#include +#include +#include #include // BINDER_IGNORE because Binder can't find this -#include #include +#include -#include "bdsg/packed_graph.hpp" +#include "bdsg/ch.hpp" #include "bdsg/hash_graph.hpp" -#include "bdsg/snarl_distance_index.hpp" -#include "bdsg/internal/packed_structs.hpp" #include "bdsg/internal/mapped_structs.hpp" -#include "bdsg/overlays/path_position_overlays.hpp" +#include "bdsg/internal/packed_structs.hpp" #include "bdsg/overlays/packed_path_position_overlay.hpp" #include "bdsg/overlays/packed_reference_path_overlay.hpp" -#include 
"bdsg/overlays/vectorizable_overlays.hpp" #include "bdsg/overlays/packed_subgraph_overlay.hpp" +#include "bdsg/overlays/path_position_overlays.hpp" #include "bdsg/overlays/reference_path_overlay.hpp" - +#include "bdsg/overlays/vectorizable_overlays.hpp" +#include "bdsg/packed_graph.hpp" +#include "bdsg/snarl_distance_index.hpp" using namespace bdsg; using namespace handlegraph; using namespace std; -//#define debug_at +// #define debug_at // Have helpers to store and check some test data size_t mix(size_t in, size_t salt = 0) { - return ((in * in + (in << 2)) ^ salt) + 1; + return ((in * in + (in << 2)) ^ salt) + 1; } -template -void fill_to(Vectorish& data, size_t count, int64_t nonce) { - for (size_t i = 0; i < count; i++) { - data.at(i) = mix(i, nonce); - } +template +void fill_to(Vectorish &data, size_t count, int64_t nonce) { + for (size_t i = 0; i < count; i++) { + data.at(i) = mix(i, nonce); + } } -template -void verify_to(const Vectorish& data, size_t count, int64_t nonce) { - if (count > data.size()) { - throw std::runtime_error("Trying to check " + std::to_string(count) + " items but only " + std::to_string(data.size()) + " are available"); - } - for (size_t i = 0; i < count; i++) { - auto correct_value = mix(i, nonce); - auto observed_value = data.at(i); - if (observed_value != correct_value) { - cerr << "At index " << i << " observed " << observed_value << " but expected " << correct_value << endl; - } - assert(observed_value == correct_value); +template +void verify_to(const Vectorish &data, size_t count, int64_t nonce) { + if (count > data.size()) { + throw std::runtime_error("Trying to check " + std::to_string(count) + + " items but only " + std::to_string(data.size()) + + " are available"); + } + for (size_t i = 0; i < count; i++) { + auto correct_value = mix(i, nonce); + auto observed_value = data.at(i); + if (observed_value != correct_value) { + cerr << "At index " << i << " observed " << observed_value + << " but expected " << correct_value << 
endl; } + assert(observed_value == correct_value); + } } /** * Given a resizeable two-level container of numbers, vigorously resize it and * its members and make sure they have the right values. */ -template -void bother_vector(TwoLevel& storage) { - - vector> truth; - - auto check = [&]() { - // Make sure the structure under test is holding the correct data. - if (storage.size() != truth.size()) { - std::cerr << "Structure under test has " << storage.size() << " items but should have " << truth.size() << std::endl; - assert(storage.size() == truth.size()); - } - for (size_t i = 0; i < truth.size(); i++) { - if (storage.at(i).size() != truth.at(i).size()) { - std::cerr << "Structure under test has " << storage.at(i).size() - << " items in item " << i << " but should have " << truth.size() << std::endl; - assert(storage.at(i).size() == truth.at(i).size()); - } - for (size_t j = 0; j < truth.at(i).size(); j++) { - if (storage.at(i).at(j) != truth.at(i).at(j)) { - std::cerr << "Structure under test has " << storage.at(i).at(j) - << " at " << j << " address " << ((intptr_t) &storage.at(i).at(j)) - << " in item " << i << " address " << ((intptr_t) &storage.at(i)) - << " but should have " << truth.at(i).at(j) << std::endl; - assert(storage.at(i).at(j) == truth.at(i).at(j)); - } - } +template void bother_vector(TwoLevel &storage) { + + vector> truth; + + auto check = [&]() { + // Make sure the structure under test is holding the correct data. 
+ if (storage.size() != truth.size()) { + std::cerr << "Structure under test has " << storage.size() + << " items but should have " << truth.size() << std::endl; + assert(storage.size() == truth.size()); + } + for (size_t i = 0; i < truth.size(); i++) { + if (storage.at(i).size() != truth.at(i).size()) { + std::cerr << "Structure under test has " << storage.at(i).size() + << " items in item " << i << " but should have " + << truth.size() << std::endl; + assert(storage.at(i).size() == truth.at(i).size()); + } + for (size_t j = 0; j < truth.at(i).size(); j++) { + if (storage.at(i).at(j) != truth.at(i).at(j)) { + std::cerr << "Structure under test has " << storage.at(i).at(j) + << " at " << j << " address " + << ((intptr_t)&storage.at(i).at(j)) << " in item " << i + << " address " << ((intptr_t)&storage.at(i)) + << " but should have " << truth.at(i).at(j) << std::endl; + assert(storage.at(i).at(j) == truth.at(i).at(j)); } - }; + } + } + }; - size_t seed = 0; + size_t seed = 0; - for (size_t iteration = 0; iteration < 2; iteration++) { - truth.resize(0); - storage.resize(0); - check(); - - for (size_t parent_size = 0; parent_size < 100; parent_size++) { + for (size_t iteration = 0; iteration < 2; iteration++) { + truth.resize(0); + storage.resize(0); + check(); + + for (size_t parent_size = 0; parent_size < 100; parent_size++) { #ifdef debug_bother - std::cerr << "Resize parent to " << parent_size << endl; + std::cerr << "Resize parent to " << parent_size << endl; #endif - truth.resize(parent_size); - storage.resize(parent_size); - check(); - - for (size_t child = 0; child < parent_size; child++) { - auto& truth_child = truth.at(child); - auto& storage_child = storage.at(child); - - size_t child_size = seed % 100; - seed = mix(seed); - - for (size_t i = 0; i <= std::min(child_size, (size_t)5); i++) { - // Resize 1 bigger a bunch + truth.resize(parent_size); + storage.resize(parent_size); + check(); + + for (size_t child = 0; child < parent_size; child++) { + auto 
&truth_child = truth.at(child); + auto &storage_child = storage.at(child); + + size_t child_size = seed % 100; + seed = mix(seed); + + for (size_t i = 0; i <= std::min(child_size, (size_t)5); i++) { + // Resize 1 bigger a bunch #ifdef debug_bother - std::cerr << "Resize child " << child << " of " << parent_size << " to " << i << endl; + std::cerr << "Resize child " << child << " of " << parent_size + << " to " << i << endl; #endif - truth_child.resize(i); - storage_child.resize(i); + truth_child.resize(i); + storage_child.resize(i); #ifdef debug_bother - std::cerr << "Check after resize to " << i << endl; + std::cerr << "Check after resize to " << i << endl; #endif - check(); + check(); #ifdef debug_bother - std::cerr << "Completed check after resize to " << i << endl; + std::cerr << "Completed check after resize to " << i << endl; #endif - } - - truth_child.resize(child_size); - storage_child.resize(child_size); - check(); - + } + + truth_child.resize(child_size); + storage_child.resize(child_size); + check(); + #ifdef debug_bother - std::cerr << "Fill in " << child_size << " items in child " << child << endl; + std::cerr << "Fill in " << child_size << " items in child " << child + << endl; #endif - - for (size_t i = 0; i < child_size; i++) { - // Fill in with data - truth_child.at(i) = seed % 10000; - storage_child.at(i) = seed % 10000; - seed = mix(seed); - } - - // Cut in half + + for (size_t i = 0; i < child_size; i++) { + // Fill in with data + truth_child.at(i) = seed % 10000; + storage_child.at(i) = seed % 10000; + seed = mix(seed); + } + + // Cut in half #ifdef debug_bother - std::cerr << "Resize child " << child << " of " << parent_size << " to " << child_size/2 << endl; + std::cerr << "Resize child " << child << " of " << parent_size << " to " + << child_size / 2 << endl; #endif - truth_child.resize(child_size/2); - storage_child.resize(child_size/2); - check(); - - // And increase by 10 with empty slots + truth_child.resize(child_size / 2); + 
storage_child.resize(child_size / 2); + check(); + + // And increase by 10 with empty slots #ifdef debug_bother - std::cerr << "Resize child " << child << " of " << parent_size << " to " << (truth_child.size() + 10) << endl; + std::cerr << "Resize child " << child << " of " << parent_size << " to " + << (truth_child.size() + 10) << endl; #endif - truth_child.resize(truth_child.size() + 10); - storage_child.resize(storage_child.size() + 10); - check(); - } - - // Now make sure that after all that the structures are equal. - check(); - } + truth_child.resize(truth_child.size() + 10); + storage_child.resize(storage_child.size() + 10); + check(); + } + + // Now make sure that after all that the structures are equal. + check(); } + } } void test_bit_packing() { - // Make an int vector - CompatIntVector<> test; - // Give it 128 bits - test.width(64); - test.resize(2); - - // Make a vector to compare against - sdsl::int_vector<> truth; - truth.width(64); - truth.resize(2); - - // Define a stage so we can report problems - std::string stage = "setup"; - - // Define bit-space accessors for the test vector. Accesses must be aligned on width. 
- auto set_int = [&](size_t offset_bits, size_t value, size_t width) { - assert(offset_bits % width == 0); - test.pack(offset_bits / width, value, width); - }; - auto get_int = [&](size_t offset_bits, size_t width) { - assert(offset_bits % width == 0); - return test.unpack(offset_bits / width, width); - }; - - // Define combined accessors - auto set_both = [&](size_t offset_bits, size_t value, size_t width) { - set_int(offset_bits, value, width); - truth.set_int(offset_bits, value, width); - }; - auto check_both = [&](size_t offset_bits, size_t width) { - auto test_int = get_int(offset_bits, width); - auto truth_int = truth.get_int(offset_bits, width); - if (test_int != truth_int) { - std::cerr << "In stage " << stage << " at offset " << offset_bits << " for width " << width << " test vector had " << test_int << " but truth vector had " << truth_int << std::endl; - - // Dump some of the bits - size_t window_start = offset_bits > width ? offset_bits - width : 0; - std::cerr << "Bit\tTruth\tTest" << std::endl; - for (size_t i = window_start; i < window_start + 2 * width && i < truth.bit_size(); i++) { - std::cerr << i << "\t" << truth.get_int(i, 1) << "\t" << get_int(i, 1) << std::endl; - } - - assert(false); - } - return test_int; - }; - - // Make sure we can zero everything - stage = "zero"; - for (size_t i = 0; i < 2; i++) { - set_both(i * 64, 0, 64); - } - for (size_t i = 0; i < 2; i++) { - check_both(i * 64, 64); - } - - // Make sure we can put a bit pattern and get back the right values at all bit widths. - stage = "pattern"; - for (size_t i = 0; i < 2; i++) { - set_both(i * 64, 0xCAFEBEBECACAF0F0, 64); + // Make an int vector + CompatIntVector<> test; + // Give it 128 bits + test.width(64); + test.resize(2); + + // Make a vector to compare against + sdsl::int_vector<> truth; + truth.width(64); + truth.resize(2); + + // Define a stage so we can report problems + std::string stage = "setup"; + + // Define bit-space accessors for the test vector. 
Accesses must be aligned on + // width. + auto set_int = [&](size_t offset_bits, size_t value, size_t width) { + assert(offset_bits % width == 0); + test.pack(offset_bits / width, value, width); + }; + auto get_int = [&](size_t offset_bits, size_t width) { + assert(offset_bits % width == 0); + return test.unpack(offset_bits / width, width); + }; + + // Define combined accessors + auto set_both = [&](size_t offset_bits, size_t value, size_t width) { + set_int(offset_bits, value, width); + truth.set_int(offset_bits, value, width); + }; + auto check_both = [&](size_t offset_bits, size_t width) { + auto test_int = get_int(offset_bits, width); + auto truth_int = truth.get_int(offset_bits, width); + if (test_int != truth_int) { + std::cerr << "In stage " << stage << " at offset " << offset_bits + << " for width " << width << " test vector had " << test_int + << " but truth vector had " << truth_int << std::endl; + + // Dump some of the bits + size_t window_start = offset_bits > width ? offset_bits - width : 0; + std::cerr << "Bit\tTruth\tTest" << std::endl; + for (size_t i = window_start; + i < window_start + 2 * width && i < truth.bit_size(); i++) { + std::cerr << i << "\t" << truth.get_int(i, 1) << "\t" << get_int(i, 1) + << std::endl; + } + + assert(false); } - for (size_t width = 1; width < 65; width++) { - for (size_t i = 0; i < 128/width; i++) { - check_both(i * width, width); - } + return test_int; + }; + + // Make sure we can zero everything + stage = "zero"; + for (size_t i = 0; i < 2; i++) { + set_both(i * 64, 0, 64); + } + for (size_t i = 0; i < 2; i++) { + check_both(i * 64, 64); + } + + // Make sure we can put a bit pattern and get back the right values at all bit + // widths. + stage = "pattern"; + for (size_t i = 0; i < 2; i++) { + set_both(i * 64, 0xCAFEBEBECACAF0F0, 64); + } + for (size_t width = 1; width < 65; width++) { + for (size_t i = 0; i < 128 / width; i++) { + check_both(i * width, width); } - - cerr << "Bit packing tests successful!" 
<< endl; -} + } -void test_mapped_structs() { - - assert(yomo::Manager::count_chains() == 0); - assert(yomo::Manager::count_links() == 0); - - { - - using T = int64_t; - using A = bdsg::yomo::Allocator; - using V = CompatVector; - // Make a thing to hold onto a test array. - bdsg::yomo::UniqueMappedPointer numbers_holder; - - // Construct it - numbers_holder.construct("GATTACA"); - - // See how much memory we are using - std::tuple total_free_reclaimable = numbers_holder.get_usage(); - // Total bytes must be no less than free bytes - assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); - // Free bytes must be no less than reclaimable bytes - assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); - - // Some bytes should be free in the initial chain link - assert(get<1>(total_free_reclaimable) > 0); - // But they should all be reclaimable, including the block header - assert(get<1>(total_free_reclaimable) == get<2>(total_free_reclaimable)); - - { - - // Get a reference to it, which will be valid unless we save() or something - auto& vec1 = *numbers_holder; - - // We should start empty - assert(vec1.size() == 0); - - // We should be able to preload without crashing - numbers_holder.preload(); - numbers_holder.preload(true); - - // We should be able to expand. 
- vec1.resize(100); - assert(vec1.size() == 100); - - // And contract - vec1.resize(10); - assert(vec1.size() == 10); - - // And hold data - fill_to(vec1, 10, 0); - verify_to(vec1, 10, 0); - - // And expand again - vec1.resize(100); - assert(vec1.size() == 100); - - // And see the data - verify_to(vec1, 10, 0); - - // And expand more - vec1.resize(1000); - assert(vec1.size() == 1000); - - // And see the data - verify_to(vec1, 10, 0); - - // And hold more data - fill_to(vec1, 1000, 1); - verify_to(vec1, 1000, 1); - - // And to preload without crashing - numbers_holder.preload(); - numbers_holder.preload(true); - } - - // We're going to need a temporary file - // This filename fill be filled in with the actual filename. - char filename[] = "tmpXXXXXX"; - int tmpfd = mkstemp(filename); - assert(tmpfd != -1); - - numbers_holder.save(tmpfd); - - { - auto& vec2 = *numbers_holder; - - // We should have the same data - assert(vec2.size() == 1000); - verify_to(vec2, 1000, 1); - - // We should be able to preload without crashing - numbers_holder.preload(); - numbers_holder.preload(true); - - // We should still be able to modify it. - vec2.resize(4000); - fill_to(vec2, 4000, 2); - verify_to(vec2, 4000, 2); - - // Check memory usage - total_free_reclaimable = numbers_holder.get_usage(); - // Total bytes must be no less than free bytes - assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); - // Free bytes must be no less than reclaimable bytes - assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); - - // At this point we've made it bigger than ever before and required - // a new link probably, so nothing should be reclaimable. - assert(get<2>(total_free_reclaimable) == 0); - // But some space should be free because we've deallocated smaller vectors. - assert(get<1>(total_free_reclaimable) > 0); - - // Make it even bigger! 
- vec2.resize(10000); - - // And smaller again - vec2.resize(4000); - - // And reallocate smaller - vec2.shrink_to_fit(); - - // Check memory usage - total_free_reclaimable = numbers_holder.get_usage(); - // Total bytes must be no less than free bytes - assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); - // Free bytes must be no less than reclaimable bytes - assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); - - // At this point some memory should be reclaimable - assert(get<2>(total_free_reclaimable) > 0); - - } - - numbers_holder.dissociate(); - - { - auto& vec3 = *numbers_holder; - - // After dissociating, we should be able to modify the vector - vec3.resize(5); - fill_to(vec3, 5, 3); - verify_to(vec3, 5, 3); - } - - numbers_holder.reset(); - - numbers_holder.load(tmpfd, "GATTACA"); - - // Check memory usage - total_free_reclaimable = numbers_holder.get_usage(); - // Total bytes must be no less than free bytes - assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); - // Free bytes must be no less than reclaimable bytes - assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); - - // No bytes should be reclaimable because we saved this through a mapping. - assert(get<2>(total_free_reclaimable) == 0); - - { - auto& vec4 = *numbers_holder; - - // We should be able to preload without crashing - numbers_holder.preload(); - numbers_holder.preload(true); - - // When we reload we should see the last thing we wrote before dissociating. - assert(vec4.size() == 4000); - verify_to(vec4, 4000, 2); - } - - close(tmpfd); - unlink(filename); - } - - assert(yomo::Manager::count_chains() == 0); - assert(yomo::Manager::count_links() == 0); - - { - using T = int64_t; - using A = bdsg::yomo::Allocator; - using V1 = CompatVector; - using A2 = bdsg::yomo::Allocator; - using V2 = CompatVector; - // Make a thing to hold onto a test array of arrays. 
- bdsg::yomo::UniqueMappedPointer numbers_holder_holder; - - numbers_holder_holder.construct(); - - // Now do a vigorous test comparing to a normal vector - bother_vector(*numbers_holder_holder); - } - - assert(yomo::Manager::count_chains() == 0); - assert(yomo::Manager::count_links() == 0); - - { - using T = int64_t; - using A = bdsg::yomo::Allocator; - using V1 = CompatVector; - using A2 = bdsg::yomo::Allocator; - using V2 = CompatVector; - - // Just make the root object on the stack and make sure chain-based - // allocators and pointers fall back to the heap properly. - V2 numbers; - - // Now do a vigorous test comparing to a normal vector - bother_vector(numbers); - } - - assert(yomo::Manager::count_chains() == 0); - assert(yomo::Manager::count_links() == 0); - - { - // Make sure our bit-packing vector works - CompatIntVector<> vec; - vec.width(3); - - for (size_t i = 0; i < 1000; i++) { - vec.resize(i + 1); - vec.at(i) = i % 8; - if (vec.at(i) != i % 8) { - throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + std::to_string(i) + " but got " + std::to_string(vec.at(i))); - } - } - - for (size_t i = 0; i < 1000; i++) { - if (vec.at(i) != i % 8) { - throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + std::to_string(i) + " but got " + std::to_string(vec.at(i))); - } - } - - vec.resize(500); - for (size_t i = 0; i < 500; i++) { - if (vec.at(i) != i % 8) { - throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + std::to_string(i) + " but got " + std::to_string(vec.at(i))); - } - } - - vec.repack(4, 500); - for (size_t i = 0; i < 500; i++) { - if (vec.at(i) != i % 8) { - throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + std::to_string(i) + " but got " + std::to_string(vec.at(i))); - } - } - } - - assert(yomo::Manager::count_chains() == 0); - assert(yomo::Manager::count_links() == 0); - - { - // Make sure our bit-packing vector can self-test - - // Make a vector - 
bdsg::yomo::UniqueMappedPointer vec; - vec.construct(); - vec->width(60); - vec->resize(1000); - fill_to(*vec, 1000, 1); - verify_to(*vec, 1000, 1); - - // We should pass heap verification - vec.check_heap_integrity(); - - // Save it out - char filename[] = "tmpXXXXXX"; - int tmpfd = mkstemp(filename); - assert(tmpfd != -1); - vec.save(tmpfd); - vec.reset(); - - // Drop part of the file - auto file_size = lseek(tmpfd, 0, SEEK_END); - assert(ftruncate(tmpfd, file_size/2) == 0); - - // Reload - vec.load(tmpfd, ""); - - try { - // We shouldn't pass heap verification. - vec.check_heap_integrity(); - assert(false); - } catch (std::runtime_error& e) { - // This is the exception we expect to get. - } - - vec.reset(); - - close(tmpfd); - unlink(filename); - } - - assert(yomo::Manager::count_chains() == 0); - assert(yomo::Manager::count_links() == 0); - - cerr << "Mapped Structs tests successful!" << endl; -} - -void test_int_vector() { - - // Make a thing to hold onto a test int vector. - bdsg::yomo::UniqueMappedPointer iv; - - // Have a function we can call to check its size. - auto save_and_check_size = [&](size_t expected_size) { - // Save it out, creating or clobbering - int fd = open("test.dat", O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); - iv.save(fd); - close(fd); - iv.dissociate(); - - // Make sure that the file has the correct size - struct stat file_stats; - stat("test.dat", &file_stats); - cerr << "Observed file size of " << file_stats.st_size << " bytes" << endl; - assert(file_stats.st_size == expected_size); - - // Load it again - bdsg::yomo::UniqueMappedPointer iv2; - fd = open("test.dat", O_RDWR); - iv2.load(fd, "ints"); - close(fd); - - // Make sure the re-loaded object has the correct usage. 
- std::tuple total_free_reclaimable = iv2.get_usage(); - size_t post_load_total_bytes = std::get<0>(total_free_reclaimable); - cerr << "Observed post-load size of " << post_load_total_bytes << " bytes" << endl; - assert(post_load_total_bytes == expected_size); - }; - - - // Construct it - iv.construct("ints"); - - // Give it a width - iv->width(20); - - // Make it big - size_t iv_size = 1024 * 1024 * 10; - for (size_t i = 1; i < iv_size; i *= 2) { - // Keep resizing it up and fragment the heap into many links. - iv->resize(i); - } - iv->resize(iv_size); - - for (size_t i = 0; i < iv_size; i++) { - // Fill it with a distinctive bit pattern - (*iv)[i] = 0xF0F0; - } - - // See how much memory we are using - std::tuple total_free_reclaimable = iv.get_usage(); - size_t required_bytes = std::get<0>(total_free_reclaimable) - std::get<2>(total_free_reclaimable); - cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " - << std::get<1>(total_free_reclaimable) << " bytes free, " - << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; - cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; - save_and_check_size(required_bytes); - - // Shrink it back down - iv->repack(16, iv_size); - total_free_reclaimable = iv.get_usage(); - required_bytes = std::get<0>(total_free_reclaimable) - std::get<2>(total_free_reclaimable); - cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " - << std::get<1>(total_free_reclaimable) << " bytes free, " - << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; - cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; - save_and_check_size(required_bytes); - - // Expand it even more - iv->repack(32, iv_size); - total_free_reclaimable = iv.get_usage(); - required_bytes = std::get<0>(total_free_reclaimable) 
- std::get<2>(total_free_reclaimable); - cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " - << std::get<1>(total_free_reclaimable) << " bytes free, " - << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; - cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; - save_and_check_size(required_bytes); - - // And again - iv->repack(40, iv_size); - total_free_reclaimable = iv.get_usage(); - required_bytes = std::get<0>(total_free_reclaimable) - std::get<2>(total_free_reclaimable); - cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " - << std::get<1>(total_free_reclaimable) << " bytes free, " - << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; - cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; - save_and_check_size(required_bytes); - - unlink("test.dat"); - cerr << "Int Vector tests successful!" << endl; + cerr << "Bit packing tests successful!" 
<< endl; } - -void test_serializable_handle_graphs() { - - vector> implementations; - - PackedGraph pg_out, pg_in; - implementations.emplace_back(&pg_out, &pg_in); - - HashGraph hg_out, hg_in; - implementations.emplace_back(&hg_out, &hg_in); - - MappedPackedGraph mpg_in, mpg_out; - implementations.emplace_back(&mpg_in, &mpg_out); - - for (pair implementation : implementations) { - - MutablePathMutableHandleGraph* build_graph = dynamic_cast(implementation.first); - PathHandleGraph* check_graph = dynamic_cast(implementation.second); - SerializableHandleGraph* serialize_graph = implementation.first; - SerializableHandleGraph* deserialize_graph = implementation.second; - - handle_t h1 = build_graph->create_handle("GATT"); - handle_t h2 = build_graph->create_handle("TTGA"); - handle_t h3 = build_graph->create_handle("T"); - handle_t h4 = build_graph->create_handle("CA"); - - build_graph->create_edge(h1, h2); - build_graph->create_edge(h1, build_graph->flip(h3)); - build_graph->create_edge(h2, h3); - build_graph->create_edge(build_graph->flip(h3), h4); - - path_handle_t p = build_graph->create_path_handle("path"); - build_graph->append_step(p, h1); - build_graph->append_step(p, h2); - build_graph->append_step(p, h4); - - stringstream strm; - - serialize_graph->serialize(strm); - strm.seekg(0); - deserialize_graph->deserialize(strm); - - assert(build_graph->get_node_count() == check_graph->get_node_count()); - assert(build_graph->get_edge_count() == check_graph->get_edge_count()); - assert(build_graph->get_path_count() == check_graph->get_path_count()); - - for (handle_t h : {h1, h2, h3, h4}) { - assert(check_graph->has_node(build_graph->get_id(h))); - assert(check_graph->get_sequence(check_graph->get_handle(build_graph->get_id(h))) == build_graph->get_sequence(h)); - } - - assert(check_graph->get_step_count(check_graph->get_path_handle(build_graph->get_path_name(p))) == build_graph->get_step_count(p)); - } - - cerr << "SerializableHandleGraph tests successful!" 
<< endl; -} +void test_mapped_structs() { -void test_deletable_handle_graphs() { - - // first batch of tests - { - vector implementations; - - // Add implementations - - PackedGraph pg; - implementations.push_back(&pg); - - HashGraph hg; - implementations.push_back(&hg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - // And test them - - for (DeletableHandleGraph* implementation : implementations) { - - DeletableHandleGraph& graph = *implementation; - - assert(graph.get_node_count() == 0); - - handle_t h = graph.create_handle("ATG", 2); - - // DeletableHandleGraph has correct structure after creating a node - { - assert(graph.get_sequence(h) == "ATG"); - assert(graph.get_sequence(graph.flip(h)) == "CAT"); - assert(graph.get_base(h, 1) == 'T'); - assert(graph.get_base(graph.flip(h), 2) == 'T'); - assert(graph.get_subsequence(h, 1, 3) == "TG"); - assert(graph.get_subsequence(graph.flip(h), 0, 2) == "CA"); - assert(graph.get_length(h) == 3); - assert(graph.has_node(graph.get_id(h))); - assert(!graph.has_node(graph.get_id(h) + 1)); - - assert(graph.get_handle(graph.get_id(h)) == h); - assert(!graph.get_is_reverse(h)); - assert(graph.get_is_reverse(graph.flip(h))); - - assert(graph.get_node_count() == 1); - assert(graph.min_node_id() == graph.get_id(h)); - assert(graph.max_node_id() == graph.get_id(h)); - assert(graph.get_total_length() == 3); - assert(graph.get_edge_count() == 0); - - graph.follow_edges(h, true, [](const handle_t& prev) { - assert(false); - return true; - }); - graph.follow_edges(h, false, [](const handle_t& next) { - assert(false); - return true; - }); - } - - handle_t h2 = graph.create_handle("CT", 1); - - // DeletableHandleGraph has correct structure after creating a node at the beginning of ID space - { - - assert(graph.get_sequence(h2) == "CT"); - assert(graph.get_sequence(graph.flip(h2)) == "AG"); - assert(graph.get_base(h2, 1) == 'T'); - assert(graph.get_base(graph.flip(h2), 0) == 'A'); - assert(graph.get_subsequence(h2, 1, 
10) == "T"); - assert(graph.get_subsequence(graph.flip(h2), 0, 2) == "AG"); - assert(graph.get_length(h2) == 2); - assert(graph.has_node(graph.get_id(h2))); - assert(!graph.has_node(max(graph.get_id(h), graph.get_id(h2)) + 1)); - - assert(graph.get_handle(graph.get_id(h2)) == h2); - - assert(graph.get_node_count() == 2); - assert(graph.min_node_id() == graph.get_id(h2)); - assert(graph.max_node_id() == graph.get_id(h)); - assert(graph.get_total_length() == 5); - assert(graph.get_edge_count() == 0); - - graph.follow_edges(h2, true, [](const handle_t& prev) { - assert(false); - return true; - }); - graph.follow_edges(h2, false, [](const handle_t& next) { - assert(false); - return true; - }); - } - - // creating and accessing a node at the end of ID space - - handle_t h3 = graph.create_handle("GAC", 4); - - // DeletableHandleGraph has correct structure after creating a node at the end of ID space - { - assert(graph.get_sequence(h3) == "GAC"); - assert(graph.get_sequence(graph.flip(h3)) == "GTC"); - assert(graph.get_base(h3, 1) == 'A'); - assert(graph.get_base(graph.flip(h3), 0) == 'G'); - assert(graph.get_subsequence(h3, 1, 1) == "A"); - assert(graph.get_subsequence(graph.flip(h3), 0, 5) == "GTC"); - assert(graph.get_length(h3) == 3); - - assert(graph.get_handle(graph.get_id(h3)) == h3); - - assert(graph.get_node_count() == 3); - assert(graph.min_node_id() == graph.get_id(h2)); - assert(graph.max_node_id() == graph.get_id(h3)); - assert(graph.get_total_length() == 8); - assert(graph.get_edge_count() == 0); - - graph.follow_edges(h3, true, [](const handle_t& prev) { - assert(false); - return true; - }); - graph.follow_edges(h3, false, [](const handle_t& next) { - assert(false); - return true; - }); - } - - - // creating and accessing in the middle of ID space - - handle_t h4 = graph.create_handle("T", 3); - - // DeletableHandleGraph has correct structure after creating a node in the middle of ID space - { - assert(graph.get_sequence(h4) == "T"); - 
assert(graph.get_sequence(graph.flip(h4)) == "A"); - assert(graph.get_length(h4) == 1); - - assert(graph.get_handle(graph.get_id(h4)) == h4); - - assert(graph.get_node_count() == 4); - assert(graph.min_node_id() == graph.get_id(h2)); - assert(graph.max_node_id() == graph.get_id(h3)); - assert(graph.get_total_length() == 9); - assert(graph.get_edge_count() == 0); - - graph.follow_edges(h4, true, [](const handle_t& prev) { - assert(false); - return true; - }); - graph.follow_edges(h4, false, [](const handle_t& next) { - assert(false); - return true; - }); - } - - graph.create_edge(h, h2); - - bool found1 = false, found2 = false, found3 = false, found4 = false; - int count1 = 0, count2 = 0, count3 = 0, count4 = 0; - - // DeletableHandleGraph has correct structure after creating an edge - { - assert(graph.get_edge_count() == 1); - - graph.follow_edges(h, false, [&](const handle_t& next) { - if (next == h2) { - found1 = true; - } - count1++; - return true; - }); - graph.follow_edges(h2, true, [&](const handle_t& prev) { - if (prev == h) { - found2 = true; - } - count2++; - return true; - }); - graph.follow_edges(graph.flip(h), true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found3 = true; - } - count3++; - return true; - }); - graph.follow_edges(graph.flip(h2), false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found4 = true; - } - count4++; - return true; - }); - assert(count1 == 1); - assert(count2 == 1); - assert(count3 == 1); - assert(count4 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - - count1 = count2 = count3 = count4 = 0; - found1 = found2 = found3 = found4 = false; - } - - graph.create_edge(h, graph.flip(h3)); - - bool found5 = false, found6 = false, found7 = false, found8 = false; - int count5 = 0, count6 = 0; - - // DeletableHandleGraph has correct structure after creating an edge with a traversal - { - assert(graph.get_edge_count() == 2); - - graph.follow_edges(h, false, [&](const 
handle_t& next) { - if (next == h2) { - found1 = true; - } - else if (next == graph.flip(h3)) { - found2 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h), true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found3 = true; - } - else if (prev == h3) { - found4 = true; - } - count2++; - return true; - }); - graph.follow_edges(h2, true, [&](const handle_t& prev) { - if (prev == h) { - found5 = true; - } - count3++; - return true; - }); - graph.follow_edges(graph.flip(h2), false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found6 = true; - } - count4++; - return true; - }); - graph.follow_edges(graph.flip(h3), true, [&](const handle_t& prev) { - if (prev == h) { - found7 = true; - } - count5++; - return true; - }); - graph.follow_edges(h3, false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found8 = true; - } - count6++; - return true; - }); - assert(count1 == 2); - assert(count2 == 2); - assert(count3 == 1); - assert(count4 == 1); - assert(count5 == 1); - assert(count6 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(found7); - assert(found8); - - count1 = count2 = count3 = count4 = count5 = count6 = 0; - found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = false; - } - - graph.create_edge(h4, graph.flip(h4)); - - // DeletableHandleGraph has correct structure after creating a reversing self-loop - { - assert(graph.get_edge_count() == 3); - - graph.follow_edges(h4, false, [&](const handle_t& next) { - if (next == graph.flip(h4)) { - found1 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h4), true, [&](const handle_t& prev) { - if (prev == h4) { - found2 = true; - } - count2++; - return true; - }); - assert(count1 == 1); - assert(count2 == 1); - assert(found1); - assert(found2); - - count1 = count2 = 0; - found1 = found2 = false; - } - - graph.create_edge(h, 
graph.flip(h4)); - graph.create_edge(graph.flip(h3), h4); - - assert(graph.get_edge_count() == 5); - - graph.destroy_edge(h, graph.flip(h4)); - graph.destroy_edge(graph.flip(h3), h4); - - assert(graph.get_edge_count() == 3); - - // DeletableHandleGraph has correct structure after creating and deleting edges - { - graph.follow_edges(h, false, [&](const handle_t& next) { - if (next == h2) { - found1 = true; - } - else if (next == graph.flip(h3)) { - found2 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h), true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found3 = true; - } - else if (prev == h3) { - found4 = true; - } - count2++; - return true; - }); - graph.follow_edges(h2, true, [&](const handle_t& prev) { - if (prev == h) { - found5 = true; - } - count3++; - return true; - }); - graph.follow_edges(graph.flip(h2), false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found6 = true; - } - count4++; - return true; - }); - graph.follow_edges(graph.flip(h3), true, [&](const handle_t& prev) { - if (prev == h) { - found7 = true; - } - count5++; - return true; - }); - graph.follow_edges(h3, false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found8 = true; - } - count6++; - return true; - }); - assert(count1 == 2); - assert(count2 == 2); - assert(count3 == 1); - assert(count4 == 1); - assert(count5 == 1); - assert(count6 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(found7); - assert(found8); - - count1 = count2 = count3 = count4 = count5 = count6 = 0; - found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = false; - - graph.follow_edges(h4, false, [&](const handle_t& next) { - if (next == graph.flip(h4)) { - found1 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h4), true, [&](const handle_t& prev) { - if (prev == h4) { - found2 = true; - } - count2++; - return true; - 
}); - assert(count1 == 1); - assert(count2 == 1); - assert(found1); - assert(found2); - - count1 = count2 = 0; - found1 = found2 = false; - } - - handle_t h5 = graph.create_handle("GGACC"); - - // make some edges to ensure that deleting is difficult - graph.create_edge(h, h5); - graph.create_edge(h5, h); - graph.create_edge(graph.flip(h5), h2); - graph.create_edge(h3, graph.flip(h5)); - graph.create_edge(h3, h5); - graph.create_edge(h5, h4); - - graph.destroy_handle(h5); - - // DeletableHandleGraph has correct structure after creating and deleting a node - { - - graph.follow_edges(h, false, [&](const handle_t& next) { - if (next == h2) { - found1 = true; - } - else if (next == graph.flip(h3)) { - found2 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h), true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found3 = true; - } - else if (prev == h3) { - found4 = true; - } - count2++; - return true; - }); - graph.follow_edges(h2, true, [&](const handle_t& prev) { - if (prev == h) { - found5 = true; - } - count3++; - return true; - }); - graph.follow_edges(graph.flip(h2), false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found6 = true; - } - count4++; - return true; - }); - graph.follow_edges(graph.flip(h3), true, [&](const handle_t& prev) { - if (prev == h) { - found7 = true; - } - count5++; - return true; - }); - graph.follow_edges(h3, false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found8 = true; - } - count6++; - return true; - }); - assert(count1 == 2); - assert(count2 == 2); - assert(count3 == 1); - assert(count4 == 1); - assert(count5 == 1); - assert(count6 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(found7); - assert(found8); - - count1 = count2 = count3 = count4 = count5 = count6 = 0; - found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = false; - - graph.follow_edges(h4, false, 
[&](const handle_t& next) { - if (next == graph.flip(h4)) { - found1 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h4), true, [&](const handle_t& prev) { - if (prev == h4) { - found2 = true; - } - count2++; - return true; - }); - assert(count1 == 1); - assert(count2 == 1); - assert(found1); - assert(found2); - - count1 = count2 = 0; - found1 = found2 = false; - } - - // DeletableHandleGraph has correct structure after swapping nodes - { - - graph.follow_edges(h, false, [&](const handle_t& next) { - if (next == h2) { - found1 = true; - } - else if (next == graph.flip(h3)) { - found2 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h), true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found3 = true; - } - else if (prev == h3) { - found4 = true; - } - count2++; - return true; - }); - graph.follow_edges(h2, true, [&](const handle_t& prev) { - if (prev == h) { - found5 = true; - } - count3++; - return true; - }); - graph.follow_edges(graph.flip(h2), false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found6 = true; - } - count4++; - return true; - }); - graph.follow_edges(graph.flip(h3), true, [&](const handle_t& prev) { - if (prev == h) { - found7 = true; - } - count5++; - return true; - }); - graph.follow_edges(h3, false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found8 = true; - } - count6++; - return true; - }); - assert(count1 == 2); - assert(count2 == 2); - assert(count3 == 1); - assert(count4 == 1); - assert(count5 == 1); - assert(count6 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(found7); - assert(found8); - - count1 = count2 = count3 = count4 = count5 = count6 = 0; - found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = false; - - graph.follow_edges(h4, false, [&](const handle_t& next) { - if (next == graph.flip(h4)) { - found1 = true; - } - count1++; - 
return true; - }); - graph.follow_edges(graph.flip(h4), true, [&](const handle_t& prev) { - if (prev == h4) { - found2 = true; - } - count2++; - return true; - }); - assert(count1 == 1); - assert(count2 == 1); - assert(found1); - assert(found2); - - count1 = count2 = 0; - found1 = found2 = false; - } - - // DeletableHandleGraph visits all nodes with for_each_handle - { - graph.for_each_handle([&](const handle_t& handle) { - if (handle == h) { - found1 = true; - } - else if (handle == h2) { - found2 = true; - } - else if (handle == h3) { - found3 = true; - } - else if (handle == h4) { - found4 = true; - } - else { - assert(false); - } - return true; - }); - - assert(found1); - assert(found2); - assert(found3); - assert(found4); - - found1 = found2 = found3 = found4 = false; - } - - // to make sure the sequence reverse complemented correctly - int i = 0; - auto check_rev_comp = [&](const std::string& seq1, const std::string& seq2) { - i++; - assert(seq1.size() == seq2.size()); - auto it = seq1.begin(); - auto rit = seq2.rbegin(); - for (; it != seq1.end(); it++) { - if (*it == 'A') { - assert(*rit == 'T'); - } - else if (*it == 'C') { - assert(*rit == 'G'); - } - else if (*it == 'G') { - assert(*rit == 'C'); - } - else if (*it == 'T') { - assert(*rit == 'A'); - } - else if (*it == 'N') { - assert(*rit == 'N'); - } - else { - assert(false); - } - - rit++; - } - }; - - - int count7 = 0, count8 = 0; - - // DeletableHandleGraph correctly reverses a node - { - - string seq1 = graph.get_sequence(h); - h = graph.apply_orientation(graph.flip(h)); - - // check the sequence - string rev_seq1 = graph.get_sequence(h); - check_rev_comp(seq1, rev_seq1); - - // check that the edges are what we expect - - graph.follow_edges(h, false, [&](const handle_t& next) { - count1++; - return true; - }); - graph.follow_edges(h, true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found1 = true; - } - else if (prev == h3) { - found2 = true; - } - count2++; - return true; - }); - 
graph.follow_edges(graph.flip(h), true, [&](const handle_t& next) { - count3++; - return true; - }); - graph.follow_edges(graph.flip(h), false, [&](const handle_t& prev) { - if (prev == h2) { - found3 = true; - } - else if (prev == graph.flip(h3)) { - found4 = true; - } - count4++; - return true; - }); - graph.follow_edges(h2, true, [&](const handle_t& prev) { - if (prev == graph.flip(h)) { - found5 = true; - } - count5++; - return true; - }); - graph.follow_edges(graph.flip(h2), false, [&](const handle_t& next) { - if (next == h) { - found6 = true; - } - count6++; - return true; - }); - graph.follow_edges(graph.flip(h3), true, [&](const handle_t& prev) { - if (prev == graph.flip(h)) { - found7 = true; - } - count7++; - return true; - }); - graph.follow_edges(h3, false, [&](const handle_t& next) { - if (next == h) { - found8 = true; - } - count8++; - return true; - }); - assert(count1 == 0); - assert(count2 == 2); - assert(count3 == 0); - assert(count4 == 2); - assert(count5 == 1); - assert(count6 == 1); - assert(count7 == 1); - assert(count8 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(found7); - assert(found8); - - count1 = count2 = count3 = count4 = count5 = count6 = count7 = count8 = 0; - found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = false; - - - // and now switch it back to the same orientation and repeat the topology checks - - h = graph.apply_orientation(graph.flip(h)); - - graph.follow_edges(h, false, [&](const handle_t& next) { - if (next == h2) { - found1 = true; - } - else if (next == graph.flip(h3)) { - found2 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h), true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found3 = true; - } - else if (prev == h3) { - found4 = true; - } - count2++; - return true; - }); - graph.follow_edges(h2, true, [&](const handle_t& prev) { - if (prev == h) { - found5 = true; - } 
- count3++; - return true; - }); - graph.follow_edges(graph.flip(h2), false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found6 = true; - } - count4++; - return true; - }); - graph.follow_edges(graph.flip(h3), true, [&](const handle_t& prev) { - if (prev == h) { - found7 = true; - } - count5++; - return true; - }); - graph.follow_edges(h3, false, [&](const handle_t& next) { - if (next == graph.flip(h)) { - found8 = true; - } - count6++; - return true; - }); - assert(count1 == 2); - assert(count2 == 2); - assert(count3 == 1); - assert(count4 == 1); - assert(count5 == 1); - assert(count6 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(found7); - assert(found8); - - count1 = count2 = count3 = count4 = count5 = count6 = 0; - found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = false; - - graph.follow_edges(h4, false, [&](const handle_t& next) { - if (next == graph.flip(h4)) { - found1 = true; - } - count1++; - return true; - }); - graph.follow_edges(graph.flip(h4), true, [&](const handle_t& prev) { - if (prev == h4) { - found2 = true; - } - count2++; - return true; - }); - assert(count1 == 1); - assert(count2 == 1); - assert(found1); - assert(found2); - - count1 = count2 = 0; - found1 = found2 = false; - } - - vector parts = graph.divide_handle(h, vector{1, 2}); - - int count9 = 0, count10 = 0, count11 = 0, count12 = 0; - bool found9 = false, found10 = false, found11 = false, found12 = false, found13 = false, found14 = false; - - // DeletableHandleGraph can correctly divide a node - { - - assert(parts.size() == 3); - - assert(graph.get_sequence(parts[0]) == "A"); - assert(graph.get_length(parts[0]) == 1); - assert(graph.get_sequence(parts[1]) == "T"); - assert(graph.get_length(parts[1]) == 1); - assert(graph.get_sequence(parts[2]) == "G"); - assert(graph.get_length(parts[2]) == 1); - - - graph.follow_edges(parts[0], false, [&](const handle_t& next) { - if 
(next == parts[1]) { - found1 = true; - } - count1++; - return true; - }); - graph.follow_edges(parts[0], true, [&](const handle_t& prev) { - count2++; - return true; - }); - graph.follow_edges(graph.flip(parts[0]), true, [&](const handle_t& prev) { - if (prev == graph.flip(parts[1])) { - found2 = true; - } - count3++; - return true; - }); - graph.follow_edges(graph.flip(parts[0]), false, [&](const handle_t& next) { - count4++; - return true; - }); - - graph.follow_edges(parts[1], false, [&](const handle_t& next) { - if (next == parts[2]) { - found3 = true; - } - count5++; - return true; - }); - graph.follow_edges(parts[1], true, [&](const handle_t& prev) { - if (prev == parts[0]) { - found4 = true; - } - count6++; - return true; - }); - graph.follow_edges(graph.flip(parts[1]), true, [&](const handle_t& prev) { - if (prev == graph.flip(parts[2])) { - found5 = true; - } - count7++; - return true; - }); - graph.follow_edges(graph.flip(parts[1]), false, [&](const handle_t& next) { - if (next == graph.flip(parts[0])) { - found6 = true; - } - count8++; - return true; - }); - - graph.follow_edges(parts[2], false, [&](const handle_t& next) { - if (next == h2) { - found7 = true; - } - else if (next == graph.flip(h3)) { - found8 = true; - } - count9++; - return true; - }); - graph.follow_edges(parts[2], true, [&](const handle_t& prev) { - if (prev == parts[1]) { - found9 = true; - } - count10++; - return true; - }); - graph.follow_edges(graph.flip(parts[2]), true, [&](const handle_t& prev) { - if (prev == graph.flip(h2)) { - found10 = true; - } - else if (prev == h3) { - found11 = true; - } - count11++; - return true; - }); - graph.follow_edges(graph.flip(parts[2]), false, [&](const handle_t& next) { - if (next == graph.flip(parts[1])) { - found12 = true; - } - count12++; - return true; - }); - graph.follow_edges(graph.flip(h3), true, [&](const handle_t& prev) { - if (prev == parts[2]) { - found13 = true; - } - return true; - }); - graph.follow_edges(h2, true, [&](const 
handle_t& prev) { - if (prev == parts[2]) { - found14 = true; - } - return true; - }); - - assert(count1 == 1); - assert(count2 == 0); - assert(count3 == 1); - assert(count4 == 0); - assert(count5 == 1); - assert(count6 == 1); - assert(count7 == 1); - assert(count8 == 1); - assert(count9 == 2); - assert(count10 == 1); - assert(count11 == 2); - assert(count12 == 1); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(found7); - assert(found8); - assert(found9); - assert(found10); - assert(found11); - assert(found12); - assert(found13); - assert(found14); - - count1 = count2 = count3 = count4 = count5 = count6 = count7 = count8 = count9 = count10 = count11 = count12 = 0; - found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = found9 = found10 = found11 = found12 = false; - } - - vector rev_parts = graph.divide_handle(graph.flip(h3), vector{1}); - - // DeletableHandleGraph can correctly divide a node on the reverse strand - { - - assert(graph.get_sequence(rev_parts[0]) == "G"); - assert(graph.get_length(rev_parts[0]) == 1); - assert(graph.get_is_reverse(rev_parts[0])); - assert(graph.get_sequence(rev_parts[1]) == "TC"); - assert(graph.get_length(rev_parts[1]) == 2); - assert(graph.get_is_reverse(rev_parts[1])); - - graph.follow_edges(rev_parts[0], false, [&](const handle_t& next) { - if (next == rev_parts[1]) { - found1 = true; - } - count1++; - return true; - }); - graph.follow_edges(rev_parts[1], true, [&](const handle_t& prev) { - if (prev == rev_parts[0]) { - found2 = true; - } - count2++; - return true; - }); - graph.follow_edges(graph.flip(rev_parts[1]), false, [&](const handle_t& next) { - if (next == graph.flip(rev_parts[0])) { - found3 = true; - } - count3++; - return true; - }); - graph.follow_edges(graph.flip(rev_parts[0]), true, [&](const handle_t& prev) { - if (prev == graph.flip(rev_parts[1])) { - found4 = true; - } - count4++; - return true; - }); - 
graph.follow_edges(rev_parts[0], true, [&](const handle_t& prev) { - if (prev == parts[2]) { - found5 = true; - } - count5++; - return true; - }); - graph.follow_edges(rev_parts[1], false, [&](const handle_t& next) { - count6++; - return true; - }); - - assert(count1 == 1); - assert(count2 == 1); - assert(count3 == 1); - assert(count4 == 1); - assert(count5 == 1); - assert(count6 == 0); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - } - - auto h6 = graph.create_handle("ACGT"); - auto h7 = graph.create_handle("GCGG"); - auto h8 = graph.create_handle("TTCA"); - - graph.create_edge(h6, h7); - graph.create_edge(h7, h8); - - h7 = graph.truncate_handle(h7, true, 1); - assert(graph.get_sequence(h7) == "CGG"); - assert(graph.get_degree(h7, true) == 0); - assert(graph.get_degree(h7, false) == 1); - assert(graph.get_degree(h6, false) == 0); - assert(graph.get_degree(h8, true) == 1); - - h7 = graph.truncate_handle(h7, false, 2); - assert(graph.get_sequence(h7) == "CG"); - assert(graph.get_degree(h7, true) == 0); - assert(graph.get_degree(h7, false) == 0); - assert(graph.get_degree(h6, false) == 0); - assert(graph.get_degree(h8, true) == 0); - - h6 = graph.change_sequence(h6, "AAAT"); - h7 = graph.change_sequence(h7, "G"); - assert(graph.get_sequence(h6) == "AAAT"); - assert(graph.get_sequence(graph.flip(h6)) == "ATTT"); - assert(graph.get_sequence(h7) == "G"); - assert(graph.get_sequence(graph.flip(h7)) == "C"); - } - } + assert(yomo::Manager::count_chains() == 0); + assert(yomo::Manager::count_links() == 0); - // second batch of test involving self loops - { - vector implementations; - - PackedGraph pg; - implementations.push_back(&pg); - - HashGraph hg; - implementations.push_back(&hg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - for (DeletableHandleGraph* implementation : implementations) { - - DeletableHandleGraph& graph = *implementation; - - // initialize the graph - - handle_t h1 = 
graph.create_handle("A"); - handle_t h2 = graph.create_handle("C"); - - graph.create_edge(h1, h2); - graph.create_edge(graph.flip(h1), h2); - - // test for the right initial topology - bool found1 = false, found2 = false, found3 = false, found4 = false, found5 = false, found6 = false; - int count1 = 0, count2 = 0, count3 = 0, count4 = 0; - - graph.follow_edges(h1, false, [&](const handle_t& other) { - if (other == h2) { - found1 = true; - } - count1++; - }); - graph.follow_edges(h1, true, [&](const handle_t& other) { - if (other == graph.flip(h2)) { - found2 = true; - } - count2++; - }); - graph.follow_edges(h2, false, [&](const handle_t& other) { - count3++; - }); - graph.follow_edges(h2, true, [&](const handle_t& other) { - if (other == h1) { - found3 = true; - } - else if (other == graph.flip(h1)) { - found4 = true; - } - count4++; - }); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(count1 == 1); - assert(count2 == 1); - assert(count3 == 0); - assert(count4 == 2); - found1 = found2 = found3 = found4 = found5 = found6 = false; - count1 = count2 = count3 = count4 = 0; - - // flip a node and check if the orientation is correct - h1 = graph.apply_orientation(graph.flip(h1)); - - graph.follow_edges(h1, false, [&](const handle_t& other) { - if (other == h2) { - found1 = true; - } - count1++; - }); - graph.follow_edges(h1, true, [&](const handle_t& other) { - if (other == graph.flip(h2)) { - found2 = true; - } - count2++; - }); - graph.follow_edges(h2, false, [&](const handle_t& other) { - count3++; - }); - graph.follow_edges(h2, true, [&](const handle_t& other) { - if (other == h1) { - found3 = true; - } - else if (other == graph.flip(h1)) { - found4 = true; - } - count4++; - }); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(count1 == 1); - assert(count2 == 1); - assert(count3 == 0); - assert(count4 == 2); - found1 = found2 = found3 = found4 = found5 = found6 = false; - count1 = count2 = count3 = 
count4 = 0; - - // create a new edge - - graph.create_edge(h1, graph.flip(h2)); - - // check the topology - - graph.follow_edges(h1, false, [&](const handle_t& other) { - if (other == h2) { - found1 = true; - } - else if (other == graph.flip(h2)) { - found2 = true; - } - count1++; - }); - graph.follow_edges(h1, true, [&](const handle_t& other) { - if (other == graph.flip(h2)) { - found3 = true; - } - count2++; - }); - graph.follow_edges(h2, false, [&](const handle_t& other) { - if (other == graph.flip(h1)) { - found4 = true; - } - count3++; - }); - graph.follow_edges(h2, true, [&](const handle_t& other) { - if (other == h1) { - found5 = true; - } - else if (other == graph.flip(h1)) { - found6 = true; - } - count4++; - }); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(count1 == 2); - assert(count2 == 1); - assert(count3 == 1); - assert(count4 == 2); - found1 = found2 = found3 = found4 = found5 = found6 = false; - count1 = count2 = count3 = count4 = 0; - - // now another node and check to make sure that the edges are updated appropriately - - h2 = graph.apply_orientation(graph.flip(h2)); - - graph.follow_edges(h1, false, [&](const handle_t& other) { - if (other == h2) { - found1 = true; - } - else if (other == graph.flip(h2)) { - found2 = true; - } - count1++; - }); - graph.follow_edges(h1, true, [&](const handle_t& other) { - if (other == h2) { - found3 = true; - } - count2++; - }); - graph.follow_edges(h2, false, [&](const handle_t& other) { - if (other == h1) { - found4 = true; - } - else if (other == graph.flip(h1)) { - found5 = true; - } - count3++; - }); - graph.follow_edges(h2, true, [&](const handle_t& other) { - if (other == h1) { - found6 = true; - } - count4++; - }); - assert(found1); - assert(found2); - assert(found3); - assert(found4); - assert(found5); - assert(found6); - assert(count1 == 2); - assert(count2 == 1); - assert(count3 == 2); - assert(count4 == 1); - } - } + { - // 
another batch of tests involving divide handle and reversing - // self edges - { - vector implementations; + using T = int64_t; + using A = bdsg::yomo::Allocator; + using V = CompatVector; + // Make a thing to hold onto a test array. + bdsg::yomo::UniqueMappedPointer numbers_holder; - HashGraph hg; - implementations.push_back(&hg); + // Construct it + numbers_holder.construct("GATTACA"); - PackedGraph pg; - implementations.push_back(&pg); + // See how much memory we are using + std::tuple total_free_reclaimable = + numbers_holder.get_usage(); + // Total bytes must be no less than free bytes + assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); + // Free bytes must be no less than reclaimable bytes + assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); + + // Some bytes should be free in the initial chain link + assert(get<1>(total_free_reclaimable) > 0); + // But they should all be reclaimable, including the block header + assert(get<1>(total_free_reclaimable) == get<2>(total_free_reclaimable)); - MappedPackedGraph mpg; - implementations.push_back(&mpg); + { - for (DeletableHandleGraph* implementation : implementations) { - DeletableHandleGraph& graph = *implementation; + // Get a reference to it, which will be valid unless we save() or + // something + auto &vec1 = *numbers_holder; - handle_t h1 = graph.create_handle("ATGAA"); - handle_t h2 = graph.create_handle("ATGAA"); + // We should start empty + assert(vec1.size() == 0); - graph.create_edge(h1, graph.flip(h1)); - graph.create_edge(graph.flip(h2), h2); + // We should be able to preload without crashing + numbers_holder.preload(); + numbers_holder.preload(true); - auto parts1 = graph.divide_handle(h1, {2, 4}); - auto parts2 = graph.divide_handle(h2, {2, 4}); + // We should be able to expand. 
+ vec1.resize(100); + assert(vec1.size() == 100); - assert(parts1.size() == 3); - assert(parts2.size() == 3); + // And contract + vec1.resize(10); + assert(vec1.size() == 10); - assert(graph.has_edge(parts1[0], parts1[1])); - assert(graph.has_edge(parts1[1], parts1[2])); - assert(graph.has_edge(parts1[2], graph.flip(parts1[2]))); + // And hold data + fill_to(vec1, 10, 0); + verify_to(vec1, 10, 0); - assert(graph.has_edge(parts2[0], parts2[1])); - assert(graph.has_edge(parts2[1], parts2[2])); - assert(graph.has_edge(graph.flip(parts2[0]), parts2[0])); + // And expand again + vec1.resize(100); + assert(vec1.size() == 100); - } - } + // And see the data + verify_to(vec1, 10, 0); - // another batch of tests that deal with deleting after dividing - { - vector> implementations; - - // Add implementations - - PackedGraph pg, pg2; - implementations.push_back(make_pair(&pg, &pg2)); - - HashGraph hg, hg2; - implementations.push_back(make_pair(&hg, &hg2)); - - MappedPackedGraph mpg, mpg2; - implementations.push_back(make_pair(&mpg, &mpg2)); - - // And test them - for (int imp = 0; imp < implementations.size(); ++imp) { - - for (bool backwards : {false, true}) { - - MutablePathDeletableHandleGraph* g = backwards ? 
implementations[imp].first : implementations[imp].second; - - assert(g->get_node_count() == 0); - - handle_t handle1 = g->create_handle("CAAATAAGGCTTGGAAATTTTCTGGAGTTCTA"); - handle_t handle2 = g->create_handle("TTATATTCCAACTCTCTG"); - path_handle_t path_handle = g->create_path_handle("x"); - g->create_edge(handle1, handle2); - - if (backwards) { - handle1 = g->flip(handle1); - handle2 = g->flip(handle2); - g->append_step(path_handle, handle2); - g->append_step(path_handle, handle1); - } else { - g->append_step(path_handle, handle1); - g->append_step(path_handle, handle2); - } - - auto parts1 = g->divide_handle(handle1, vector({2, 7, 22, 31})); - auto parts2 = g->divide_handle(handle2, vector({1, 5, 10})); - - vector steps; - g->for_each_step_in_path(path_handle, [&](step_handle_t step_handle) { - steps.push_back(g->get_handle_of_step(step_handle)); - }); - - assert(steps.size() == 9); - int i = 0; - vector to_delete; - g->append_step(g->create_path_handle(to_string(i)), steps[i]); - ++i; - to_delete.push_back(steps[i++]); - g->append_step(g->create_path_handle(to_string(i)), steps[i]); - ++i; - to_delete.push_back(steps[i++]); - to_delete.push_back(steps[i++]); - to_delete.push_back(steps[i++]); - g->append_step(g->create_path_handle(to_string(i)), steps[i]); - ++i; - to_delete.push_back(steps[i++]); - g->append_step(g->create_path_handle(to_string(i)), steps[i]); - ++i; - - g->destroy_path(path_handle); - - for (auto handle : to_delete) { - g->destroy_handle(handle); - } - - g->for_each_path_handle([&](const path_handle_t& p) { - g->for_each_step_in_path(p, [&](const step_handle_t& s) { - auto h = g->get_handle_of_step(s); - }); - }); - - assert(g->get_node_count() == 4); - assert(g->get_path_count() == 4); - } - } - } + // And expand more + vec1.resize(1000); + assert(vec1.size() == 1000); - // another batch of tests that deal with deleting down to an empty graph - { - vector implementations; - - // Add implementations - - PackedGraph pg; - 
implementations.push_back(&pg); - - HashGraph hg; - implementations.push_back(&hg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - // And test them - for (int imp = 0; imp < implementations.size(); ++imp) { - - MutablePathDeletableHandleGraph* g = implementations[imp]; - - // the graph that i discovered the bug this tests for - vector>> graph_spec{ - {1, "C", {19}}, - {2, "A", {4, 5}}, - {3, "G", {4, 5}}, - {4, "T", {6, 16, 18}}, - {5, "C", {6, 16, 18}}, - {6, "TTG", {7, 8}}, - {7, "A", {9}}, - {8, "G", {9}}, - {9, "AAATT", {16}}, - {10, "A", {12}}, - {11, "T", {12}}, - {12, "ATAT", {13, 14}}, - {13, "A", {15}}, - {14, "T", {15}}, - {15, "C", {20}}, - {16, "TTCTGG", {17, 18}}, - {17, "AGT", {18}}, - {18, "TCTAT", {10, 11}}, - {19, "AAATAAG", {2, 3}}, - {20, "CAACTCTCTG", {}}, - }; - - for (auto rec : graph_spec) { - g->create_handle(get<1>(rec), get<0>(rec)); - } - for (auto rec : graph_spec) { - for (auto n : get<2>(rec)) { - g->create_edge(g->get_handle(get<0>(rec)), g->get_handle(n)); - } - } - - // a series of deletes that elicits the behavior - vector> delete_edges{ - {g->get_handle(10, 1), g->get_handle(18, 1)}, - {g->get_handle(3, 0), g->get_handle(5, 0)}, - {g->get_handle(4, 0), g->get_handle(6, 0)}, - {g->get_handle(6, 0), g->get_handle(7, 0)}, - {g->get_handle(2, 0), g->get_handle(5, 0)}, - {g->get_handle(7, 0), g->get_handle(9, 0)}, - {g->get_handle(16, 0), g->get_handle(17, 0)}, - {g->get_handle(12, 0), g->get_handle(14, 0)}, - {g->get_handle(9, 0), g->get_handle(16, 0)}, - {g->get_handle(11, 1), g->get_handle(18, 1)}, - {g->get_handle(6, 0), g->get_handle(8, 0)}, - {g->get_handle(12, 0), g->get_handle(13, 0)}, - {g->get_handle(5, 0), g->get_handle(16, 0)}, - {g->get_handle(4, 0), g->get_handle(16, 0)}, - {g->get_handle(16, 0), g->get_handle(18, 0)}, - {g->get_handle(5, 0), g->get_handle(6, 0)}, - {g->get_handle(3, 0), g->get_handle(4, 0)}, - {g->get_handle(8, 0), g->get_handle(9, 0)}, - {g->get_handle(2, 0), g->get_handle(4, 0)} - }; 
- for (auto edge : delete_edges) { - g->destroy_edge(edge.first, edge.second); - } - g->destroy_handle(g->get_handle(16, 0)); - g->destroy_handle(g->get_handle(13, 0)); - g->destroy_handle(g->get_handle(15, 0)); - g->destroy_handle(g->get_handle(20, 0)); - g->destroy_handle(g->get_handle(14, 0)); - g->destroy_handle(g->get_handle(10, 0)); - g->destroy_handle(g->get_handle(12, 0)); - g->destroy_handle(g->get_handle(11, 0)); - g->destroy_handle(g->get_handle(9, 0)); - g->destroy_handle(g->get_handle(4, 0)); - g->destroy_handle(g->get_handle(7, 0)); - g->destroy_handle(g->get_handle(18, 0)); - g->destroy_handle(g->get_handle(5, 0)); - g->destroy_handle(g->get_handle(1, 0)); - g->destroy_handle(g->get_handle(8, 0)); - g->destroy_handle(g->get_handle(19, 0)); - g->destroy_handle(g->get_handle(3, 0)); - g->destroy_handle(g->get_handle(6, 0)); - g->destroy_handle(g->get_handle(17, 0)); - g->destroy_handle(g->get_handle(2, 0)); - - g->create_handle("GATTACA", 4); - assert(g->get_node_count() == 1); - } - } - - // Edge counts stay accurate after deleting nodes - { - vector implementations; - - // Add implementations - - PackedGraph pg; - implementations.push_back(&pg); - - HashGraph hg; - implementations.push_back(&hg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - // note: not valid in graph with reversing self edges - auto count_edges = [&](const HandleGraph& g) { - int cnt = 0; - g.for_each_handle([&](const handle_t& h) { - for (bool r : {true, false}) { - g.follow_edges(h, r, [&](const handle_t& n) { - ++cnt; - }); - } - }); - assert(cnt % 2 == 0); - return cnt / 2; - }; - - // And test them - for (int imp = 0; imp < implementations.size(); ++imp) { - - MutablePathDeletableHandleGraph* graph = implementations[imp]; - - handle_t h1 = graph->create_handle("A"); - handle_t h2 = graph->create_handle("AAA"); - handle_t h3 = graph->create_handle("CC"); - handle_t h4 = graph->create_handle("G"); - handle_t h5 = graph->create_handle("T"); - handle_t h6 = 
graph->create_handle("T"); - handle_t h7 = graph->create_handle("TT"); - handle_t h8 = graph->create_handle("T"); - handle_t h9 = graph->create_handle("TTT"); - handle_t h10 = graph->create_handle("C"); - handle_t h11 = graph->create_handle("CC"); - handle_t h12 = graph->create_handle("A"); - handle_t h13 = graph->create_handle("AA"); - - graph->create_edge(h1, h2); - graph->create_edge(h2, h3); - graph->create_edge(h2, h4); - graph->create_edge(h3, h4); - graph->create_edge(h3, h5); - graph->create_edge(h5, h6); - graph->create_edge(h6, h7); - graph->create_edge(h7, h8); - graph->create_edge(h8, h9); - graph->create_edge(h9, h10); - graph->create_edge(h9, h12); - graph->create_edge(h10, h11); - graph->create_edge(h11, h12); - graph->create_edge(h12, h13); - graph->create_edge(h5, h7); - graph->create_edge(h5, h11); - graph->create_edge(h7, h13); - graph->create_edge(h8, h12); - - graph->destroy_handle(h1); - assert(graph->get_edge_count() == count_edges(*graph)); - graph->destroy_handle(h6); - assert(graph->get_edge_count() == count_edges(*graph)); - graph->destroy_handle(h9); - assert(graph->get_edge_count() == count_edges(*graph)); - graph->destroy_handle(h10); - assert(graph->get_edge_count() == count_edges(*graph)); - } - } - - // batch deletion of paths works as expected - { - vector implementations; - - // Add implementations - - PackedGraph pg; - implementations.push_back(&pg); - - HashGraph hg; - implementations.push_back(&hg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - for (int imp = 0; imp < implementations.size(); ++imp) { - - MutablePathDeletableHandleGraph& graph = *implementations[imp]; - - auto h1 = graph.create_handle("A"); - auto h2 = graph.create_handle("A"); - auto h3 = graph.create_handle("A"); - - graph.create_edge(h1, h2); - graph.create_edge(h2, h3); - - auto p1 = graph.create_path_handle("1"); - auto p2 = graph.create_path_handle("2"); - auto p3 = graph.create_path_handle("3"); - auto p4 = 
graph.create_path_handle("4"); - auto p5 = graph.create_path_handle("5"); - - for (const auto& p : {p1, p2, p3, p4, p5}) { - for (auto h : {h1, h2, h3}) { - graph.append_step(p, h); - } - } - - graph.destroy_paths({p1, p3, p4}); - - set paths_seen; - set paths_expected{p2, p5}; - graph.for_each_path_handle([&](const path_handle_t& path) { - assert(!paths_seen.count(path)); - paths_seen.insert(path); - std::vector handles; - std::vector handles_expected{h1, h2, h3}; - for (auto h : graph.scan_path(path)) { - handles.push_back(h); - } - assert(handles == handles_expected); - }); - - assert(paths_seen == paths_expected); - - graph.for_each_handle([&](const handle_t& h) { - set paths; - graph.for_each_step_on_handle(h, [&](const step_handle_t& step) { - auto p = graph.get_path_handle_of_step(step); - assert(!paths.count(p)); - paths.insert(p); - }); - assert(paths_seen == paths_expected); - }); - } - } - - cerr << "DeletableHandleGraph tests successful!" << endl; -} + // And see the data + verify_to(vec1, 10, 0); -void test_mutable_path_handle_graphs() { - - vector implementations; - - PackedGraph pg; - implementations.push_back(&pg); - - HashGraph hg; - implementations.push_back(&hg); + // And hold more data + fill_to(vec1, 1000, 1); + verify_to(vec1, 1000, 1); - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - for (MutablePathDeletableHandleGraph* implementation : implementations) { - - auto check_path = [&](MutablePathDeletableHandleGraph& graph, const path_handle_t& p, const vector& steps) { - assert(graph.get_step_count(p) == steps.size()); - - // Make sure steps connect back to the path - step_handle_t begin_step = graph.path_begin(p); - step_handle_t end_step = graph.path_end(p); - assert(graph.get_path_handle_of_step(begin_step) == p); - assert(graph.get_path_handle_of_step(end_step) == p); - - step_handle_t step = graph.path_begin(p); - for (int i = 0; i < steps.size(); i++) { - auto here = graph.get_handle_of_step(step); - 
assert(graph.get_path_handle_of_step(step) == p); - assert(graph.get_handle_of_step(step) == steps[i]); - - if (graph.get_is_circular(p)) { - assert(graph.has_next_step(step)); - assert(graph.has_previous_step(step)); - } - else { - assert(graph.has_next_step(step) == i + 1 < steps.size()); - assert(graph.has_previous_step(step) == i > 0); - } - - step = graph.get_next_step(step); - } - - if (graph.get_is_circular(p) && !graph.is_empty(p)) { - assert(step == graph.path_begin(p)); - } - else { - assert(step == graph.path_end(p)); - } - - step = graph.path_back(p); - - for (int i = steps.size() - 1; i >= 0; i--) { - - assert(graph.get_path_handle_of_step(step) == p); - assert(graph.get_handle_of_step(step) == steps[i]); - - if (graph.get_is_circular(p)) { - assert(graph.has_next_step(step)); - assert(graph.has_previous_step(step)); - } - else { - assert(graph.has_next_step(step) == i + 1 < steps.size()); - assert(graph.has_previous_step(step) == i > 0); - } - - step = graph.get_previous_step(step); - } - - if (graph.get_is_circular(p) && !graph.is_empty(p)) { - assert(step == graph.path_back(p)); - } - else { - assert(step == graph.path_front_end(p)); - } - }; + // And to preload without crashing + numbers_holder.preload(); + numbers_holder.preload(true); + } - auto check_flips = [&](MutablePathDeletableHandleGraph& graph, const path_handle_t& p, const vector& steps) { + // We're going to need a temporary file + // This filename will be filled in with the actual filename. 
+ char filename[] = "tmpXXXXXX"; + int tmpfd = mkstemp(filename); + assert(tmpfd != -1); - auto flipped = steps; - for (size_t i = 0; i < steps.size(); i++) { - graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); - flipped[i] = graph.flip(flipped[i]); - check_path(graph, p, flipped); + numbers_holder.save(tmpfd); - graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); - flipped[i] = graph.flip(flipped[i]); - check_path(graph, p, flipped); - } - }; + { + auto &vec2 = *numbers_holder; + + // We should have the same data + assert(vec2.size() == 1000); + verify_to(vec2, 1000, 1); + + // We should be able to preload without crashing + numbers_holder.preload(); + numbers_holder.preload(true); + + // We should still be able to modify it. + vec2.resize(4000); + fill_to(vec2, 4000, 2); + verify_to(vec2, 4000, 2); + + // Check memory usage + total_free_reclaimable = numbers_holder.get_usage(); + // Total bytes must be no less than free bytes + assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); + // Free bytes must be no less than reclaimable bytes + assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); + + // At this point we've made it bigger than ever before and required + // a new link probably, so nothing should be reclaimable. + assert(get<2>(total_free_reclaimable) == 0); + // But some space should be free because we've deallocated smaller + // vectors. + assert(get<1>(total_free_reclaimable) > 0); + + // Make it even bigger! 
+ vec2.resize(10000); + + // And smaller again + vec2.resize(4000); + + // And reallocate smaller + vec2.shrink_to_fit(); + + // Check memory usage + total_free_reclaimable = numbers_holder.get_usage(); + // Total bytes must be no less than free bytes + assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); + // Free bytes must be no less than reclaimable bytes + assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); + + // At this point some memory should be reclaimable + assert(get<2>(total_free_reclaimable) > 0); + } - MutablePathDeletableHandleGraph& graph = *implementation; + numbers_holder.dissociate(); - handle_t h1 = graph.create_handle("AC"); - handle_t h2 = graph.create_handle("CAGTGA"); - handle_t h3 = graph.create_handle("GT"); + { + auto &vec3 = *numbers_holder; - graph.create_edge(h1, h2); - graph.create_edge(h2, h3); - graph.create_edge(h1, graph.flip(h2)); - graph.create_edge(graph.flip(h2), h3); + // After dissociating, we should be able to modify the vector + vec3.resize(5); + fill_to(vec3, 5, 3); + verify_to(vec3, 5, 3); + } - assert(!graph.has_path("1")); - assert(graph.get_path_count() == 0); + numbers_holder.reset(); - path_handle_t p1 = graph.create_path_handle("1"); + numbers_holder.load(tmpfd, "GATTACA"); - assert(graph.has_path("1")); - assert(graph.get_path_count() == 1); - assert(graph.get_path_handle("1") == p1); - assert(graph.get_path_name(p1) == "1"); - assert(graph.get_step_count(p1) == 0); - assert(graph.is_empty(p1)); + // Check memory usage + total_free_reclaimable = numbers_holder.get_usage(); + // Total bytes must be no less than free bytes + assert(get<0>(total_free_reclaimable) >= get<1>(total_free_reclaimable)); + // Free bytes must be no less than reclaimable bytes + assert(get<1>(total_free_reclaimable) >= get<2>(total_free_reclaimable)); - graph.append_step(p1, h1); + // No bytes should be reclaimable because we saved this through a mapping. 
+ assert(get<2>(total_free_reclaimable) == 0); - assert(graph.get_step_count(p1) == 1); - assert(!graph.is_empty(p1)); + { + auto &vec4 = *numbers_holder; - graph.append_step(p1, h2); - graph.append_step(p1, h3); + // We should be able to preload without crashing + numbers_holder.preload(); + numbers_holder.preload(true); - assert(graph.get_step_count(p1) == 3); + // When we reload we should see the last thing we wrote before + // dissociating. + assert(vec4.size() == 4000); + verify_to(vec4, 4000, 2); + } - // graph can traverse a path - check_path(graph, p1, {h1, h2, h3}); + close(tmpfd); + unlink(filename); + } + + assert(yomo::Manager::count_chains() == 0); + assert(yomo::Manager::count_links() == 0); + + { + using T = int64_t; + using A = bdsg::yomo::Allocator; + using V1 = CompatVector; + using A2 = bdsg::yomo::Allocator; + using V2 = CompatVector; + // Make a thing to hold onto a test array of arrays. + bdsg::yomo::UniqueMappedPointer numbers_holder_holder; + + numbers_holder_holder.construct(); + + // Now do a vigorous test comparing to a normal vector + bother_vector(*numbers_holder_holder); + } + + assert(yomo::Manager::count_chains() == 0); + assert(yomo::Manager::count_links() == 0); + + { + using T = int64_t; + using A = bdsg::yomo::Allocator; + using V1 = CompatVector; + using A2 = bdsg::yomo::Allocator; + using V2 = CompatVector; + + // Just make the root object on the stack and make sure chain-based + // allocators and pointers fall back to the heap properly. 
+ V2 numbers; + + // Now do a vigorous test comparing to a normal vector + bother_vector(numbers); + } + + assert(yomo::Manager::count_chains() == 0); + assert(yomo::Manager::count_links() == 0); + + { + // Make sure our bit-packing vector works + CompatIntVector<> vec; + vec.width(3); + + for (size_t i = 0; i < 1000; i++) { + vec.resize(i + 1); + vec.at(i) = i % 8; + if (vec.at(i) != i % 8) { + throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + + std::to_string(i) + " but got " + + std::to_string(vec.at(i))); + } + } - // graph preserves paths when reversing nodes - check_flips(graph, p1, {h1, h2, h3}); + for (size_t i = 0; i < 1000; i++) { + if (vec.at(i) != i % 8) { + throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + + std::to_string(i) + " but got " + + std::to_string(vec.at(i))); + } + } - // make a circular path - path_handle_t p2 = graph.create_path_handle("2", true); - assert(graph.get_path_count() == 2); + vec.resize(500); + for (size_t i = 0; i < 500; i++) { + if (vec.at(i) != i % 8) { + throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + + std::to_string(i) + " but got " + + std::to_string(vec.at(i))); + } + } - graph.append_step(p2, h1); - graph.append_step(p2, graph.flip(h2)); - graph.append_step(p2, h3); + vec.repack(4, 500); + for (size_t i = 0; i < 500; i++) { + if (vec.at(i) != i % 8) { + throw std::runtime_error("Expected " + std::to_string(i % 8) + " at " + + std::to_string(i) + " but got " + + std::to_string(vec.at(i))); + } + } + } - check_path(graph, p2, {h1, graph.flip(h2), h3}); + assert(yomo::Manager::count_chains() == 0); + assert(yomo::Manager::count_links() == 0); - // graph can query steps of a node on paths + { + // Make sure our bit-packing vector can self-test - bool found1 = false, found2 = false; - vector steps = graph.steps_of_handle(h1); - for (auto& step : steps) { - if (graph.get_path_handle_of_step(step) == p1 && - graph.get_handle_of_step(step) == h1) { - found1 = 
true; - } - else if (graph.get_path_handle_of_step(step) == p2 && - graph.get_handle_of_step(step) == h1) { - found2 = true; - } - else { - assert(false); - } - } - assert(found1); - assert(found2); - found1 = found2 = false; + // Make a vector + bdsg::yomo::UniqueMappedPointer vec; + vec.construct(); + vec->width(60); + vec->resize(1000); + fill_to(*vec, 1000, 1); + verify_to(*vec, 1000, 1); - steps = graph.steps_of_handle(h1, true); - for (auto& step : steps) { - if (graph.get_path_handle_of_step(step) == p1 && - graph.get_handle_of_step(step) == h1) { - found1 = true; - } - else if (graph.get_path_handle_of_step(step) == p2 && - graph.get_handle_of_step(step) == h1) { - found2 = true; - } - else { - assert(false); - } - } - assert(found1); - assert(found2); - found1 = found2 = false; + // We should pass heap verification + vec.check_heap_integrity(); - steps = graph.steps_of_handle(graph.flip(h1), true); - for (auto& step : steps) { - assert(false); - } + // Save it out + char filename[] = "tmpXXXXXX"; + int tmpfd = mkstemp(filename); + assert(tmpfd != -1); + vec.save(tmpfd); + vec.reset(); + + // Drop part of the file + auto file_size = lseek(tmpfd, 0, SEEK_END); + assert(ftruncate(tmpfd, file_size / 2) == 0); + + // Reload + vec.load(tmpfd, ""); + + try { + // We shouldn't pass heap verification. + vec.check_heap_integrity(); + assert(false); + } catch (std::runtime_error &e) { + // This is the exception we expect to get. 
+ } - steps = graph.steps_of_handle(h2, true); - for (auto& step : steps) { - if (graph.get_path_handle_of_step(step) == p1 && - graph.get_handle_of_step(step) == h2) { - found1 = true; - } - else { - assert(false); - } - } - steps = graph.steps_of_handle(graph.flip(h2), true); - for (auto& step : steps) { - if (graph.get_path_handle_of_step(step) == p2 && - graph.get_handle_of_step(step) == graph.flip(h2)) { - found2 = true; - } - else { - assert(false); - } - } - assert(found1); - assert(found2); - found1 = found2 = false; + vec.reset(); - vector segments = graph.divide_handle(h2, {size_t(2), size_t(4)}); + close(tmpfd); + unlink(filename); + } - // graph preserves paths when dividing nodes + assert(yomo::Manager::count_chains() == 0); + assert(yomo::Manager::count_links() == 0); - check_path(graph, p1, {h1, segments[0], segments[1], segments[2], h3}); - check_path(graph, p2, {h1, graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0]), h3}); + cerr << "Mapped Structs tests successful!" << endl; +} - path_handle_t p3 = graph.create_path_handle("3"); - graph.append_step(p3, h1); - graph.append_step(p3, segments[0]); +void test_int_vector() { - assert(graph.has_path("3")); - assert(graph.get_path_count() == 3); + // Make a thing to hold onto a test int vector. + bdsg::yomo::UniqueMappedPointer iv; + + // Have a function we can call to check its size. 
+ auto save_and_check_size = [&](size_t expected_size) { + // Save it out, creating or clobbering + int fd = open("test.dat", O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); + iv.save(fd); + close(fd); + iv.dissociate(); + + // Make sure that the file has the correct size + struct stat file_stats; + stat("test.dat", &file_stats); + cerr << "Observed file size of " << file_stats.st_size << " bytes" << endl; + assert(file_stats.st_size == expected_size); + + // Load it again + bdsg::yomo::UniqueMappedPointer iv2; + fd = open("test.dat", O_RDWR); + iv2.load(fd, "ints"); + close(fd); + + // Make sure the re-loaded object has the correct usage. + std::tuple total_free_reclaimable = iv2.get_usage(); + size_t post_load_total_bytes = std::get<0>(total_free_reclaimable); + cerr << "Observed post-load size of " << post_load_total_bytes << " bytes" + << endl; + assert(post_load_total_bytes == expected_size); + }; + + // Construct it + iv.construct("ints"); + + // Give it a width + iv->width(20); + + // Make it big + size_t iv_size = 1024 * 1024 * 10; + for (size_t i = 1; i < iv_size; i *= 2) { + // Keep resizing it up and fragment the heap into many links. 
+ iv->resize(i); + } + iv->resize(iv_size); + + for (size_t i = 0; i < iv_size; i++) { + // Fill it with a distinctive bit pattern + (*iv)[i] = 0xF0F0; + } + + // See how much memory we are using + std::tuple total_free_reclaimable = iv.get_usage(); + size_t required_bytes = + std::get<0>(total_free_reclaimable) - std::get<2>(total_free_reclaimable); + cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " + << std::get<1>(total_free_reclaimable) << " bytes free, " + << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; + cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() + << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; + save_and_check_size(required_bytes); + + // Shrink it back down + iv->repack(16, iv_size); + total_free_reclaimable = iv.get_usage(); + required_bytes = + std::get<0>(total_free_reclaimable) - std::get<2>(total_free_reclaimable); + cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " + << std::get<1>(total_free_reclaimable) << " bytes free, " + << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; + cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() + << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; + save_and_check_size(required_bytes); + + // Expand it even more + iv->repack(32, iv_size); + total_free_reclaimable = iv.get_usage(); + required_bytes = + std::get<0>(total_free_reclaimable) - std::get<2>(total_free_reclaimable); + cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " + << std::get<1>(total_free_reclaimable) << " bytes free, " + << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; + cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() + << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; + save_and_check_size(required_bytes); + + // And again + iv->repack(40, iv_size); + total_free_reclaimable 
= iv.get_usage(); + required_bytes = + std::get<0>(total_free_reclaimable) - std::get<2>(total_free_reclaimable); + cerr << std::get<0>(total_free_reclaimable) << " bytes in chain, " + << std::get<1>(total_free_reclaimable) << " bytes free, " + << std::get<2>(total_free_reclaimable) << " bytes reclaimable" << endl; + cerr << iv->size() << "/" << iv->capacity() << " entries of " << iv->width() + << " bits is " << (iv->capacity() * iv->width() / 8) << " bytes" << endl; + save_and_check_size(required_bytes); + + unlink("test.dat"); + cerr << "Int Vector tests successful!" << endl; +} - // graph can toggle circularity +void test_serializable_handle_graphs() { - graph.for_each_path_handle([&](const path_handle_t& p) { + vector> + implementations; - vector steps; + PackedGraph pg_out, pg_in; + implementations.emplace_back(&pg_out, &pg_in); - for (handle_t h : graph.scan_path(p)) { - steps.push_back(h); - } + HashGraph hg_out, hg_in; + implementations.emplace_back(&hg_out, &hg_in); - bool starting_circularity = graph.get_is_circular(p); + MappedPackedGraph mpg_in, mpg_out; + implementations.emplace_back(&mpg_in, &mpg_out); - // make every transition occur - for (bool circularity : {true, true, false, false, true}) { - graph.set_circularity(p, circularity); - assert(graph.get_is_circular(p) == circularity); - check_path(graph, p, steps); - } + for (pair + implementation : implementations) { - graph.set_circularity(p, starting_circularity); - }); + MutablePathMutableHandleGraph *build_graph = + dynamic_cast(implementation.first); + PathHandleGraph *check_graph = + dynamic_cast(implementation.second); + SerializableHandleGraph *serialize_graph = implementation.first; + SerializableHandleGraph *deserialize_graph = implementation.second; - // graph can destroy paths + handle_t h1 = build_graph->create_handle("GATT"); + handle_t h2 = build_graph->create_handle("TTGA"); + handle_t h3 = build_graph->create_handle("T"); + handle_t h4 = build_graph->create_handle("CA"); - 
graph.destroy_path(p3); + build_graph->create_edge(h1, h2); + build_graph->create_edge(h1, build_graph->flip(h3)); + build_graph->create_edge(h2, h3); + build_graph->create_edge(build_graph->flip(h3), h4); - assert(!graph.has_path("3")); - assert(graph.get_path_count() == 2); + path_handle_t p = build_graph->create_path_handle("path"); + build_graph->append_step(p, h1); + build_graph->append_step(p, h2); + build_graph->append_step(p, h4); - bool found3 = false; + stringstream strm; - graph.for_each_path_handle([&](const path_handle_t& p) { - if (graph.get_path_name(p) == "1") { - found1 = true; - } - else if (graph.get_path_name(p) == "2") { - found2 = true; - } - else if (graph.get_path_name(p) == "3") { - found3 = true; - } - else { - assert(false); - } - }); + serialize_graph->serialize(strm); + strm.seekg(0); + deserialize_graph->deserialize(strm); - assert(found1); - assert(found2); - assert(!found3); - - // check flips to see if membership records are still functional - check_flips(graph, p1, {h1, segments[0], segments[1], segments[2], h3}); - check_flips(graph, p2, {h1, graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0]), h3}); - - graph.destroy_path(p1); - - assert(!graph.has_path("1")); - assert(graph.get_path_count() == 1); - - found1 = found2 = found3 = false; - - graph.for_each_path_handle([&](const path_handle_t& p) { - if (graph.get_path_name(p) == "1") { - found1 = true; - } - else if (graph.get_path_name(p) == "2") { - found2 = true; - } - else if (graph.get_path_name(p) == "3") { - found3 = true; - } - else { - assert(false); - } - }); + assert(build_graph->get_node_count() == check_graph->get_node_count()); + assert(build_graph->get_edge_count() == check_graph->get_edge_count()); + assert(build_graph->get_path_count() == check_graph->get_path_count()); - assert(!found1); - assert(found2); - assert(!found3); - - // check flips to see if membership records are still functional - check_flips(graph, p2, {h1, 
graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0]), h3}); - - // make a path to rewrite - path_handle_t p4 = graph.create_path_handle("4"); - graph.prepend_step(p4, h3); - graph.prepend_step(p4, segments[2]); - graph.prepend_step(p4, segments[1]); - graph.prepend_step(p4, segments[0]); - graph.prepend_step(p4, h1); - - check_flips(graph, p4, {h1, segments[0], segments[1], segments[2], h3}); - - auto check_rewritten_segment = [&](const pair& new_segment, - const vector& steps) { - int i = 0; - for (auto step = new_segment.first; step != new_segment.second; step = graph.get_next_step(step)) { - assert(graph.get_handle_of_step(step) == steps[i]); - i++; - } - assert(i == steps.size()); - }; + for (handle_t h : {h1, h2, h3, h4}) { + assert(check_graph->has_node(build_graph->get_id(h))); + assert(check_graph->get_sequence(check_graph->get_handle( + build_graph->get_id(h))) == build_graph->get_sequence(h)); + } - // rewrite the middle portion of a path + assert(check_graph->get_step_count( + check_graph->get_path_handle(build_graph->get_path_name(p))) == + build_graph->get_step_count(p)); + } - step_handle_t s1 = graph.get_next_step(graph.path_begin(p4)); - step_handle_t s2 = graph.get_next_step(graph.get_next_step(graph.get_next_step(s1))); + cerr << "SerializableHandleGraph tests successful!" 
<< endl; +} - auto new_segment = graph.rewrite_segment(s1, s2, {graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0])}); +void test_deletable_handle_graphs() { - check_flips(graph, p4, {h1, graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0]), h3}); - check_rewritten_segment(new_segment, {graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0])}); + // first batch of tests + { + vector implementations; - // rewrite around the end of a circular path to delete + // Add implementations - graph.create_edge(h3, h1); - graph.create_edge(segments[2], segments[0]); - graph.set_circularity(p4, true); + PackedGraph pg; + implementations.push_back(&pg); - s1 = graph.get_previous_step(graph.path_begin(p4)); - s2 = graph.get_next_step(graph.path_begin(p4)); - assert(s2 != graph.path_end(p4)); + HashGraph hg; + implementations.push_back(&hg); - new_segment = graph.rewrite_segment(s1, s2, vector()); - // The end we get should be the same as the end we sent, since it is exclusive - assert(new_segment.second == s2); + MappedPackedGraph mpg; + implementations.push_back(&mpg); - check_flips(graph, p4, {graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0])}); - check_rewritten_segment(new_segment, vector()); + // And test them - // add into an empty slot + for (DeletableHandleGraph *implementation : implementations) { - new_segment = graph.rewrite_segment(new_segment.first, new_segment.second, {graph.flip(h1), graph.flip(h3)}); + DeletableHandleGraph &graph = *implementation; - check_flips(graph, p4, {graph.flip(h1), graph.flip(h3), graph.flip(segments[2]), graph.flip(segments[1]), graph.flip(segments[0])}); - check_rewritten_segment(new_segment, {graph.flip(h1), graph.flip(h3)}); + assert(graph.get_node_count() == 0); - } - - { - vector> implementations; - - // Add implementations - - HashGraph hg, hg2; - implementations.push_back(make_pair(&hg, &hg2)); - - PackedGraph pg, pg2; - 
implementations.push_back(make_pair(&pg, &pg2)); - - MappedPackedGraph mpg, mpg2; - implementations.push_back(make_pair(&mpg, &mpg2)); - - // And test them - for (int imp = 0; imp < implementations.size(); ++imp) { - for (bool backwards : {false, true}) { - - MutablePathMutableHandleGraph* g = backwards ? implementations[imp].first : implementations[imp].second; - - assert(g->get_node_count() == 0); - - handle_t handle = g->create_handle("TTATATTCCAACTCTCTG"); - if (backwards) { - handle = g->flip(handle); - } - path_handle_t path_handle = g->create_path_handle("Path"); - g->append_step(path_handle, handle); - string seq = g->get_sequence(handle); - vector true_parts = { seq.substr(0, 1), seq.substr(1, 4), seq.substr(5, 5), seq.substr(10) }; - - // Should get (C,AGAG,AGTTG,GAATATAA) (forward) - // Should get (T,TATA,TTCCA,ACTCTCTG) (reverse) - auto parts = g->divide_handle(handle, {1, 5, 10}); - assert(parts.size() == true_parts.size()); - for (int i = 0; i < parts.size(); ++i) { - assert(g->get_sequence(parts[i]) == true_parts[i]); - assert(g->get_is_reverse(parts[i]) == backwards); - } - - vector steps; - g->for_each_step_in_path(path_handle, [&](step_handle_t step_handle) { - steps.push_back(g->get_handle_of_step(step_handle)); - }); - assert(steps.size() == true_parts.size()); - for (int i = 0; i < parts.size(); ++i) { - assert(g->get_sequence(steps[i]) == true_parts[i]); - assert(g->get_is_reverse(steps[i]) == backwards); - } - } - } + handle_t h = graph.create_handle("ATG", 2); - } - - cerr << "MutablePathDeletableHandleGraph tests successful!" 
<< endl; -} + // DeletableHandleGraph has correct structure after creating a node + { + assert(graph.get_sequence(h) == "ATG"); + assert(graph.get_sequence(graph.flip(h)) == "CAT"); + assert(graph.get_base(h, 1) == 'T'); + assert(graph.get_base(graph.flip(h), 2) == 'T'); + assert(graph.get_subsequence(h, 1, 3) == "TG"); + assert(graph.get_subsequence(graph.flip(h), 0, 2) == "CA"); + assert(graph.get_length(h) == 3); + assert(graph.has_node(graph.get_id(h))); + assert(!graph.has_node(graph.get_id(h) + 1)); -template -void test_packed_vector() { - enum vec_op_t {SET = 0, GET = 1, APPEND = 2, POP = 3, SERIALIZE = 4}; - - random_device rd; - default_random_engine prng(rd()); - uniform_int_distribution op_distr(0, 4); - - int num_runs = 1000; - int num_ops = 200; - int gets_per_op = 5; - int sets_per_op = 5; - int appends_per_op = 3; - int pops_per_op = 1; - - for (size_t i = 0; i < num_runs; i++) { - - uint64_t next_val = 0; - - vector std_vec; - PackedVectorImpl dyn_vec; - - for (size_t j = 0; j < num_ops; j++) { - - vec_op_t op = (vec_op_t) op_distr(prng); - switch (op) { - case SET: - if (!std_vec.empty()) { - for (size_t k = 0; k < sets_per_op; k++) { - size_t idx = prng() % dyn_vec.size(); - std_vec[idx] = next_val; - dyn_vec.set(idx, next_val); - next_val++; - } - } - - break; - - case GET: - if (!std_vec.empty()) { - for (size_t k = 0; k < gets_per_op; k++) { - size_t idx = prng() % dyn_vec.size(); - assert(std_vec[idx] == dyn_vec.get(idx)); - next_val++; - } - } - - break; - - case APPEND: - for (size_t k = 0; k < appends_per_op; k++) { - std_vec.push_back(next_val); - dyn_vec.push_back(next_val); - next_val++; - } - - break; - - case POP: - if (!std_vec.empty()) { - for (size_t k = 0; k < pops_per_op; k++) { - std_vec.pop_back(); - dyn_vec.pop_back(); - } - } - - break; - - case SERIALIZE: - { - stringstream strm; - - dyn_vec.serialize(strm); - strm.seekg(0); - PackedVectorImpl copy_vec(strm); - - assert(copy_vec.size() == dyn_vec.size()); - for (size_t i = 0; 
i < copy_vec.size(); i++) { - assert(copy_vec.get(i) == dyn_vec.get(i)); - } - break; - } - - default: - break; - } - - assert(std_vec.empty() == dyn_vec.empty()); - assert(std_vec.size() == dyn_vec.size()); - } - } - cerr << "PackedVector (" << typeid(PackedVectorImpl).name() << ") tests successful!" << endl; -} + assert(graph.get_handle(graph.get_id(h)) == h); + assert(!graph.get_is_reverse(h)); + assert(graph.get_is_reverse(graph.flip(h))); -/** - * Generic iterator test function that works with any vector-like container - * (PackedVector, PagedVector, RobustPagedVector, PackedDeque). - * - * Tests ForwardIterator, BidirectionalIterator, RandomAccessIterator, and - * iterator order comparison, but not OutputIterator. - */ -template -void test_iterators() { - // ForwardIterator tests + assert(graph.get_node_count() == 1); + assert(graph.min_node_id() == graph.get_id(h)); + assert(graph.max_node_id() == graph.get_id(h)); + assert(graph.get_total_length() == 3); + assert(graph.get_edge_count() == 0); - // Empty iteration - { - VectorLike vec; - assert(vec.begin() == vec.end()); + graph.follow_edges(h, true, [](const handle_t &prev) { + assert(false); + return true; + }); + graph.follow_edges(h, false, [](const handle_t &next) { + assert(false); + return true; + }); + } + + handle_t h2 = graph.create_handle("CT", 1); + + // DeletableHandleGraph has correct structure after creating a node at the + // beginning of ID space + { + + assert(graph.get_sequence(h2) == "CT"); + assert(graph.get_sequence(graph.flip(h2)) == "AG"); + assert(graph.get_base(h2, 1) == 'T'); + assert(graph.get_base(graph.flip(h2), 0) == 'A'); + assert(graph.get_subsequence(h2, 1, 10) == "T"); + assert(graph.get_subsequence(graph.flip(h2), 0, 2) == "AG"); + assert(graph.get_length(h2) == 2); + assert(graph.has_node(graph.get_id(h2))); + assert(!graph.has_node(max(graph.get_id(h), graph.get_id(h2)) + 1)); + + assert(graph.get_handle(graph.get_id(h2)) == h2); + + assert(graph.get_node_count() == 2); 
+ assert(graph.min_node_id() == graph.get_id(h2)); + assert(graph.max_node_id() == graph.get_id(h)); + assert(graph.get_total_length() == 5); + assert(graph.get_edge_count() == 0); + + graph.follow_edges(h2, true, [](const handle_t &prev) { + assert(false); + return true; + }); + graph.follow_edges(h2, false, [](const handle_t &next) { + assert(false); + return true; + }); + } + + // creating and accessing a node at the end of ID space + + handle_t h3 = graph.create_handle("GAC", 4); + + // DeletableHandleGraph has correct structure after creating a node at the + // end of ID space + { + assert(graph.get_sequence(h3) == "GAC"); + assert(graph.get_sequence(graph.flip(h3)) == "GTC"); + assert(graph.get_base(h3, 1) == 'A'); + assert(graph.get_base(graph.flip(h3), 0) == 'G'); + assert(graph.get_subsequence(h3, 1, 1) == "A"); + assert(graph.get_subsequence(graph.flip(h3), 0, 5) == "GTC"); + assert(graph.get_length(h3) == 3); + + assert(graph.get_handle(graph.get_id(h3)) == h3); + + assert(graph.get_node_count() == 3); + assert(graph.min_node_id() == graph.get_id(h2)); + assert(graph.max_node_id() == graph.get_id(h3)); + assert(graph.get_total_length() == 8); + assert(graph.get_edge_count() == 0); + + graph.follow_edges(h3, true, [](const handle_t &prev) { + assert(false); + return true; + }); + graph.follow_edges(h3, false, [](const handle_t &next) { + assert(false); + return true; + }); + } - size_t count = 0; - for (auto it = vec.begin(); it != vec.end(); ++it) { - count++; - } - assert(count == 0); - } + // creating and accessing in the middle of ID space - // Single element - { - VectorLike vec; - vec.push_back(42); + handle_t h4 = graph.create_handle("T", 3); - assert(vec.begin() != vec.end()); + // DeletableHandleGraph has correct structure after creating a node in the + // middle of ID space + { + assert(graph.get_sequence(h4) == "T"); + assert(graph.get_sequence(graph.flip(h4)) == "A"); + assert(graph.get_length(h4) == 1); - auto it = vec.begin(); - assert(*it 
== 42); - ++it; - assert(it == vec.end()); - } + assert(graph.get_handle(graph.get_id(h4)) == h4); - // Multiple elements - basic iteration - { - VectorLike vec; - vector expected = {10, 20, 30, 40, 50}; + assert(graph.get_node_count() == 4); + assert(graph.min_node_id() == graph.get_id(h2)); + assert(graph.max_node_id() == graph.get_id(h3)); + assert(graph.get_total_length() == 9); + assert(graph.get_edge_count() == 0); - for (auto val : expected) { - vec.push_back(val); - } + graph.follow_edges(h4, true, [](const handle_t &prev) { + assert(false); + return true; + }); + graph.follow_edges(h4, false, [](const handle_t &next) { + assert(false); + return true; + }); + } - // Iterate and compare - size_t idx = 0; - for (auto it = vec.begin(); it != vec.end(); ++it) { - assert(idx < expected.size()); - assert(*it == expected[idx]); - idx++; - } - assert(idx == expected.size()); - } + graph.create_edge(h, h2); - // Range-based for loop - { - VectorLike vec; - vector expected = {100, 200, 300, 400, 500, 600, 700, 800}; + bool found1 = false, found2 = false, found3 = false, found4 = false; + int count1 = 0, count2 = 0, count3 = 0, count4 = 0; - for (auto val : expected) { - vec.push_back(val); - } + // DeletableHandleGraph has correct structure after creating an edge + { + assert(graph.get_edge_count() == 1); - size_t idx = 0; - for (auto val : vec) { - assert(idx < expected.size()); - assert(val == expected[idx]); - idx++; - } - assert(idx == expected.size()); - } + graph.follow_edges(h, false, [&](const handle_t &next) { + if (next == h2) { + found1 = true; + } + count1++; + return true; + }); + graph.follow_edges(h2, true, [&](const handle_t &prev) { + if (prev == h) { + found2 = true; + } + count2++; + return true; + }); + graph.follow_edges(graph.flip(h), true, [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found3 = true; + } + count3++; + return true; + }); + graph.follow_edges(graph.flip(h2), false, [&](const handle_t &next) { + if (next == 
graph.flip(h)) { + found4 = true; + } + count4++; + return true; + }); + assert(count1 == 1); + assert(count2 == 1); + assert(count3 == 1); + assert(count4 == 1); + assert(found1); + assert(found2); + assert(found3); + assert(found4); - // Iterator equality and inequality - { - VectorLike vec; - vec.push_back(1); - vec.push_back(2); - vec.push_back(3); + count1 = count2 = count3 = count4 = 0; + found1 = found2 = found3 = found4 = false; + } + + graph.create_edge(h, graph.flip(h3)); + + bool found5 = false, found6 = false, found7 = false, found8 = false; + int count5 = 0, count6 = 0; + + // DeletableHandleGraph has correct structure after creating an edge with + // a traversal + { + assert(graph.get_edge_count() == 2); + + graph.follow_edges(h, false, [&](const handle_t &next) { + if (next == h2) { + found1 = true; + } else if (next == graph.flip(h3)) { + found2 = true; + } + count1++; + return true; + }); + graph.follow_edges(graph.flip(h), true, [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found3 = true; + } else if (prev == h3) { + found4 = true; + } + count2++; + return true; + }); + graph.follow_edges(h2, true, [&](const handle_t &prev) { + if (prev == h) { + found5 = true; + } + count3++; + return true; + }); + graph.follow_edges(graph.flip(h2), false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found6 = true; + } + count4++; + return true; + }); + graph.follow_edges(graph.flip(h3), true, [&](const handle_t &prev) { + if (prev == h) { + found7 = true; + } + count5++; + return true; + }); + graph.follow_edges(h3, false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found8 = true; + } + count6++; + return true; + }); + assert(count1 == 2); + assert(count2 == 2); + assert(count3 == 1); + assert(count4 == 1); + assert(count5 == 1); + assert(count6 == 1); + assert(found1); + assert(found2); + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(found7); + assert(found8); + + count1 = count2 
= count3 = count4 = count5 = count6 = 0; + found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = + false; + } + + graph.create_edge(h4, graph.flip(h4)); + + // DeletableHandleGraph has correct structure after creating a reversing + // self-loop + { + assert(graph.get_edge_count() == 3); + + graph.follow_edges(h4, false, [&](const handle_t &next) { + if (next == graph.flip(h4)) { + found1 = true; + } + count1++; + return true; + }); + graph.follow_edges(graph.flip(h4), true, [&](const handle_t &prev) { + if (prev == h4) { + found2 = true; + } + count2++; + return true; + }); + assert(count1 == 1); + assert(count2 == 1); + assert(found1); + assert(found2); - auto it1 = vec.begin(); - auto it2 = vec.begin(); - assert(it1 == it2); + count1 = count2 = 0; + found1 = found2 = false; + } - ++it2; - assert(it1 != it2); + graph.create_edge(h, graph.flip(h4)); + graph.create_edge(graph.flip(h3), h4); - ++it1; - assert(it1 == it2); - } + assert(graph.get_edge_count() == 5); - // std::distance compatibility - { - VectorLike vec; - for (size_t i = 0; i < 15; i++) { - vec.push_back(i); - } + graph.destroy_edge(h, graph.flip(h4)); + graph.destroy_edge(graph.flip(h3), h4); - auto dist = std::distance(vec.begin(), vec.end()); - assert((size_t)dist == vec.size()); - assert((size_t)dist == 15); - } + assert(graph.get_edge_count() == 3); - // std::find compatibility - { - VectorLike vec; - vec.push_back(10); - vec.push_back(20); - vec.push_back(30); - vec.push_back(40); - vec.push_back(50); - - auto it = std::find(vec.begin(), vec.end(), 30); - assert(it != vec.end()); - assert(*it == 30); - - auto it2 = std::find(vec.begin(), vec.end(), 999); - assert(it2 == vec.end()); - } + // DeletableHandleGraph has correct structure after creating and deleting + // edges + { + graph.follow_edges(h, false, [&](const handle_t &next) { + if (next == h2) { + found1 = true; + } else if (next == graph.flip(h3)) { + found2 = true; + } + count1++; + return true; + }); + 
graph.follow_edges(graph.flip(h), true, [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found3 = true; + } else if (prev == h3) { + found4 = true; + } + count2++; + return true; + }); + graph.follow_edges(h2, true, [&](const handle_t &prev) { + if (prev == h) { + found5 = true; + } + count3++; + return true; + }); + graph.follow_edges(graph.flip(h2), false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found6 = true; + } + count4++; + return true; + }); + graph.follow_edges(graph.flip(h3), true, [&](const handle_t &prev) { + if (prev == h) { + found7 = true; + } + count5++; + return true; + }); + graph.follow_edges(h3, false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found8 = true; + } + count6++; + return true; + }); + assert(count1 == 2); + assert(count2 == 2); + assert(count3 == 1); + assert(count4 == 1); + assert(count5 == 1); + assert(count6 == 1); + assert(found1); + assert(found2); + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(found7); + assert(found8); + + count1 = count2 = count3 = count4 = count5 = count6 = 0; + found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = + false; + + graph.follow_edges(h4, false, [&](const handle_t &next) { + if (next == graph.flip(h4)) { + found1 = true; + } + count1++; + return true; + }); + graph.follow_edges(graph.flip(h4), true, [&](const handle_t &prev) { + if (prev == h4) { + found2 = true; + } + count2++; + return true; + }); + assert(count1 == 1); + assert(count2 == 1); + assert(found1); + assert(found2); - // Const iterator - { - VectorLike vec; - vec.push_back(5); - vec.push_back(15); - vec.push_back(25); + count1 = count2 = 0; + found1 = found2 = false; + } + + handle_t h5 = graph.create_handle("GGACC"); + + // make some edges to ensure that deleting is difficult + graph.create_edge(h, h5); + graph.create_edge(h5, h); + graph.create_edge(graph.flip(h5), h2); + graph.create_edge(h3, graph.flip(h5)); + 
graph.create_edge(h3, h5); + graph.create_edge(h5, h4); + + graph.destroy_handle(h5); + + // DeletableHandleGraph has correct structure after creating and deleting + // a node + { + + graph.follow_edges(h, false, [&](const handle_t &next) { + if (next == h2) { + found1 = true; + } else if (next == graph.flip(h3)) { + found2 = true; + } + count1++; + return true; + }); + graph.follow_edges(graph.flip(h), true, [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found3 = true; + } else if (prev == h3) { + found4 = true; + } + count2++; + return true; + }); + graph.follow_edges(h2, true, [&](const handle_t &prev) { + if (prev == h) { + found5 = true; + } + count3++; + return true; + }); + graph.follow_edges(graph.flip(h2), false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found6 = true; + } + count4++; + return true; + }); + graph.follow_edges(graph.flip(h3), true, [&](const handle_t &prev) { + if (prev == h) { + found7 = true; + } + count5++; + return true; + }); + graph.follow_edges(h3, false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found8 = true; + } + count6++; + return true; + }); + assert(count1 == 2); + assert(count2 == 2); + assert(count3 == 1); + assert(count4 == 1); + assert(count5 == 1); + assert(count6 == 1); + assert(found1); + assert(found2); + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(found7); + assert(found8); + + count1 = count2 = count3 = count4 = count5 = count6 = 0; + found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = + false; + + graph.follow_edges(h4, false, [&](const handle_t &next) { + if (next == graph.flip(h4)) { + found1 = true; + } + count1++; + return true; + }); + graph.follow_edges(graph.flip(h4), true, [&](const handle_t &prev) { + if (prev == h4) { + found2 = true; + } + count2++; + return true; + }); + assert(count1 == 1); + assert(count2 == 1); + assert(found1); + assert(found2); - const VectorLike& const_vec = vec; + count1 = 
count2 = 0; + found1 = found2 = false; + } + + // DeletableHandleGraph has correct structure after swapping nodes + { + + graph.follow_edges(h, false, [&](const handle_t &next) { + if (next == h2) { + found1 = true; + } else if (next == graph.flip(h3)) { + found2 = true; + } + count1++; + return true; + }); + graph.follow_edges(graph.flip(h), true, [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found3 = true; + } else if (prev == h3) { + found4 = true; + } + count2++; + return true; + }); + graph.follow_edges(h2, true, [&](const handle_t &prev) { + if (prev == h) { + found5 = true; + } + count3++; + return true; + }); + graph.follow_edges(graph.flip(h2), false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found6 = true; + } + count4++; + return true; + }); + graph.follow_edges(graph.flip(h3), true, [&](const handle_t &prev) { + if (prev == h) { + found7 = true; + } + count5++; + return true; + }); + graph.follow_edges(h3, false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found8 = true; + } + count6++; + return true; + }); + assert(count1 == 2); + assert(count2 == 2); + assert(count3 == 1); + assert(count4 == 1); + assert(count5 == 1); + assert(count6 == 1); + assert(found1); + assert(found2); + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(found7); + assert(found8); + + count1 = count2 = count3 = count4 = count5 = count6 = 0; + found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = + false; + + graph.follow_edges(h4, false, [&](const handle_t &next) { + if (next == graph.flip(h4)) { + found1 = true; + } + count1++; + return true; + }); + graph.follow_edges(graph.flip(h4), true, [&](const handle_t &prev) { + if (prev == h4) { + found2 = true; + } + count2++; + return true; + }); + assert(count1 == 1); + assert(count2 == 1); + assert(found1); + assert(found2); - size_t count = 0; - for (auto it = const_vec.begin(); it != const_vec.end(); ++it) { - count++; - } - 
assert(count == 3); - - auto it = const_vec.begin(); - assert(*it == 5); - ++it; - assert(*it == 15); - ++it; - assert(*it == 25); - } + count1 = count2 = 0; + found1 = found2 = false; + } + + // DeletableHandleGraph visits all nodes with for_each_handle + { + graph.for_each_handle([&](const handle_t &handle) { + if (handle == h) { + found1 = true; + } else if (handle == h2) { + found2 = true; + } else if (handle == h3) { + found3 = true; + } else if (handle == h4) { + found4 = true; + } else { + assert(false); + } + return true; + }); - // Large container with various patterns - { - VectorLike vec; - random_device rd; - default_random_engine prng(rd()); - uniform_int_distribution val_distr(0, 10000); - - vector expected; - size_t num_elements = 200; - - for (size_t i = 0; i < num_elements; i++) { - uint64_t val = val_distr(prng); - expected.push_back(val); - vec.push_back(val); - } + assert(found1); + assert(found2); + assert(found3); + assert(found4); - size_t idx = 0; - for (auto val : vec) { - assert(val == expected[idx]); - idx++; - } - assert(idx == expected.size()); - } + found1 = found2 = found3 = found4 = false; + } + + // to make sure the sequence reverse complemented correctly + int i = 0; + auto check_rev_comp = [&](const std::string &seq1, + const std::string &seq2) { + i++; + assert(seq1.size() == seq2.size()); + auto it = seq1.begin(); + auto rit = seq2.rbegin(); + for (; it != seq1.end(); it++) { + if (*it == 'A') { + assert(*rit == 'T'); + } else if (*it == 'C') { + assert(*rit == 'G'); + } else if (*it == 'G') { + assert(*rit == 'C'); + } else if (*it == 'T') { + assert(*rit == 'A'); + } else if (*it == 'N') { + assert(*rit == 'N'); + } else { + assert(false); + } - // Iteration after modification - { - VectorLike vec; - vec.push_back(1); - vec.push_back(2); - vec.push_back(3); - - // First iteration - size_t count = 0; - for (auto it = vec.begin(); it != vec.end(); ++it) { - count++; + rit++; } - assert(count == 3); - - // Modify - 
vec.push_back(4); - vec.set(0, 100); - - // Second iteration - vector expected = {100, 2, 3, 4}; - size_t idx = 0; - for (auto val : vec) { - assert(val == expected[idx]); - idx++; - } - assert(idx == 4); - } + }; - // Iterator copy construction - { - VectorLike vec; - vec.push_back(10); - vec.push_back(20); - - auto it1 = vec.begin(); - auto it2(it1); // Copy constructor - - assert(it1 == it2); - assert(*it1 == *it2); - assert(*it1 == 10); - } - - // Iterator assignment - { - VectorLike vec; - vec.push_back(10); - vec.push_back(20); - vec.push_back(30); - - auto it1 = vec.begin(); - auto it2 = vec.begin(); - ++it2; - - assert(*it1 == 10); - assert(*it2 == 20); - - it1 = it2; // Assignment - assert(it1 == it2); - assert(*it1 == 20); - } - - // BidirectionalIterator tests. - { - VectorLike vec; - vec.push_back(10); - vec.push_back(20); - vec.push_back(30); - - auto it1 = vec.begin(); - auto it2 = it1; - ++it2; - auto also_decremented = --it2; - - assert(it2 == it1); - assert(also_decremented == it1); - - it2++; - auto not_decremented = it2--; - - assert(it2 == it1); - assert(not_decremented != it1); - assert(*not_decremented == 20); - - auto it3 = vec.end(); - it3--; - assert(it3 != vec.end()); - assert(*it3 == 30); - } - - // RandomAccessIterator tests - { - VectorLike vec; - vec.push_back(10); - vec.push_back(20); - vec.push_back(30); - - auto it1 = vec.begin(); - auto it2 = it1; - - it1 += 1; - assert(*it1 == 20); - - it1 += 2; - assert(it1 == vec.end()); - - it1 -= 1; - auto it3 = it2 + 2; - assert(it1 == it3); - assert(*it1 == 30); - assert(it2 == vec.begin()); - - auto it4 = it1 - 2; - assert(*it4 == 10); - - assert(*it1 == vec.begin()[2]); - assert(*it4 == vec.begin()[0]); - assert(it4[2] == *it1); - assert(it1[-2] == *it4); - - assert(it1 + -2 == it4); - assert(it4 - -2 == it1); - - it1 += -2; - assert(it1 == it4); - - it1 -= -1; - it4++; - assert(it1 == it4); - } + int count7 = 0, count8 = 0; - // Iterator comparison tests - { - VectorLike vec; - 
vec.push_back(10); - vec.push_back(20); - vec.push_back(30); - - auto it1 = vec.begin(); - auto it2 = it1; - - assert(it1 >= it2); - assert(it1 <= it2); - assert(!(it1 < it2)); - assert(!(it1 > it2)); - it1++; - - assert(it1 >= it2); - assert(!(it1 <= it2)); - assert(!(it2 >= it1)); - assert(it2 <= it1); - assert(!(it1 < it2)); - assert(it1 > it2); - assert(it2 < it1); - assert(!(it2 > it1)); - } - - // Iterator distance tests - { - VectorLike vec; - vec.push_back(10); - vec.push_back(20); - vec.push_back(30); - - assert(vec.end() - vec.begin() == vec.size()); - - auto it1 = vec.begin(); - auto it2 = it1; - - it1 += 1; - it2 += 2; - - assert(it2 - it1 == 1); - assert(it1 - it2 == -1); - - it1--; - assert(it2 - it1 == 2); - assert(it1 - it2 == -2); - } - - cerr << "Iterator (" << typeid(typename VectorLike::iterator).name() << ") tests successful!" << endl; -} + // DeletableHandleGraph correctly reverses a node + { -template -void test_paged_vector() { - enum vec_op_t {SET = 0, GET = 1, APPEND = 2, POP = 3, SERIALIZE = 4}; - std::random_device rd; - std::default_random_engine prng(rd()); - std::uniform_int_distribution op_distr(0, 4); - std::uniform_int_distribution val_distr(0, 100); - - int num_runs = 200; - int num_ops = 200; - int gets_per_op = 5; - int sets_per_op = 5; - int appends_per_op = 3; - int pops_per_op = 1; - - for (size_t i = 0; i < num_runs; i++) { - - uint64_t next_val = val_distr(prng); - - std::vector std_vec; - PagedVectorImpl dyn_vec; - - for (size_t j = 0; j < num_ops; j++) { - - vec_op_t op = (vec_op_t) op_distr(prng); - switch (op) { - case SET: - if (!std_vec.empty()) { - for (size_t k = 0; k < sets_per_op; k++) { - size_t idx = prng() % dyn_vec.size(); - std_vec[idx] = next_val; - dyn_vec.set(idx, next_val); - next_val = val_distr(prng); - } - } - - break; - - case GET: - if (!std_vec.empty()) { - for (size_t k = 0; k < gets_per_op; k++) { - size_t idx = prng() % dyn_vec.size(); - assert(std_vec[idx] == dyn_vec.get(idx)); - next_val = 
val_distr(prng); - } - } - - break; - - case APPEND: - for (size_t k = 0; k < appends_per_op; k++) { - std_vec.push_back(next_val); - dyn_vec.push_back(next_val); - next_val = val_distr(prng); - } - - break; - - case POP: - if (!std_vec.empty()) { - for (size_t k = 0; k < pops_per_op; k++) { - std_vec.pop_back(); - dyn_vec.pop_back(); - } - } - - break; - - case SERIALIZE: - { - stringstream strm; - - dyn_vec.serialize(strm); - strm.seekg(0); - PagedVectorImpl copy_vec(strm); - - assert(copy_vec.size() == dyn_vec.size()); - for (size_t i = 0; i < copy_vec.size(); i++) { - assert(copy_vec.get(i) == dyn_vec.get(i)); - } - break; - } - - default: - break; - } - - assert(std_vec.empty() == dyn_vec.empty()); - assert(std_vec.size() == dyn_vec.size()); - } - } - cerr << "PagedVector (" << typeid(PagedVectorImpl).name() << ") tests successful!" << endl; -} + string seq1 = graph.get_sequence(h); + h = graph.apply_orientation(graph.flip(h)); -void test_packed_deque() { - enum deque_op_t {SET = 0, GET = 1, APPEND_LEFT = 2, POP_LEFT = 3, APPEND_RIGHT = 4, POP_RIGHT = 5, SERIALIZE = 6}; - std::random_device rd; - std::default_random_engine prng(rd()); - std::uniform_int_distribution op_distr(0, 6); - - int num_runs = 1000; - int num_ops = 200; - int gets_per_op = 5; - int sets_per_op = 5; - int appends_per_op = 3; - int pops_per_op = 1; - - for (size_t i = 0; i < num_runs; i++) { - - uint64_t next_val = 0; - - std::deque std_deq; - PackedDeque<> suc_deq; - - for (size_t j = 0; j < num_ops; j++) { - - deque_op_t op = (deque_op_t) op_distr(prng); - switch (op) { - case SET: - if (!std_deq.empty()) { - for (size_t k = 0; k < sets_per_op; k++) { - size_t idx = prng() % std_deq.size(); - std_deq[idx] = next_val; - suc_deq.set(idx, next_val); - next_val++; - } - } - - break; - - case GET: - if (!std_deq.empty()) { - for (size_t k = 0; k < gets_per_op; k++) { - size_t idx = prng() % std_deq.size(); - assert(std_deq[idx] == suc_deq.get(idx)); - next_val++; - } - } - - break; - - case 
APPEND_LEFT: - for (size_t k = 0; k < appends_per_op; k++) { - std_deq.push_front(next_val); - suc_deq.push_front(next_val); - next_val++; - } - - break; - - case POP_LEFT: - for (size_t k = 0; k < pops_per_op && !std_deq.empty(); k++) { - std_deq.pop_front(); - suc_deq.pop_front(); - } - - break; - - case APPEND_RIGHT: - for (size_t k = 0; k < appends_per_op; k++) { - std_deq.push_back(next_val); - suc_deq.push_back(next_val); - next_val++; - } - - break; - - case POP_RIGHT: - for (size_t k = 0; k < pops_per_op && !std_deq.empty(); k++) { - std_deq.pop_back(); - suc_deq.pop_back(); - } - - break; - - case SERIALIZE: - { - stringstream strm; - - suc_deq.serialize(strm); - strm.seekg(0); - PackedDeque<> copy_deq(strm); - - assert(copy_deq.size() == suc_deq.size()); - for (size_t i = 0; i < copy_deq.size(); i++) { - assert(copy_deq.get(i) == suc_deq.get(i)); - } - break; - } - - default: - break; - } - - assert(std_deq.empty() == suc_deq.empty()); - assert(std_deq.size() == suc_deq.size()); - } - } - cerr << "PackedDeque tests successful!" 
<< endl; -} + // check the sequence + string rev_seq1 = graph.get_sequence(h); + check_rev_comp(seq1, rev_seq1); -void test_packed_set() { - enum set_op_t {INSERT = 0, REMOVE = 1, FIND = 2}; - - random_device rd; - default_random_engine prng(rd()); - uniform_int_distribution op_distr(0, 2); - - int num_runs = 1000; - int num_ops = 200; - int inserts_per_op = 2; - int prev_inserts_per_op = 1; - int removes_per_op = 1; - int finds_per_op = 5; - - for (size_t i = 0; i < num_runs; i++) { - uint64_t next_val = 0; - - unordered_set std_set; - PackedSet<> packed_set; - - for (size_t j = 0; j < num_ops; j++) { - set_op_t op = (set_op_t) op_distr(prng); - switch (op) { - case INSERT: - - for (size_t k = 0; k < inserts_per_op; ++k) { - packed_set.insert(next_val); - std_set.insert(next_val); - next_val++; - } - for (size_t k = 0; k < prev_inserts_per_op; ++k) { - uint64_t val = prng() % next_val; - packed_set.insert(val); - std_set.insert(val); - } - - break; - - case REMOVE: - if (next_val > 0) { - for (size_t k = 0; k < removes_per_op; ++k) { - uint64_t val = prng() % next_val; - packed_set.remove(val); - std_set.erase(val); - } - } - else { - packed_set.remove(0); - packed_set.remove(1); - packed_set.remove(2); - std_set.erase(0); - std_set.erase(1); - std_set.erase(2); - } - - break; - - case FIND: - if (next_val) { - for (size_t k = 0; k < finds_per_op; k++) { - uint64_t val = prng() % next_val; - assert(packed_set.find(val) == (bool) std_set.count(val)); - } - } - else { - assert(packed_set.find(0) == (bool) std_set.count(0)); - assert(packed_set.find(1) == (bool) std_set.count(1)); - assert(packed_set.find(2) == (bool) std_set.count(2)); - } - - break; - -// case SERIALIZE: -// { -// stringstream strm; -// -// dyn_vec.serialize(strm); -// strm.seekg(0); -// PackedVector<> copy_vec(strm); -// -// assert(copy_vec.size() == dyn_vec.size()); -// for (size_t i = 0; i < copy_vec.size(); i++) { -// assert(copy_vec.get(i) == dyn_vec.get(i)); -// } -// break; -// } - - 
default: - break; - } - - assert(std_set.empty() == packed_set.empty()); - assert(std_set.size() == packed_set.size()); - } - } - cerr << "PackedSet tests successful!" << endl; -} + // check that the edges are what we expect -void test_packed_graph() { - - auto check_path = [&](MutablePathDeletableHandleGraph& graph, const path_handle_t& p, const vector& steps) { - assert(graph.get_step_count(p) == steps.size()); - - step_handle_t step = graph.path_begin(p); - for (int i = 0; i < steps.size(); i++) { - - assert(graph.get_path_handle_of_step(step) == p); - assert(graph.get_handle_of_step(step) == steps[i]); - - if (graph.get_is_circular(p)) { - assert(graph.has_next_step(step)); - assert(graph.has_previous_step(step)); - } - else { - assert(graph.has_next_step(step) == i + 1 < steps.size()); - assert(graph.has_previous_step(step) == i > 0); - } - - step = graph.get_next_step(step); - } - - if (graph.get_is_circular(p) && !graph.is_empty(p)) { - assert(step == graph.path_begin(p)); - } - else { - assert(step == graph.path_end(p)); - } - - step = graph.path_back(p); - - for (int i = steps.size() - 1; i >= 0; i--) { - - assert(graph.get_path_handle_of_step(step) == p); - assert(graph.get_handle_of_step(step) == steps[i]); - - if (graph.get_is_circular(p)) { - assert(graph.has_next_step(step)); - assert(graph.has_previous_step(step)); - } - else { - assert(graph.has_next_step(step) == i + 1 < steps.size()); - assert(graph.has_previous_step(step) == i > 0); - } - - step = graph.get_previous_step(step); - } - - if (graph.get_is_circular(p) && !graph.is_empty(p)) { - assert(step == graph.path_back(p)); - } - else { - assert(step == graph.path_front_end(p)); - } - }; - - auto check_flips = [&](MutablePathDeletableHandleGraph& graph, const path_handle_t& p, const vector& steps) { - - auto flipped = steps; - for (size_t i = 0; i < steps.size(); i++) { - graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); - flipped[i] = graph.flip(flipped[i]); - check_path(graph, 
p, flipped); - - graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); - flipped[i] = graph.flip(flipped[i]); - check_path(graph, p, flipped); - } - }; - - // defragmentation - { - PackedGraph graph; - - handle_t h1 = graph.create_handle("ATGTAG"); - handle_t h2 = graph.create_handle("ACCCC"); - handle_t h3 = graph.create_handle("C"); - handle_t h4 = graph.create_handle("ATT"); - handle_t h5 = graph.create_handle("GGCA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h3); - graph.create_edge(h3, h5); - graph.create_edge(h3, h4); - graph.create_edge(h4, h5); - - path_handle_t p0 = graph.create_path_handle("0"); - path_handle_t p1 = graph.create_path_handle("1"); - path_handle_t p2 = graph.create_path_handle("2"); - - - graph.append_step(p0, h3); - graph.append_step(p0, h4); - graph.append_step(p0, h5); - - graph.append_step(p1, h1); - graph.append_step(p1, h3); - graph.append_step(p1, h5); - - graph.append_step(p2, h1); - graph.append_step(p2, h2); - graph.append_step(p2, h3); - graph.append_step(p2, h4); - graph.append_step(p2, h5); - - graph.destroy_path(p0); - graph.destroy_path(p2); - graph.destroy_handle(h2); - graph.destroy_handle(h4); - - assert(graph.get_sequence(h1) == "ATGTAG"); - assert(graph.get_sequence(h3) == "C"); - assert(graph.get_sequence(h5) == "GGCA"); - - bool found = false; - graph.follow_edges(h1, false, [&](const handle_t& next) { - if (next == h3) { - found = true; - } - else { - assert(false); - } - return true; + graph.follow_edges(h, false, [&](const handle_t &next) { + count1++; + return true; }); - assert(found); - - found = false; - graph.follow_edges(h3, false, [&](const handle_t& next) { - if (next == h5) { - found = true; - } - else { - assert(false); - } - return true; + graph.follow_edges(h, true, [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found1 = true; + } else if (prev == h3) { + found2 = true; + } + count2++; + return true; }); - assert(found); - - 
check_flips(graph, p1, {h1, h3, h5}); - } - - // tightening vector allocations - { - PackedGraph graph; - handle_t h1 = graph.create_handle("ATGTAG"); - handle_t h2 = graph.create_handle("ACCCC"); - handle_t h3 = graph.create_handle("C"); - handle_t h4 = graph.create_handle("ATT"); - handle_t h5 = graph.create_handle("GGCA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h3); - graph.create_edge(h3, h5); - graph.create_edge(h3, h4); - graph.create_edge(h4, h5); - - path_handle_t p0 = graph.create_path_handle("0"); - path_handle_t p1 = graph.create_path_handle("1"); - path_handle_t p2 = graph.create_path_handle("2"); - - - graph.append_step(p0, h3); - graph.append_step(p0, h4); - graph.append_step(p0, h5); - - graph.append_step(p1, h1); - graph.append_step(p1, h3); - graph.append_step(p1, h5); - - graph.append_step(p2, h1); - graph.append_step(p2, h2); - graph.append_step(p2, h3); - graph.append_step(p2, h4); - graph.append_step(p2, h5); - - // delete some things, but not enough to trigger defragmentation - graph.destroy_path(p2); - graph.destroy_handle(h2); - // reallocate and compress down to the smaller size - graph.optimize(false); - - assert(graph.get_sequence(h1) == "ATGTAG"); - assert(graph.get_sequence(h3) == "C"); - assert(graph.get_sequence(h4) == "ATT"); - assert(graph.get_sequence(h5) == "GGCA"); - - int count = 0; - bool found1 = false, found2 = false; - graph.follow_edges(h1, false, [&](const handle_t& h) { - if (h == h3) { - found1 = true; - } - count++; + graph.follow_edges(graph.flip(h), true, [&](const handle_t &next) { + count3++; + return true; }); - assert(found1); - assert(count == 1); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h1, true, [&](const handle_t& h) { - count++; + graph.follow_edges(graph.flip(h), false, [&](const handle_t &prev) { + if (prev == h2) { + found3 = true; + } else if (prev == graph.flip(h3)) { + found4 = true; + } + count4++; + return true; + }); + 
graph.follow_edges(h2, true, [&](const handle_t &prev) { + if (prev == graph.flip(h)) { + found5 = true; + } + count5++; + return true; + }); + graph.follow_edges(graph.flip(h2), false, [&](const handle_t &next) { + if (next == h) { + found6 = true; + } + count6++; + return true; }); - assert(count == 0); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h3, false, [&](const handle_t& h) { - if (h == h4) { - found1 = true; - } - if (h == h5) { - found2 = true; - } - count++; + graph.follow_edges(graph.flip(h3), true, [&](const handle_t &prev) { + if (prev == graph.flip(h)) { + found7 = true; + } + count7++; + return true; }); + graph.follow_edges(h3, false, [&](const handle_t &next) { + if (next == h) { + found8 = true; + } + count8++; + return true; + }); + assert(count1 == 0); + assert(count2 == 2); + assert(count3 == 0); + assert(count4 == 2); + assert(count5 == 1); + assert(count6 == 1); + assert(count7 == 1); + assert(count8 == 1); assert(found1); assert(found2); - assert(count == 2); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h3, true, [&](const handle_t& h) { - if (h == h1) { - found1 = true; - } - count++; + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(found7); + assert(found8); + + count1 = count2 = count3 = count4 = count5 = count6 = count7 = count8 = + 0; + found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = + false; + + // and now switch it back to the same orientation and repeat the + // topology checks + + h = graph.apply_orientation(graph.flip(h)); + + graph.follow_edges(h, false, [&](const handle_t &next) { + if (next == h2) { + found1 = true; + } else if (next == graph.flip(h3)) { + found2 = true; + } + count1++; + return true; }); - assert(found1); - assert(count == 1); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h4, false, [&](const handle_t& h) { - if (h == h5) { - found1 = true; - } - count++; + 
graph.follow_edges(graph.flip(h), true, [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found3 = true; + } else if (prev == h3) { + found4 = true; + } + count2++; + return true; }); - assert(found1); - assert(count == 1); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h4, true, [&](const handle_t& h) { - if (h == h3) { - found1 = true; - } - count++; + graph.follow_edges(h2, true, [&](const handle_t &prev) { + if (prev == h) { + found5 = true; + } + count3++; + return true; }); - assert(found1); - assert(count == 1); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h5, false, [&](const handle_t& h) { - count++; + graph.follow_edges(graph.flip(h2), false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found6 = true; + } + count4++; + return true; }); - assert(count == 0); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h5, true, [&](const handle_t& h) { - if (h == h3) { - found1 = true; - } - else if (h == h4) { - found2 = true; - } - count++; + graph.follow_edges(graph.flip(h3), true, [&](const handle_t &prev) { + if (prev == h) { + found7 = true; + } + count5++; + return true; }); - assert(found1); - assert(found2); - assert(count == 2); - - check_flips(graph, p0, {h3, h4, h5}); - check_flips(graph, p1, {h1, h3, h5}); - } - - // optimizing with id reassignment - { - PackedGraph graph; - handle_t h1 = graph.create_handle("ATGTAG"); - handle_t h2 = graph.create_handle("ACCCC"); - handle_t h3 = graph.create_handle("C"); - handle_t h4 = graph.create_handle("ATT"); - handle_t h5 = graph.create_handle("GGCA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h3); - graph.create_edge(h3, h5); - graph.create_edge(h3, h4); - graph.create_edge(h4, h5); - - path_handle_t p0 = graph.create_path_handle("0"); - path_handle_t p1 = graph.create_path_handle("1"); - path_handle_t p2 = graph.create_path_handle("2"); - - - graph.append_step(p0, h3); - 
graph.append_step(p0, h4); - graph.append_step(p0, h5); - - graph.append_step(p1, h1); - graph.append_step(p1, h3); - graph.append_step(p1, h5); - - graph.append_step(p2, h1); - graph.append_step(p2, h2); - graph.append_step(p2, h3); - graph.append_step(p2, h4); - graph.append_step(p2, h5); - - // delete some things, but not enough to trigger defragmentation - graph.destroy_path(p2); - graph.destroy_handle(h2); - // reallocate and compress down to the smaller size, reassigning IDs - graph.optimize(true); - set seen_ids; - - int count = 0; - bool found1 = false, found2 = false, found3 = false, found4 = false; - graph.for_each_handle([&](const handle_t& handle) { - if (graph.get_sequence(handle) == "ATGTAG") { - h1 = handle; - found1 = true; - } - else if (graph.get_sequence(handle) == "C") { - h3 = handle; - found2 = true; - } - else if (graph.get_sequence(handle) == "ATT") { - h4 = handle; - found3 = true; - } - else if (graph.get_sequence(handle) == "GGCA") { - h5 = handle; - found4 = true; - } - else { - assert(false); - } - count++; - - seen_ids.insert(graph.get_id(handle)); - - assert(graph.get_id(handle) >= 1); - assert(graph.get_id(handle) <= 4); + graph.follow_edges(h3, false, [&](const handle_t &next) { + if (next == graph.flip(h)) { + found8 = true; + } + count6++; + return true; }); - + assert(count1 == 2); + assert(count2 == 2); + assert(count3 == 1); + assert(count4 == 1); + assert(count5 == 1); + assert(count6 == 1); assert(found1); assert(found2); assert(found3); assert(found4); - assert(count == 4); - assert(seen_ids.size() == 4); - - count = 0; - found1 = found2 = found3 = found4 = false; - - graph.follow_edges(h1, false, [&](const handle_t& h) { - if (h == h3) { - found1 = true; - } - count++; - }); - assert(found1); - assert(count == 1); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h1, true, [&](const handle_t& h) { - count++; + assert(found5); + assert(found6); + assert(found7); + assert(found8); + + count1 = count2 = 
count3 = count4 = count5 = count6 = 0; + found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = + false; + + graph.follow_edges(h4, false, [&](const handle_t &next) { + if (next == graph.flip(h4)) { + found1 = true; + } + count1++; + return true; }); - assert(count == 0); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h3, false, [&](const handle_t& h) { - if (h == h4) { - found1 = true; - } - if (h == h5) { - found2 = true; - } - count++; + graph.follow_edges(graph.flip(h4), true, [&](const handle_t &prev) { + if (prev == h4) { + found2 = true; + } + count2++; + return true; }); + assert(count1 == 1); + assert(count2 == 1); assert(found1); assert(found2); - assert(count == 2); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h3, true, [&](const handle_t& h) { - if (h == h1) { - found1 = true; - } - count++; + + count1 = count2 = 0; + found1 = found2 = false; + } + + vector parts = graph.divide_handle(h, vector{1, 2}); + + int count9 = 0, count10 = 0, count11 = 0, count12 = 0; + bool found9 = false, found10 = false, found11 = false, found12 = false, + found13 = false, found14 = false; + + // DeletableHandleGraph can correctly divide a node + { + + assert(parts.size() == 3); + + assert(graph.get_sequence(parts[0]) == "A"); + assert(graph.get_length(parts[0]) == 1); + assert(graph.get_sequence(parts[1]) == "T"); + assert(graph.get_length(parts[1]) == 1); + assert(graph.get_sequence(parts[2]) == "G"); + assert(graph.get_length(parts[2]) == 1); + + graph.follow_edges(parts[0], false, [&](const handle_t &next) { + if (next == parts[1]) { + found1 = true; + } + count1++; + return true; }); - assert(found1); - assert(count == 1); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h4, false, [&](const handle_t& h) { - if (h == h5) { - found1 = true; - } - count++; + graph.follow_edges(parts[0], true, [&](const handle_t &prev) { + count2++; + return true; }); - assert(found1); - assert(count == 
1); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h4, true, [&](const handle_t& h) { - if (h == h3) { - found1 = true; - } - count++; + graph.follow_edges(graph.flip(parts[0]), true, + [&](const handle_t &prev) { + if (prev == graph.flip(parts[1])) { + found2 = true; + } + count3++; + return true; + }); + graph.follow_edges(graph.flip(parts[0]), false, + [&](const handle_t &next) { + count4++; + return true; + }); + + graph.follow_edges(parts[1], false, [&](const handle_t &next) { + if (next == parts[2]) { + found3 = true; + } + count5++; + return true; + }); + graph.follow_edges(parts[1], true, [&](const handle_t &prev) { + if (prev == parts[0]) { + found4 = true; + } + count6++; + return true; + }); + graph.follow_edges(graph.flip(parts[1]), true, + [&](const handle_t &prev) { + if (prev == graph.flip(parts[2])) { + found5 = true; + } + count7++; + return true; + }); + graph.follow_edges(graph.flip(parts[1]), false, + [&](const handle_t &next) { + if (next == graph.flip(parts[0])) { + found6 = true; + } + count8++; + return true; + }); + + graph.follow_edges(parts[2], false, [&](const handle_t &next) { + if (next == h2) { + found7 = true; + } else if (next == graph.flip(h3)) { + found8 = true; + } + count9++; + return true; + }); + graph.follow_edges(parts[2], true, [&](const handle_t &prev) { + if (prev == parts[1]) { + found9 = true; + } + count10++; + return true; + }); + graph.follow_edges(graph.flip(parts[2]), true, + [&](const handle_t &prev) { + if (prev == graph.flip(h2)) { + found10 = true; + } else if (prev == h3) { + found11 = true; + } + count11++; + return true; + }); + graph.follow_edges(graph.flip(parts[2]), false, + [&](const handle_t &next) { + if (next == graph.flip(parts[1])) { + found12 = true; + } + count12++; + return true; + }); + graph.follow_edges(graph.flip(h3), true, [&](const handle_t &prev) { + if (prev == parts[2]) { + found13 = true; + } + return true; + }); + graph.follow_edges(h2, true, [&](const handle_t 
&prev) { + if (prev == parts[2]) { + found14 = true; + } + return true; }); + + assert(count1 == 1); + assert(count2 == 0); + assert(count3 == 1); + assert(count4 == 0); + assert(count5 == 1); + assert(count6 == 1); + assert(count7 == 1); + assert(count8 == 1); + assert(count9 == 2); + assert(count10 == 1); + assert(count11 == 2); + assert(count12 == 1); assert(found1); - assert(count == 1); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h5, false, [&](const handle_t& h) { - count++; + assert(found2); + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(found7); + assert(found8); + assert(found9); + assert(found10); + assert(found11); + assert(found12); + assert(found13); + assert(found14); + + count1 = count2 = count3 = count4 = count5 = count6 = count7 = count8 = + count9 = count10 = count11 = count12 = 0; + found1 = found2 = found3 = found4 = found5 = found6 = found7 = found8 = + found9 = found10 = found11 = found12 = false; + } + + vector rev_parts = + graph.divide_handle(graph.flip(h3), vector{1}); + + // DeletableHandleGraph can correctly divide a node on the reverse strand + { + + assert(graph.get_sequence(rev_parts[0]) == "G"); + assert(graph.get_length(rev_parts[0]) == 1); + assert(graph.get_is_reverse(rev_parts[0])); + assert(graph.get_sequence(rev_parts[1]) == "TC"); + assert(graph.get_length(rev_parts[1]) == 2); + assert(graph.get_is_reverse(rev_parts[1])); + + graph.follow_edges(rev_parts[0], false, [&](const handle_t &next) { + if (next == rev_parts[1]) { + found1 = true; + } + count1++; + return true; }); - assert(count == 0); - - count = 0; - found1 = false, found2 = false; - graph.follow_edges(h5, true, [&](const handle_t& h) { - if (h == h3) { - found1 = true; - } - else if (h == h4) { - found2 = true; - } - count++; + graph.follow_edges(rev_parts[1], true, [&](const handle_t &prev) { + if (prev == rev_parts[0]) { + found2 = true; + } + count2++; + return true; }); + 
graph.follow_edges(graph.flip(rev_parts[1]), false, + [&](const handle_t &next) { + if (next == graph.flip(rev_parts[0])) { + found3 = true; + } + count3++; + return true; + }); + graph.follow_edges(graph.flip(rev_parts[0]), true, + [&](const handle_t &prev) { + if (prev == graph.flip(rev_parts[1])) { + found4 = true; + } + count4++; + return true; + }); + graph.follow_edges(rev_parts[0], true, [&](const handle_t &prev) { + if (prev == parts[2]) { + found5 = true; + } + count5++; + return true; + }); + graph.follow_edges(rev_parts[1], false, [&](const handle_t &next) { + count6++; + return true; + }); + + assert(count1 == 1); + assert(count2 == 1); + assert(count3 == 1); + assert(count4 == 1); + assert(count5 == 1); + assert(count6 == 0); assert(found1); assert(found2); - assert(count == 2); - - check_flips(graph, p0, {h3, h4, h5}); - check_flips(graph, p1, {h1, h3, h5}); + assert(found3); + assert(found4); + assert(found5); + } + + auto h6 = graph.create_handle("ACGT"); + auto h7 = graph.create_handle("GCGG"); + auto h8 = graph.create_handle("TTCA"); + + graph.create_edge(h6, h7); + graph.create_edge(h7, h8); + + h7 = graph.truncate_handle(h7, true, 1); + assert(graph.get_sequence(h7) == "CGG"); + assert(graph.get_degree(h7, true) == 0); + assert(graph.get_degree(h7, false) == 1); + assert(graph.get_degree(h6, false) == 0); + assert(graph.get_degree(h8, true) == 1); + + h7 = graph.truncate_handle(h7, false, 2); + assert(graph.get_sequence(h7) == "CG"); + assert(graph.get_degree(h7, true) == 0); + assert(graph.get_degree(h7, false) == 0); + assert(graph.get_degree(h6, false) == 0); + assert(graph.get_degree(h8, true) == 0); + + h6 = graph.change_sequence(h6, "AAAT"); + h7 = graph.change_sequence(h7, "G"); + assert(graph.get_sequence(h6) == "AAAT"); + assert(graph.get_sequence(graph.flip(h6)) == "ATTT"); + assert(graph.get_sequence(h7) == "G"); + assert(graph.get_sequence(graph.flip(h7)) == "C"); } - - cerr << "PackedGraph tests successful!" 
<< endl; -} + } -void test_multithreaded_overlay_construction() { - HashGraph graph; - - std::string node_content = "GATTACACATTAG"; - size_t node_count = 1000; - size_t true_path_length = node_count * node_content.size(); - size_t path_count = 10; - // We should coalesce 2 paths into each index. - size_t steps_per_index = node_count * 2; - - // Make a long linear graph - std::vector nodes; - for (size_t i = 0; i < node_count; i++) { - nodes.push_back(graph.create_handle(node_content)); - if (nodes.size() > 1) { - graph.create_edge(nodes[nodes.size() - 2], nodes[nodes.size() - 1]); - } - } - - // Make a bunch of paths and keep their names - std::vector paths; - for (size_t i = 0; i < path_count; i++) { - string path_name = "path" + std::to_string(i); - paths.push_back(path_name); - path_handle_t path_handle = graph.create_path_handle(path_name); - for (auto& visit : nodes) { - graph.append_step(path_handle, visit); - } - } - - // Back up the thread count we have been using. - int backup_thread_count = omp_get_max_threads(); - for (int thread_count = 1; thread_count <= 4; thread_count++) { - // Try this number of threads - omp_set_num_threads(thread_count); - - // Make an overlay with this many threads for construction - PackedPositionOverlay overlay(&graph, {}, steps_per_index); - - // Make sure it is right - for (auto& path_name : paths) { - assert(overlay.has_path(path_name)); - path_handle_t path_handle = overlay.get_path_handle(path_name); - // Make sure they have the right name and length. - assert(overlay.get_path_name(path_handle) == path_name); - assert(overlay.get_path_length(path_handle) == true_path_length); - for (size_t i = 0; i < true_path_length; i++) { - // For each position - // Figure out what node and orientation it should have. 
- handle_t true_underlying_handle = nodes.at(i / node_content.size()); - // Find its step - step_handle_t seen_step = overlay.get_step_at_position(path_handle, i); - // Make sure it is on the right path - assert(overlay.get_path_handle_of_step(seen_step) == path_handle); - // Make sure it is the right node - handle_t observed_handle = overlay.get_handle_of_step(seen_step); - assert(overlay.get_underlying_handle(observed_handle) == true_underlying_handle); - // Make sure the step is at the right place - size_t true_step_start = i - (i % node_content.size()); - assert(overlay.get_position_of_step(seen_step) == true_step_start); - } - } - - } - // Go back to the default thread count. - omp_set_num_threads(backup_thread_count); - - cerr << "Multithreaded PackedPositionOverlay tests successful!" << endl; -} + // second batch of test involving self loops + { + vector implementations; -void test_path_position_overlays() { - - vector implementations; + PackedGraph pg; + implementations.push_back(&pg); HashGraph hg; implementations.push_back(&hg); - PackedGraph pg; - implementations.push_back(&pg); - MappedPackedGraph mpg; implementations.push_back(&mpg); - - for (MutablePathDeletableHandleGraph* implementation : implementations) { - - MutablePathDeletableHandleGraph& graph = *implementation; - - handle_t h1 = graph.create_handle("AAA"); - handle_t h2 = graph.create_handle("A"); - handle_t h3 = graph.create_handle("T"); - handle_t h4 = graph.create_handle("AAAAA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h4); - graph.create_edge(h3, h4); - - path_handle_t p1 = graph.create_path_handle("p1"); - step_handle_t s1 = graph.append_step(p1, h1); - step_handle_t s2 = graph.append_step(p1, h2); - step_handle_t s3 = graph.append_step(p1, h4); - - // static position overlays - { - vector overlays; - - PositionOverlay basic_overlay(&graph); - PackedPositionOverlay packed_overlay(&graph); - - overlays.push_back(&basic_overlay); - 
overlays.push_back(&packed_overlay); - - for (PathPositionHandleGraph* implementation : overlays) { - PathPositionHandleGraph& overlay = *implementation; - - assert(overlay.get_path_length(p1) == 9); - - assert(overlay.get_position_of_step(s1) == 0); - assert(overlay.get_position_of_step(s2) == 3); - assert(overlay.get_position_of_step(s3) == 4); - - assert(overlay.get_step_at_position(p1, 0) == s1); - assert(overlay.get_step_at_position(p1, 1) == s1); - assert(overlay.get_step_at_position(p1, 2) == s1); - assert(overlay.get_step_at_position(p1, 3) == s2); - assert(overlay.get_step_at_position(p1, 4) == s3); - assert(overlay.get_step_at_position(p1, 5) == s3); - assert(overlay.get_step_at_position(p1, 6) == s3); - assert(overlay.get_step_at_position(p1, 7) == s3); - assert(overlay.get_step_at_position(p1, 8) == s3); - assert(overlay.get_step_at_position(p1, 9) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 10) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); - } - } - - - // mutable position overlay - { - MutablePositionOverlay overlay(&graph); - - handle_t h5 = overlay.create_handle("AAAA"); - - overlay.create_edge(h4, h5); - overlay.create_edge(h5, h5); - - step_handle_t s4 = overlay.append_step(p1, h5); - - assert(overlay.get_path_length(p1) == 13); - - assert(overlay.get_position_of_step(s4) == 9); - - assert(overlay.get_step_at_position(p1, 9) == s4); - assert(overlay.get_step_at_position(p1, 10) == s4); - assert(overlay.get_step_at_position(p1, 11) == s4); - assert(overlay.get_step_at_position(p1, 12) == s4); - assert(overlay.get_step_at_position(p1, 13) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 14) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); - - step_handle_t s5 = overlay.append_step(p1, h5); - - assert(overlay.get_path_length(p1) == 17); - - assert(overlay.get_position_of_step(s5) == 13); - - 
assert(overlay.get_step_at_position(p1, 13) == s5); - assert(overlay.get_step_at_position(p1, 14) == s5); - assert(overlay.get_step_at_position(p1, 15) == s5); - assert(overlay.get_step_at_position(p1, 16) == s5); - assert(overlay.get_step_at_position(p1, 17) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 18) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); - - path_handle_t p2 = overlay.create_path_handle("p2"); - - assert(overlay.get_path_length(p2) == 0); - - step_handle_t s6 = overlay.prepend_step(p2, h3); - - assert(overlay.get_path_length(p2) == 1); - - assert(overlay.get_position_of_step(s6) == 0); - - assert(overlay.get_step_at_position(p2, 0) == s6); - assert(overlay.get_step_at_position(p2, 1) == overlay.path_end(p2)); - assert(overlay.get_step_at_position(p2, 2) == overlay.path_end(p2)); - assert(overlay.get_step_at_position(p2, 1000) == overlay.path_end(p2)); - - step_handle_t s7 = overlay.prepend_step(p2, h1); - - assert(overlay.get_path_length(p2) == 4); - - assert(overlay.get_position_of_step(s7) == 0); - assert(overlay.get_position_of_step(s6) == 3); - - assert(overlay.get_step_at_position(p2, 0) == s7); - assert(overlay.get_step_at_position(p2, 1) == s7); - assert(overlay.get_step_at_position(p2, 2) == s7); - assert(overlay.get_step_at_position(p2, 3) == s6); - assert(overlay.get_step_at_position(p2, 4) == overlay.path_end(p2)); - assert(overlay.get_step_at_position(p2, 5) == overlay.path_end(p2)); - assert(overlay.get_step_at_position(p2, 1000) == overlay.path_end(p2)); - - handle_t h2_flip = overlay.apply_orientation(overlay.flip(h2)); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 3)) == overlay.flip(h2_flip)); - - vector offs_1{1}; - auto parts_1 = overlay.divide_handle(overlay.flip(h1), offs_1); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 0)) == overlay.flip(parts_1[1])); - 
assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 1)) == overlay.flip(parts_1[1])); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 2)) == overlay.flip(parts_1[0])); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 3)) == overlay.flip(h2_flip)); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 0)) == overlay.flip(parts_1[1])); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 1)) == overlay.flip(parts_1[1])); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 2)) == overlay.flip(parts_1[0])); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 3)) == h3); - - - vector offs_2{1, 3}; - auto parts_2 = overlay.divide_handle(h5, offs_2); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 9)) == parts_2[0]); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 10)) == parts_2[1]); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 11)) == parts_2[1]); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 12)) == parts_2[2]); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 13)) == parts_2[0]); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 14)) == parts_2[1]); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 15)) == parts_2[1]); - assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 16)) == parts_2[2]); - assert(overlay.get_step_at_position(p1, 17) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 18) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); - } - } - cerr << "PathPositionOverlay tests successful!" 
<< endl; -} -void test_packed_reference_path_overlay() { - - vector implementations; + for (DeletableHandleGraph *implementation : implementations) { - HashGraph hg; - implementations.push_back(&hg); + DeletableHandleGraph &graph = *implementation; - PackedGraph pg; - implementations.push_back(&pg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - for (MutablePathDeletableHandleGraph* implementation : implementations) { - - MutablePathDeletableHandleGraph& graph = *implementation; - - handle_t h1 = graph.create_handle("AAA"); - handle_t h2 = graph.create_handle("A"); - handle_t h3 = graph.create_handle("T"); - handle_t h4 = graph.create_handle("AAAAA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h4); - graph.create_edge(h3, h4); - - path_handle_t p1 = graph.create_path_handle("p1"); - step_handle_t s1 = graph.append_step(p1, h1); - step_handle_t s2 = graph.append_step(p1, h2); - step_handle_t s3 = graph.append_step(p1, h4); - - path_handle_t p2 = graph.create_path_handle("p2"); - step_handle_t s2_1 = graph.append_step(p2, graph.flip(h4)); - step_handle_t s2_2 = graph.append_step(p2, graph.flip(h3)); - step_handle_t s2_3 = graph.append_step(p2, graph.flip(h1)); - - { - - PackedReferencePathOverlay overlay(&graph); - - assert(overlay.get_path_length(p1) == 9); - - assert(overlay.get_position_of_step(s1) == 0); - assert(overlay.get_position_of_step(s2) == 3); - assert(overlay.get_position_of_step(s3) == 4); - - assert(overlay.get_step_at_position(p1, 0) == s1); - assert(overlay.get_step_at_position(p1, 1) == s1); - assert(overlay.get_step_at_position(p1, 2) == s1); - assert(overlay.get_step_at_position(p1, 3) == s2); - assert(overlay.get_step_at_position(p1, 4) == s3); - assert(overlay.get_step_at_position(p1, 5) == s3); - assert(overlay.get_step_at_position(p1, 6) == s3); - assert(overlay.get_step_at_position(p1, 7) == s3); - assert(overlay.get_step_at_position(p1, 8) == s3); - 
assert(overlay.get_step_at_position(p1, 9) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 10) == overlay.path_end(p1)); - assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); - - bool found1 = false; - bool found2 = false; - overlay.for_each_step_on_handle(h1, [&](const step_handle_t& s) { - if (s == s1) { - found1 = true; - } else if (s == s2_3) { - found2 = true; - } else { - assert(false); - } - }); - assert(found1); - assert(found2); - found1 = false; - found2 = false; - - overlay.for_each_step_on_handle(h2, [&](const step_handle_t& s) { - if (s == s2) { - found1 = true; - } else { - assert(false); - } - }); - assert(found1); - found1 = false; - - overlay.for_each_step_on_handle(h3, [&](const step_handle_t& s) { - if (s == s2_2) { - found1 = true; - } else { - assert(false); - } - }); - assert(found1); - found1 = false; - - overlay.for_each_step_on_handle(h4, [&](const step_handle_t& s) { - if (s == s3) { - found1 = true; - } else if (s == s2_1) { - found2 = true; - } else { - assert(false); - } - }); - assert(found1); - assert(found2); - found1 = false; - found2 = false; + // initialize the graph + + handle_t h1 = graph.create_handle("A"); + handle_t h2 = graph.create_handle("C"); + + graph.create_edge(h1, h2); + graph.create_edge(graph.flip(h1), h2); + + // test for the right initial topology + bool found1 = false, found2 = false, found3 = false, found4 = false, + found5 = false, found6 = false; + int count1 = 0, count2 = 0, count3 = 0, count4 = 0; + + graph.follow_edges(h1, false, [&](const handle_t &other) { + if (other == h2) { + found1 = true; } - - { - - // Make sure we can handle a lot of paths - for (size_t i = 0; i < 100; i++) { - path_handle_t pn = graph.create_path_handle("pn" + std::to_string(i)); - graph.append_step(pn, h1); - graph.append_step(pn, h2); - graph.append_step(pn, h4); - } - - // Split the paths up agross many indexes for testing - PackedReferencePathOverlay overlay(&graph, {}, 10); - - 
std::unordered_set seen_paths; - overlay.for_each_step_on_handle(h1, [&](const step_handle_t& s) { - seen_paths.insert(overlay.get_path_name(overlay.get_path_handle_of_step(s))); - }); - // Should have the 2 original paths and the 100 new ones. - assert(seen_paths.size() == 102); + count1++; + }); + graph.follow_edges(h1, true, [&](const handle_t &other) { + if (other == graph.flip(h2)) { + found2 = true; } - } - cerr << "PackedReferencePathOverlay tests successful!" << endl; -} - -void test_reference_path_overlay() { - - vector implementations; - - HashGraph hg; - implementations.push_back(&hg); - - PackedGraph pg; - implementations.push_back(&pg); - - MappedPackedGraph mpg; - implementations.push_back(&mpg); - - for (MutablePathDeletableHandleGraph* implementation : implementations) { - - MutablePathDeletableHandleGraph& graph = *implementation; - - auto h1 = graph.create_handle("AAAA"); - auto h2 = graph.create_handle("AA"); - auto h3 = graph.create_handle("A"); - auto h4 = graph.create_handle("AAAAAA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h4); - graph.create_edge(h3, h4); - - auto p = graph.create_path_handle("p"); - auto s1 = graph.append_step(p, h1); - auto s2 = graph.append_step(p, h2); - auto s3 = graph.append_step(p, h4); - - { - ReferencePathOverlay ref_overlay(&graph); - - auto os1 = ref_overlay.path_begin(p); - auto os2 = ref_overlay.get_next_step(os1); - auto os3 = ref_overlay.get_next_step(os2); - - assert(ref_overlay.get_next_step(os3) == ref_overlay.path_end(p)); - assert(ref_overlay.get_previous_step(os1) == ref_overlay.path_front_end(p)); - - assert(ref_overlay.has_next_step(os1)); - assert(ref_overlay.has_next_step(os2)); - assert(!ref_overlay.has_next_step(os3)); - - assert(!ref_overlay.has_previous_step(os1)); - assert(ref_overlay.has_previous_step(os2)); - assert(ref_overlay.has_previous_step(os3)); - - assert(ref_overlay.get_next_step(os1) == os2); - assert(ref_overlay.get_next_step(os2) == 
os3); - assert(ref_overlay.get_next_step(os3) == ref_overlay.path_end(p)); - assert(ref_overlay.get_previous_step(os1) == ref_overlay.path_front_end(p)); - assert(ref_overlay.get_previous_step(os2) == os1); - assert(ref_overlay.get_previous_step(os3) == os2); - - assert(ref_overlay.get_step_count(p) == 3); - - assert(ref_overlay.get_path_length(p) == 12); - - assert(ref_overlay.get_position_of_step(os1) == 0); - assert(ref_overlay.get_position_of_step(os2) == 4); - assert(ref_overlay.get_position_of_step(os3) == 6); - - for (size_t i = 0; i < 25; ++i) { - if (i < 4) { - assert(ref_overlay.get_step_at_position(p, i) == os1); - } - else if (i < 6) { - assert(ref_overlay.get_step_at_position(p, i) == os2); - } - else if (i < 12) { - assert(ref_overlay.get_step_at_position(p, i) == os3); - } - else { - assert(ref_overlay.get_step_at_position(p, i) == ref_overlay.path_end(p)); - } - } - - int count = 0; - ref_overlay.for_each_step_on_handle(h1, [&](const step_handle_t& s) { - assert(s == os1); - ++count; - }); - assert(count == 1); - count = 0; - ref_overlay.for_each_step_on_handle(h2, [&](const step_handle_t& s) { - assert(s == os2); - ++count; - }); - assert(count == 1); - count = 0; - ref_overlay.for_each_step_on_handle(h3, [&](const step_handle_t& s) { - ++count; - }); - assert(count == 0); - count = 0; - ref_overlay.for_each_step_on_handle(h4, [&](const step_handle_t& s) { - assert(s == os3); - ++count; - }); - assert(count == 1); + count2++; + }); + graph.follow_edges(h2, false, [&](const handle_t &other) { count3++; }); + graph.follow_edges(h2, true, [&](const handle_t &other) { + if (other == h1) { + found3 = true; + } else if (other == graph.flip(h1)) { + found4 = true; } - - random_device rd; - default_random_engine prng(12261988);//(rd()); - - uniform_int_distribution node_len_distr(1, 5); - - vector paths(1, p); - - paths.push_back(graph.create_path_handle(std::to_string(paths.size()))); - 
paths.push_back(graph.create_path_handle(std::to_string(paths.size()))); - - uniform_int_distribution path_distr(0, paths.size() - 1); - - std::vector handles; - - // add enough nodes to stress test the parallel code - for (size_t i = 0; i < 200000; ++i) { - auto p = paths[path_distr(prng)]; - string seq(node_len_distr(prng), 'A'); - auto h = graph.create_handle(seq); - handles.push_back(h); - if (graph.get_step_count(p) != 0) { - graph.create_edge(graph.get_handle_of_step(graph.path_back(p)), h); - } - graph.append_step(p, h); + count4++; + }); + assert(found1); + assert(found2); + assert(found3); + assert(found4); + assert(count1 == 1); + assert(count2 == 1); + assert(count3 == 0); + assert(count4 == 2); + found1 = found2 = found3 = found4 = found5 = found6 = false; + count1 = count2 = count3 = count4 = 0; + + // flip a node and check if the orientation is correct + h1 = graph.apply_orientation(graph.flip(h1)); + + graph.follow_edges(h1, false, [&](const handle_t &other) { + if (other == h2) { + found1 = true; + } + count1++; + }); + graph.follow_edges(h1, true, [&](const handle_t &other) { + if (other == graph.flip(h2)) { + found2 = true; + } + count2++; + }); + graph.follow_edges(h2, false, [&](const handle_t &other) { count3++; }); + graph.follow_edges(h2, true, [&](const handle_t &other) { + if (other == h1) { + found3 = true; + } else if (other == graph.flip(h1)) { + found4 = true; + } + count4++; + }); + assert(found1); + assert(found2); + assert(found3); + assert(found4); + assert(count1 == 1); + assert(count2 == 1); + assert(count3 == 0); + assert(count4 == 2); + found1 = found2 = found3 = found4 = found5 = found6 = false; + count1 = count2 = count3 = count4 = 0; + + // create a new edge + + graph.create_edge(h1, graph.flip(h2)); + + // check the topology + + graph.follow_edges(h1, false, [&](const handle_t &other) { + if (other == h2) { + found1 = true; + } else if (other == graph.flip(h2)) { + found2 = true; + } + count1++; + }); + 
graph.follow_edges(h1, true, [&](const handle_t &other) { + if (other == graph.flip(h2)) { + found3 = true; + } + count2++; + }); + graph.follow_edges(h2, false, [&](const handle_t &other) { + if (other == graph.flip(h1)) { + found4 = true; + } + count3++; + }); + graph.follow_edges(h2, true, [&](const handle_t &other) { + if (other == h1) { + found5 = true; + } else if (other == graph.flip(h1)) { + found6 = true; + } + count4++; + }); + assert(found1); + assert(found2); + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(count1 == 2); + assert(count2 == 1); + assert(count3 == 1); + assert(count4 == 2); + found1 = found2 = found3 = found4 = found5 = found6 = false; + count1 = count2 = count3 = count4 = 0; + + // now another node and check to make sure that the edges are updated + // appropriately + + h2 = graph.apply_orientation(graph.flip(h2)); + + graph.follow_edges(h1, false, [&](const handle_t &other) { + if (other == h2) { + found1 = true; + } else if (other == graph.flip(h2)) { + found2 = true; } - - uniform_int_distribution handle_distr(0, handles.size() - 1); - - // add enough path steps that some nodes will have >= 3 path coverage - for (size_t i = 0; i < 100000; ++i) { - auto p = paths[path_distr(prng)]; - auto h1 = graph.get_handle_of_step(graph.path_back(p)); - auto h2 = handles[handle_distr(prng)]; - graph.create_edge(h1, h2); - graph.append_step(p, h2); + count1++; + }); + graph.follow_edges(h1, true, [&](const handle_t &other) { + if (other == h2) { + found3 = true; } - - { - ReferencePathOverlay ref_overlay(&graph); - - assert(ref_overlay.get_path_count() == paths.size()); - - std::unordered_map> steps_on_handle; - - ref_overlay.for_each_path_handle([&](const path_handle_t& path) { - size_t walked_len = 0; - for (auto s = ref_overlay.path_begin(path), end = ref_overlay.path_end(path); s != end; s = ref_overlay.get_next_step(s)) { - assert(ref_overlay.get_path_handle_of_step(s) == path); - 
assert(ref_overlay.get_position_of_step(s) == walked_len); - auto h = ref_overlay.get_handle_of_step(s); - size_t len = ref_overlay.get_length(h); - for (size_t i = 0; i < len; ++i) { - auto s2 = ref_overlay.get_step_at_position(path, walked_len + i); - assert(s2 == s); - } - steps_on_handle[h].push_back(s); - walked_len += len; - } - assert(ref_overlay.get_path_length(path) == walked_len); - }); - - ref_overlay.for_each_handle([&](const handle_t& handle) { -// std::cerr << "check handles on " << ref_overlay.get_id(handle) << '\n'; - auto& direct = steps_on_handle[handle]; - std::sort(direct.begin(), direct.end()); - vector indexed; - ref_overlay.for_each_step_on_handle(handle, [&](const step_handle_t& step) { - indexed.push_back(step); - }); - std::sort(indexed.begin(), indexed.end()); - if (direct != indexed) { - std::cerr << "error on node " << ref_overlay.get_id(handle) << '\n'; - std::cerr << "direct\n"; - for (auto s : direct) { - std::cerr << '\t' << handlegraph::as_integers(s)[0] << '\t' << handlegraph::as_integers(s)[1] << '\t' << handlegraph::as_integer(ref_overlay.get_path_handle_of_step(s)) << '\t' << ref_overlay.get_id(ref_overlay.get_handle_of_step(s)) << '\n'; - } - std::cerr << "indexed\n"; - for (auto s : indexed) { - std::cerr << '\t' << handlegraph::as_integers(s)[0] << '\t' << handlegraph::as_integers(s)[1] << '\t' << handlegraph::as_integer(ref_overlay.get_path_handle_of_step(s)) << '\t' << ref_overlay.get_id(ref_overlay.get_handle_of_step(s)) << '\n'; - } - } - assert(direct == indexed); - }); + count2++; + }); + graph.follow_edges(h2, false, [&](const handle_t &other) { + if (other == h1) { + found4 = true; + } else if (other == graph.flip(h1)) { + found5 = true; } + count3++; + }); + graph.follow_edges(h2, true, [&](const handle_t &other) { + if (other == h1) { + found6 = true; + } + count4++; + }); + assert(found1); + assert(found2); + assert(found3); + assert(found4); + assert(found5); + assert(found6); + assert(count1 == 2); + 
assert(count2 == 1); + assert(count3 == 2); + assert(count4 == 1); } - - cerr << "ReferencePathOverlay tests successful!" << endl; -} + } -void test_vectorizable_overlays() { - - vector implementations; + // another batch of tests involving divide handle and reversing + // self edges + { + vector implementations; HashGraph hg; implementations.push_back(&hg); PackedGraph pg; implementations.push_back(&pg); - + MappedPackedGraph mpg; implementations.push_back(&mpg); - - for (MutablePathDeletableHandleGraph* implementation : implementations) { - - MutablePathDeletableHandleGraph& graph = *implementation; - - handle_t h1 = graph.create_handle("AAA"); - handle_t h2 = graph.create_handle("A"); - handle_t h3 = graph.create_handle("T"); - handle_t h4 = graph.create_handle("AAAAA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h4); - graph.create_edge(h3, h4); - - path_handle_t p1 = graph.create_path_handle("p1"); - step_handle_t s1 = graph.append_step(p1, h1); - step_handle_t s2 = graph.append_step(p1, h2); - step_handle_t s3 = graph.append_step(p1, h4); - - - bdsg::VectorizableOverlay overlay(&graph); - - set edge_ranks; - size_t edge_count = 0; - graph.for_each_edge([&](edge_t edge) { - edge_ranks.insert(overlay.edge_index(edge)); - ++edge_count; - }); - // every edge gets a unique rank - assert(edge_ranks.size() == edge_count); + for (DeletableHandleGraph *implementation : implementations) { + DeletableHandleGraph &graph = *implementation; - size_t node_count = 0; - map pos_to_node; - graph.for_each_handle([&](handle_t handle) { - pos_to_node[overlay.node_vector_offset(graph.get_id(handle))] = graph.get_id(handle); - ++node_count; - }); + handle_t h1 = graph.create_handle("ATGAA"); + handle_t h2 = graph.create_handle("ATGAA"); + + graph.create_edge(h1, graph.flip(h1)); + graph.create_edge(graph.flip(h2), h2); + + auto parts1 = graph.divide_handle(h1, {2, 4}); + auto parts2 = graph.divide_handle(h2, {2, 4}); + + assert(parts1.size() 
== 3); + assert(parts2.size() == 3); + + assert(graph.has_edge(parts1[0], parts1[1])); + assert(graph.has_edge(parts1[1], parts1[2])); + assert(graph.has_edge(parts1[2], graph.flip(parts1[2]))); + + assert(graph.has_edge(parts2[0], parts2[1])); + assert(graph.has_edge(parts2[1], parts2[2])); + assert(graph.has_edge(graph.flip(parts2[0]), parts2[0])); + } + } + + // another batch of tests that deal with deleting after dividing + { + vector> + implementations; + + // Add implementations + + PackedGraph pg, pg2; + implementations.push_back(make_pair(&pg, &pg2)); + + HashGraph hg, hg2; + implementations.push_back(make_pair(&hg, &hg2)); - // every node gets a unique rank - assert(pos_to_node.size() == node_count); + MappedPackedGraph mpg, mpg2; + implementations.push_back(make_pair(&mpg, &mpg2)); - auto pni = pos_to_node.begin(); - auto next = pni; - for (++next; next != pos_to_node.end(); ++pni, ++next) { - assert(next->first - pni->first == graph.get_length(graph.get_handle(pni->second))); + // And test them + for (int imp = 0; imp < implementations.size(); ++imp) { + + for (bool backwards : {false, true}) { + + MutablePathDeletableHandleGraph *g = backwards + ? 
implementations[imp].first + : implementations[imp].second; + + assert(g->get_node_count() == 0); + + handle_t handle1 = g->create_handle("CAAATAAGGCTTGGAAATTTTCTGGAGTTCTA"); + handle_t handle2 = g->create_handle("TTATATTCCAACTCTCTG"); + path_handle_t path_handle = g->create_path_handle("x"); + g->create_edge(handle1, handle2); + + if (backwards) { + handle1 = g->flip(handle1); + handle2 = g->flip(handle2); + g->append_step(path_handle, handle2); + g->append_step(path_handle, handle1); + } else { + g->append_step(path_handle, handle1); + g->append_step(path_handle, handle2); + } + + auto parts1 = g->divide_handle(handle1, vector({2, 7, 22, 31})); + auto parts2 = g->divide_handle(handle2, vector({1, 5, 10})); + + vector steps; + g->for_each_step_in_path(path_handle, [&](step_handle_t step_handle) { + steps.push_back(g->get_handle_of_step(step_handle)); + }); + + assert(steps.size() == 9); + int i = 0; + vector to_delete; + g->append_step(g->create_path_handle(to_string(i)), steps[i]); + ++i; + to_delete.push_back(steps[i++]); + g->append_step(g->create_path_handle(to_string(i)), steps[i]); + ++i; + to_delete.push_back(steps[i++]); + to_delete.push_back(steps[i++]); + to_delete.push_back(steps[i++]); + g->append_step(g->create_path_handle(to_string(i)), steps[i]); + ++i; + to_delete.push_back(steps[i++]); + g->append_step(g->create_path_handle(to_string(i)), steps[i]); + ++i; + + g->destroy_path(path_handle); + + for (auto handle : to_delete) { + g->destroy_handle(handle); } - // check that node_at_vector_offset works - graph.for_each_handle([&](handle_t handle) { - size_t pos = overlay.node_vector_offset(graph.get_id(handle)); - for (size_t i = 0; i < graph.get_length(handle); ++i) { - assert(overlay.node_at_vector_offset(pos + i + 1) == graph.get_id(handle)); - } - }); + g->for_each_path_handle([&](const path_handle_t &p) { + g->for_each_step_in_path(p, [&](const step_handle_t &s) { + auto h = g->get_handle_of_step(s); + }); + }); + + assert(g->get_node_count() == 
4); + assert(g->get_path_count() == 4); + } } - cerr << "VectorizableOverlay tests successful!" << endl; -} + } -void test_packed_subgraph_overlay() { - - vector implementations; + // another batch of tests that deal with deleting down to an empty graph + { + vector implementations; + + // Add implementations + + PackedGraph pg; + implementations.push_back(&pg); HashGraph hg; implementations.push_back(&hg); + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + // And test them + for (int imp = 0; imp < implementations.size(); ++imp) { + + MutablePathDeletableHandleGraph *g = implementations[imp]; + + // the graph that i discovered the bug this tests for + vector>> graph_spec{ + {1, "C", {19}}, {2, "A", {4, 5}}, + {3, "G", {4, 5}}, {4, "T", {6, 16, 18}}, + {5, "C", {6, 16, 18}}, {6, "TTG", {7, 8}}, + {7, "A", {9}}, {8, "G", {9}}, + {9, "AAATT", {16}}, {10, "A", {12}}, + {11, "T", {12}}, {12, "ATAT", {13, 14}}, + {13, "A", {15}}, {14, "T", {15}}, + {15, "C", {20}}, {16, "TTCTGG", {17, 18}}, + {17, "AGT", {18}}, {18, "TCTAT", {10, 11}}, + {19, "AAATAAG", {2, 3}}, {20, "CAACTCTCTG", {}}, + }; + + for (auto rec : graph_spec) { + g->create_handle(get<1>(rec), get<0>(rec)); + } + for (auto rec : graph_spec) { + for (auto n : get<2>(rec)) { + g->create_edge(g->get_handle(get<0>(rec)), g->get_handle(n)); + } + } + + // a series of deletes that elicits the behavior + vector> delete_edges{ + {g->get_handle(10, 1), g->get_handle(18, 1)}, + {g->get_handle(3, 0), g->get_handle(5, 0)}, + {g->get_handle(4, 0), g->get_handle(6, 0)}, + {g->get_handle(6, 0), g->get_handle(7, 0)}, + {g->get_handle(2, 0), g->get_handle(5, 0)}, + {g->get_handle(7, 0), g->get_handle(9, 0)}, + {g->get_handle(16, 0), g->get_handle(17, 0)}, + {g->get_handle(12, 0), g->get_handle(14, 0)}, + {g->get_handle(9, 0), g->get_handle(16, 0)}, + {g->get_handle(11, 1), g->get_handle(18, 1)}, + {g->get_handle(6, 0), g->get_handle(8, 0)}, + {g->get_handle(12, 0), g->get_handle(13, 0)}, + {g->get_handle(5, 0), 
g->get_handle(16, 0)}, + {g->get_handle(4, 0), g->get_handle(16, 0)}, + {g->get_handle(16, 0), g->get_handle(18, 0)}, + {g->get_handle(5, 0), g->get_handle(6, 0)}, + {g->get_handle(3, 0), g->get_handle(4, 0)}, + {g->get_handle(8, 0), g->get_handle(9, 0)}, + {g->get_handle(2, 0), g->get_handle(4, 0)}}; + for (auto edge : delete_edges) { + g->destroy_edge(edge.first, edge.second); + } + g->destroy_handle(g->get_handle(16, 0)); + g->destroy_handle(g->get_handle(13, 0)); + g->destroy_handle(g->get_handle(15, 0)); + g->destroy_handle(g->get_handle(20, 0)); + g->destroy_handle(g->get_handle(14, 0)); + g->destroy_handle(g->get_handle(10, 0)); + g->destroy_handle(g->get_handle(12, 0)); + g->destroy_handle(g->get_handle(11, 0)); + g->destroy_handle(g->get_handle(9, 0)); + g->destroy_handle(g->get_handle(4, 0)); + g->destroy_handle(g->get_handle(7, 0)); + g->destroy_handle(g->get_handle(18, 0)); + g->destroy_handle(g->get_handle(5, 0)); + g->destroy_handle(g->get_handle(1, 0)); + g->destroy_handle(g->get_handle(8, 0)); + g->destroy_handle(g->get_handle(19, 0)); + g->destroy_handle(g->get_handle(3, 0)); + g->destroy_handle(g->get_handle(6, 0)); + g->destroy_handle(g->get_handle(17, 0)); + g->destroy_handle(g->get_handle(2, 0)); + + g->create_handle("GATTACA", 4); + assert(g->get_node_count() == 1); + } + } + + // Edge counts stay accurate after deleting nodes + { + vector implementations; + + // Add implementations + PackedGraph pg; implementations.push_back(&pg); - + + HashGraph hg; + implementations.push_back(&hg); + MappedPackedGraph mpg; implementations.push_back(&mpg); - - for (MutablePathDeletableHandleGraph* implementation : implementations) { - - MutablePathDeletableHandleGraph& graph = *implementation; - - handle_t h1 = graph.create_handle("AAA"); - handle_t h2 = graph.create_handle("A"); - handle_t h3 = graph.create_handle("T"); - handle_t h4 = graph.create_handle("AAAAA"); - - graph.create_edge(h1, h2); - graph.create_edge(h1, h3); - graph.create_edge(h2, h4); - 
graph.create_edge(h3, h4); - - PackedSubgraphOverlay subgraph(&graph); - assert(subgraph.get_node_count() == 0); - subgraph.for_each_handle([&](const handle_t& h) { - assert(false); - }); - assert(!subgraph.has_node(graph.get_id(h1))); - assert(!subgraph.has_node(graph.get_id(h2))); - assert(!subgraph.has_node(graph.get_id(h3))); - assert(!subgraph.has_node(graph.get_id(h4))); - - subgraph.add_node(h1); - - assert(subgraph.get_node_count() == 1); - bool found1 = false; - subgraph.for_each_handle([&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h1)) { - found1 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h1)); - } - else { - assert(false); - } - }); - assert(found1); - found1 = false; - - assert(subgraph.has_node(graph.get_id(h1))); - assert(!subgraph.has_node(graph.get_id(h2))); - assert(!subgraph.has_node(graph.get_id(h3))); - assert(!subgraph.has_node(graph.get_id(h4))); - - subgraph.follow_edges(h1, true, [&](const handle_t& h) { - assert(false); - }); - subgraph.follow_edges(h1, false, [&](const handle_t& h) { - assert(false); - }); - - assert(subgraph.get_degree(h1, true) == 0); - assert(subgraph.get_degree(h1, false) == 0); - - subgraph.add_node(h4); - - assert(subgraph.get_node_count() == 2); - bool found2 = false; - subgraph.for_each_handle([&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h1)) { - found1 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h1)); - } - else if (subgraph.get_id(h) == graph.get_id(h4)) { - found2 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h4)); - } - else { - assert(false); - } - }); - assert(found1); - assert(found2); - found1 = false; - found2 = false; - - assert(subgraph.has_node(graph.get_id(h1))); - assert(!subgraph.has_node(graph.get_id(h2))); - assert(!subgraph.has_node(graph.get_id(h3))); - assert(subgraph.has_node(graph.get_id(h4))); - - subgraph.follow_edges(h1, true, [&](const handle_t& h) { - assert(false); - }); - subgraph.follow_edges(h1, 
false, [&](const handle_t& h) { - assert(false); - }); - subgraph.follow_edges(h4, true, [&](const handle_t& h) { - assert(false); - }); - subgraph.follow_edges(h4, false, [&](const handle_t& h) { - assert(false); - }); - - - assert(subgraph.get_degree(h1, true) == 0); - assert(subgraph.get_degree(h1, false) == 0); - assert(subgraph.get_degree(h4, true) == 0); - assert(subgraph.get_degree(h4, false) == 0); - - subgraph.add_node(graph.flip(h2)); - - assert(subgraph.get_node_count() == 3); - bool found3 = false; - subgraph.for_each_handle([&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h1)) { - found1 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h1)); - } - else if (subgraph.get_id(h) == graph.get_id(h2)) { - found2 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h2)); - } - else if (subgraph.get_id(h) == graph.get_id(h4)) { - found3 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h4)); - } - else { - assert(false); - } - }); - assert(found1); - assert(found2); - assert(found3); - found1 = false; - found2 = false; - found3 = false; - - assert(subgraph.has_node(graph.get_id(h1))); - assert(subgraph.has_node(graph.get_id(h2))); - assert(!subgraph.has_node(graph.get_id(h3))); - assert(subgraph.has_node(graph.get_id(h4))); - - bool found4 = false; - subgraph.follow_edges(h1, true, [&](const handle_t& h) { - assert(false); - }); - subgraph.follow_edges(h1, false, [&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h2) && !graph.get_is_reverse(h)) { - found1 = true; - } - else { - assert(false); - } - }); - subgraph.follow_edges(h2, true, [&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h1) && !graph.get_is_reverse(h)) { - found2 = true; - } - else { - assert(false); - } - }); - subgraph.follow_edges(h2, false, [&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h4) && !graph.get_is_reverse(h)) { - found3 = true; - } - else { - assert(false); - } - }); - 
subgraph.follow_edges(h4, true, [&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h2) && !graph.get_is_reverse(h)) { - found4 = true; - } - else { - assert(false); - } - }); - subgraph.follow_edges(h4, false, [&](const handle_t& h) { - assert(false); - }); - - assert(subgraph.get_degree(h1, true) == 0); - assert(subgraph.get_degree(h1, false) == 1); - assert(subgraph.get_degree(h2, true) == 1); - assert(subgraph.get_degree(h2, false) == 1); - assert(subgraph.get_degree(h4, true) == 1); - assert(subgraph.get_degree(h4, false) == 0); - - assert(found1); - assert(found2); - assert(found3); - assert(found4); - found1 = false; - found2 = false; - found3 = false; - found4 = false; - - subgraph.remove_node(h1); - - assert(subgraph.get_node_count() == 2); - subgraph.for_each_handle([&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h2)) { - found1 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h2)); - } - else if (subgraph.get_id(h) == graph.get_id(h4)) { - found2 = true; - assert(graph.get_sequence(h) == graph.get_sequence(h4)); - } - else { - assert(false); - } - }); - assert(found1); - assert(found2); - found1 = false; - found2 = false; - - assert(!subgraph.has_node(graph.get_id(h1))); - assert(subgraph.has_node(graph.get_id(h2))); - assert(!subgraph.has_node(graph.get_id(h3))); - assert(subgraph.has_node(graph.get_id(h4))); - - subgraph.follow_edges(h2, true, [&](const handle_t& h) { - assert(false); - }); - subgraph.follow_edges(h2, false, [&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h4) && !graph.get_is_reverse(h)) { - found1 = true; - } - else { - assert(false); - } - }); - subgraph.follow_edges(h4, true, [&](const handle_t& h) { - if (subgraph.get_id(h) == graph.get_id(h2) && !graph.get_is_reverse(h)) { - found2 = true; - } - else { - assert(false); - } - }); - subgraph.follow_edges(h4, false, [&](const handle_t& h) { - assert(false); + + // note: not valid in graph with reversing self edges + auto 
count_edges = [&](const HandleGraph &g) { + int cnt = 0; + g.for_each_handle([&](const handle_t &h) { + for (bool r : {true, false}) { + g.follow_edges(h, r, [&](const handle_t &n) { ++cnt; }); + } + }); + assert(cnt % 2 == 0); + return cnt / 2; + }; + + // And test them + for (int imp = 0; imp < implementations.size(); ++imp) { + + MutablePathDeletableHandleGraph *graph = implementations[imp]; + + handle_t h1 = graph->create_handle("A"); + handle_t h2 = graph->create_handle("AAA"); + handle_t h3 = graph->create_handle("CC"); + handle_t h4 = graph->create_handle("G"); + handle_t h5 = graph->create_handle("T"); + handle_t h6 = graph->create_handle("T"); + handle_t h7 = graph->create_handle("TT"); + handle_t h8 = graph->create_handle("T"); + handle_t h9 = graph->create_handle("TTT"); + handle_t h10 = graph->create_handle("C"); + handle_t h11 = graph->create_handle("CC"); + handle_t h12 = graph->create_handle("A"); + handle_t h13 = graph->create_handle("AA"); + + graph->create_edge(h1, h2); + graph->create_edge(h2, h3); + graph->create_edge(h2, h4); + graph->create_edge(h3, h4); + graph->create_edge(h3, h5); + graph->create_edge(h5, h6); + graph->create_edge(h6, h7); + graph->create_edge(h7, h8); + graph->create_edge(h8, h9); + graph->create_edge(h9, h10); + graph->create_edge(h9, h12); + graph->create_edge(h10, h11); + graph->create_edge(h11, h12); + graph->create_edge(h12, h13); + graph->create_edge(h5, h7); + graph->create_edge(h5, h11); + graph->create_edge(h7, h13); + graph->create_edge(h8, h12); + + graph->destroy_handle(h1); + assert(graph->get_edge_count() == count_edges(*graph)); + graph->destroy_handle(h6); + assert(graph->get_edge_count() == count_edges(*graph)); + graph->destroy_handle(h9); + assert(graph->get_edge_count() == count_edges(*graph)); + graph->destroy_handle(h10); + assert(graph->get_edge_count() == count_edges(*graph)); + } + } + + // batch deletion of paths works as expected + { + vector implementations; + + // Add implementations + + 
PackedGraph pg; + implementations.push_back(&pg); + + HashGraph hg; + implementations.push_back(&hg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + for (int imp = 0; imp < implementations.size(); ++imp) { + + MutablePathDeletableHandleGraph &graph = *implementations[imp]; + + auto h1 = graph.create_handle("A"); + auto h2 = graph.create_handle("A"); + auto h3 = graph.create_handle("A"); + + graph.create_edge(h1, h2); + graph.create_edge(h2, h3); + + auto p1 = graph.create_path_handle("1"); + auto p2 = graph.create_path_handle("2"); + auto p3 = graph.create_path_handle("3"); + auto p4 = graph.create_path_handle("4"); + auto p5 = graph.create_path_handle("5"); + + for (const auto &p : {p1, p2, p3, p4, p5}) { + for (auto h : {h1, h2, h3}) { + graph.append_step(p, h); + } + } + + graph.destroy_paths({p1, p3, p4}); + + set paths_seen; + set paths_expected{p2, p5}; + graph.for_each_path_handle([&](const path_handle_t &path) { + assert(!paths_seen.count(path)); + paths_seen.insert(path); + std::vector handles; + std::vector handles_expected{h1, h2, h3}; + for (auto h : graph.scan_path(path)) { + handles.push_back(h); + } + assert(handles == handles_expected); + }); + + assert(paths_seen == paths_expected); + + graph.for_each_handle([&](const handle_t &h) { + set paths; + graph.for_each_step_on_handle(h, [&](const step_handle_t &step) { + auto p = graph.get_path_handle_of_step(step); + assert(!paths.count(p)); + paths.insert(p); }); - - assert(subgraph.get_degree(h2, true) == 0); - assert(subgraph.get_degree(h2, false) == 1); - assert(subgraph.get_degree(h4, true) == 1); - assert(subgraph.get_degree(h4, false) == 0); - - assert(found1); - assert(found2); - found1 = false; - found2 = false; + assert(paths_seen == paths_expected); + }); } - - cerr << "PackedSubgraphOverlay tests successful!" << endl; + } + + cerr << "DeletableHandleGraph tests successful!" 
<< endl; } -void test_mapped_packed_graph() { - auto check_graph = [](const MappedPackedGraph& mpg) { - // Dump it into this map - unordered_map graph_contents; - mpg.for_each_handle([&](const handle_t& h) { - graph_contents[mpg.get_id(h)] = mpg.get_sequence(h); - }); - - // Make sure it has the right things - assert(graph_contents.at(1) == "GATTACA"); - assert(graph_contents.at(2) == "CATTAG"); +void test_mutable_path_handle_graphs() { + + vector implementations; + + PackedGraph pg; + implementations.push_back(&pg); + + HashGraph hg; + implementations.push_back(&hg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + for (MutablePathDeletableHandleGraph *implementation : implementations) { + + auto check_path = [&](MutablePathDeletableHandleGraph &graph, + const path_handle_t &p, + const vector &steps) { + assert(graph.get_step_count(p) == steps.size()); + + // Make sure steps connect back to the path + step_handle_t begin_step = graph.path_begin(p); + step_handle_t end_step = graph.path_end(p); + assert(graph.get_path_handle_of_step(begin_step) == p); + assert(graph.get_path_handle_of_step(end_step) == p); + + step_handle_t step = graph.path_begin(p); + for (int i = 0; i < steps.size(); i++) { + auto here = graph.get_handle_of_step(step); + assert(graph.get_path_handle_of_step(step) == p); + assert(graph.get_handle_of_step(step) == steps[i]); + + if (graph.get_is_circular(p)) { + assert(graph.has_next_step(step)); + assert(graph.has_previous_step(step)); + } else { + assert(graph.has_next_step(step) == i + 1 < steps.size()); + assert(graph.has_previous_step(step) == i > 0); + } + + step = graph.get_next_step(step); + } + + if (graph.get_is_circular(p) && !graph.is_empty(p)) { + assert(step == graph.path_begin(p)); + } else { + assert(step == graph.path_end(p)); + } + + step = graph.path_back(p); + + for (int i = steps.size() - 1; i >= 0; i--) { + + assert(graph.get_path_handle_of_step(step) == p); + assert(graph.get_handle_of_step(step) == 
steps[i]); + + if (graph.get_is_circular(p)) { + assert(graph.has_next_step(step)); + assert(graph.has_previous_step(step)); + } else { + assert(graph.has_next_step(step) == i + 1 < steps.size()); + assert(graph.has_previous_step(step) == i > 0); + } + + step = graph.get_previous_step(step); + } + + if (graph.get_is_circular(p) && !graph.is_empty(p)) { + assert(step == graph.path_back(p)); + } else { + assert(step == graph.path_front_end(p)); + } }; - char filename[] = "tmpXXXXXX"; - int fd = mkstemp(filename); - assert(fd != -1); + auto check_flips = [&](MutablePathDeletableHandleGraph &graph, + const path_handle_t &p, + const vector &steps) { + auto flipped = steps; + for (size_t i = 0; i < steps.size(); i++) { + graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); + flipped[i] = graph.flip(flipped[i]); + check_path(graph, p, flipped); + + graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); + flipped[i] = graph.flip(flipped[i]); + check_path(graph, p, flipped); + } + }; + + MutablePathDeletableHandleGraph &graph = *implementation; + + handle_t h1 = graph.create_handle("AC"); + handle_t h2 = graph.create_handle("CAGTGA"); + handle_t h3 = graph.create_handle("GT"); + + graph.create_edge(h1, h2); + graph.create_edge(h2, h3); + graph.create_edge(h1, graph.flip(h2)); + graph.create_edge(graph.flip(h2), h3); + + assert(!graph.has_path("1")); + assert(graph.get_path_count() == 0); + + path_handle_t p1 = graph.create_path_handle("1"); + + assert(graph.has_path("1")); + assert(graph.get_path_count() == 1); + assert(graph.get_path_handle("1") == p1); + assert(graph.get_path_name(p1) == "1"); + assert(graph.get_step_count(p1) == 0); + assert(graph.is_empty(p1)); + + graph.append_step(p1, h1); + + assert(graph.get_step_count(p1) == 1); + assert(!graph.is_empty(p1)); + + graph.append_step(p1, h2); + graph.append_step(p1, h3); + + assert(graph.get_step_count(p1) == 3); + + // graph can traverse a path + check_path(graph, p1, {h1, h2, h3}); + + // graph 
preserves paths when reversing nodes + check_flips(graph, p1, {h1, h2, h3}); + + // make a circular path + path_handle_t p2 = graph.create_path_handle("2", true); + assert(graph.get_path_count() == 2); + + graph.append_step(p2, h1); + graph.append_step(p2, graph.flip(h2)); + graph.append_step(p2, h3); + + check_path(graph, p2, {h1, graph.flip(h2), h3}); + + // graph can query steps of a node on paths + + bool found1 = false, found2 = false; + vector steps = graph.steps_of_handle(h1); + for (auto &step : steps) { + if (graph.get_path_handle_of_step(step) == p1 && + graph.get_handle_of_step(step) == h1) { + found1 = true; + } else if (graph.get_path_handle_of_step(step) == p2 && + graph.get_handle_of_step(step) == h1) { + found2 = true; + } else { + assert(false); + } + } + assert(found1); + assert(found2); + found1 = found2 = false; + + steps = graph.steps_of_handle(h1, true); + for (auto &step : steps) { + if (graph.get_path_handle_of_step(step) == p1 && + graph.get_handle_of_step(step) == h1) { + found1 = true; + } else if (graph.get_path_handle_of_step(step) == p2 && + graph.get_handle_of_step(step) == h1) { + found2 = true; + } else { + assert(false); + } + } + assert(found1); + assert(found2); + found1 = found2 = false; + + steps = graph.steps_of_handle(graph.flip(h1), true); + for (auto &step : steps) { + assert(false); + } + + steps = graph.steps_of_handle(h2, true); + for (auto &step : steps) { + if (graph.get_path_handle_of_step(step) == p1 && + graph.get_handle_of_step(step) == h2) { + found1 = true; + } else { + assert(false); + } + } + steps = graph.steps_of_handle(graph.flip(h2), true); + for (auto &step : steps) { + if (graph.get_path_handle_of_step(step) == p2 && + graph.get_handle_of_step(step) == graph.flip(h2)) { + found2 = true; + } else { + assert(false); + } + } + assert(found1); + assert(found2); + found1 = found2 = false; + + vector segments = graph.divide_handle(h2, {size_t(2), size_t(4)}); + + // graph preserves paths when dividing nodes + + 
check_path(graph, p1, {h1, segments[0], segments[1], segments[2], h3}); + check_path(graph, p2, + {h1, graph.flip(segments[2]), graph.flip(segments[1]), + graph.flip(segments[0]), h3}); + + path_handle_t p3 = graph.create_path_handle("3"); + graph.append_step(p3, h1); + graph.append_step(p3, segments[0]); + + assert(graph.has_path("3")); + assert(graph.get_path_count() == 3); + + // graph can toggle circularity + + graph.for_each_path_handle([&](const path_handle_t &p) { + vector steps; + + for (handle_t h : graph.scan_path(p)) { + steps.push_back(h); + } + + bool starting_circularity = graph.get_is_circular(p); + + // make every transition occur + for (bool circularity : {true, true, false, false, true}) { + graph.set_circularity(p, circularity); + assert(graph.get_is_circular(p) == circularity); + check_path(graph, p, steps); + } + + graph.set_circularity(p, starting_circularity); + }); + + // graph can destroy paths + + graph.destroy_path(p3); + + assert(!graph.has_path("3")); + assert(graph.get_path_count() == 2); + + bool found3 = false; + + graph.for_each_path_handle([&](const path_handle_t &p) { + if (graph.get_path_name(p) == "1") { + found1 = true; + } else if (graph.get_path_name(p) == "2") { + found2 = true; + } else if (graph.get_path_name(p) == "3") { + found3 = true; + } else { + assert(false); + } + }); + + assert(found1); + assert(found2); + assert(!found3); + + // check flips to see if membership records are still functional + check_flips(graph, p1, {h1, segments[0], segments[1], segments[2], h3}); + check_flips(graph, p2, + {h1, graph.flip(segments[2]), graph.flip(segments[1]), + graph.flip(segments[0]), h3}); + + graph.destroy_path(p1); + + assert(!graph.has_path("1")); + assert(graph.get_path_count() == 1); + + found1 = found2 = found3 = false; + + graph.for_each_path_handle([&](const path_handle_t &p) { + if (graph.get_path_name(p) == "1") { + found1 = true; + } else if (graph.get_path_name(p) == "2") { + found2 = true; + } else if 
(graph.get_path_name(p) == "3") { + found3 = true; + } else { + assert(false); + } + }); + + assert(!found1); + assert(found2); + assert(!found3); + + // check flips to see if membership records are still functional + check_flips(graph, p2, + {h1, graph.flip(segments[2]), graph.flip(segments[1]), + graph.flip(segments[0]), h3}); + + // make a path to rewrite + path_handle_t p4 = graph.create_path_handle("4"); + graph.prepend_step(p4, h3); + graph.prepend_step(p4, segments[2]); + graph.prepend_step(p4, segments[1]); + graph.prepend_step(p4, segments[0]); + graph.prepend_step(p4, h1); + + check_flips(graph, p4, {h1, segments[0], segments[1], segments[2], h3}); + + auto check_rewritten_segment = + [&](const pair &new_segment, + const vector &steps) { + int i = 0; + for (auto step = new_segment.first; step != new_segment.second; + step = graph.get_next_step(step)) { + assert(graph.get_handle_of_step(step) == steps[i]); + i++; + } + assert(i == steps.size()); + }; + + // rewrite the middle portion of a path + + step_handle_t s1 = graph.get_next_step(graph.path_begin(p4)); + step_handle_t s2 = + graph.get_next_step(graph.get_next_step(graph.get_next_step(s1))); + + auto new_segment = + graph.rewrite_segment(s1, s2, + {graph.flip(segments[2]), graph.flip(segments[1]), + graph.flip(segments[0])}); + + check_flips(graph, p4, + {h1, graph.flip(segments[2]), graph.flip(segments[1]), + graph.flip(segments[0]), h3}); + check_rewritten_segment(new_segment, + {graph.flip(segments[2]), graph.flip(segments[1]), + graph.flip(segments[0])}); + + // rewrite around the end of a circular path to delete + + graph.create_edge(h3, h1); + graph.create_edge(segments[2], segments[0]); + graph.set_circularity(p4, true); + + s1 = graph.get_previous_step(graph.path_begin(p4)); + s2 = graph.get_next_step(graph.path_begin(p4)); + assert(s2 != graph.path_end(p4)); + + new_segment = graph.rewrite_segment(s1, s2, vector()); + // The end we get should be the same as the end we sent, since it is + // 
exclusive + assert(new_segment.second == s2); + + check_flips(graph, p4, + {graph.flip(segments[2]), graph.flip(segments[1]), + graph.flip(segments[0])}); + check_rewritten_segment(new_segment, vector()); + + // add into an empty slot + + new_segment = graph.rewrite_segment(new_segment.first, new_segment.second, + {graph.flip(h1), graph.flip(h3)}); + + check_flips(graph, p4, + {graph.flip(h1), graph.flip(h3), graph.flip(segments[2]), + graph.flip(segments[1]), graph.flip(segments[0])}); + check_rewritten_segment(new_segment, {graph.flip(h1), graph.flip(h3)}); + } + + { + vector< + pair> + implementations; + + // Add implementations + + HashGraph hg, hg2; + implementations.push_back(make_pair(&hg, &hg2)); + + PackedGraph pg, pg2; + implementations.push_back(make_pair(&pg, &pg2)); + + MappedPackedGraph mpg, mpg2; + implementations.push_back(make_pair(&mpg, &mpg2)); + + // And test them + for (int imp = 0; imp < implementations.size(); ++imp) { + for (bool backwards : {false, true}) { + + MutablePathMutableHandleGraph *g = backwards + ? 
implementations[imp].first + : implementations[imp].second; + + assert(g->get_node_count() == 0); + + handle_t handle = g->create_handle("TTATATTCCAACTCTCTG"); + if (backwards) { + handle = g->flip(handle); + } + path_handle_t path_handle = g->create_path_handle("Path"); + g->append_step(path_handle, handle); + string seq = g->get_sequence(handle); + vector true_parts = {seq.substr(0, 1), seq.substr(1, 4), + seq.substr(5, 5), seq.substr(10)}; + + // Should get (C,AGAG,AGTTG,GAATATAA) (forward) + // Should get (T,TATA,TTCCA,ACTCTCTG) (reverse) + auto parts = g->divide_handle(handle, {1, 5, 10}); + assert(parts.size() == true_parts.size()); + for (int i = 0; i < parts.size(); ++i) { + assert(g->get_sequence(parts[i]) == true_parts[i]); + assert(g->get_is_reverse(parts[i]) == backwards); + } + + vector steps; + g->for_each_step_in_path(path_handle, [&](step_handle_t step_handle) { + steps.push_back(g->get_handle_of_step(step_handle)); + }); + assert(steps.size() == true_parts.size()); + for (int i = 0; i < parts.size(); ++i) { + assert(g->get_sequence(steps[i]) == true_parts[i]); + assert(g->get_is_reverse(steps[i]) == backwards); + } + } + } + } + + cerr << "MutablePathDeletableHandleGraph tests successful!" 
<< endl; +} + +template void test_packed_vector() { + enum vec_op_t { SET = 0, GET = 1, APPEND = 2, POP = 3, SERIALIZE = 4 }; + + random_device rd; + default_random_engine prng(rd()); + uniform_int_distribution op_distr(0, 4); + + int num_runs = 1000; + int num_ops = 200; + int gets_per_op = 5; + int sets_per_op = 5; + int appends_per_op = 3; + int pops_per_op = 1; + + for (size_t i = 0; i < num_runs; i++) { + + uint64_t next_val = 0; + + vector std_vec; + PackedVectorImpl dyn_vec; + + for (size_t j = 0; j < num_ops; j++) { + + vec_op_t op = (vec_op_t)op_distr(prng); + switch (op) { + case SET: + if (!std_vec.empty()) { + for (size_t k = 0; k < sets_per_op; k++) { + size_t idx = prng() % dyn_vec.size(); + std_vec[idx] = next_val; + dyn_vec.set(idx, next_val); + next_val++; + } + } + + break; + + case GET: + if (!std_vec.empty()) { + for (size_t k = 0; k < gets_per_op; k++) { + size_t idx = prng() % dyn_vec.size(); + assert(std_vec[idx] == dyn_vec.get(idx)); + next_val++; + } + } + + break; + + case APPEND: + for (size_t k = 0; k < appends_per_op; k++) { + std_vec.push_back(next_val); + dyn_vec.push_back(next_val); + next_val++; + } + + break; + + case POP: + if (!std_vec.empty()) { + for (size_t k = 0; k < pops_per_op; k++) { + std_vec.pop_back(); + dyn_vec.pop_back(); + } + } + + break; + + case SERIALIZE: { + stringstream strm; + + dyn_vec.serialize(strm); + strm.seekg(0); + PackedVectorImpl copy_vec(strm); + + assert(copy_vec.size() == dyn_vec.size()); + for (size_t i = 0; i < copy_vec.size(); i++) { + assert(copy_vec.get(i) == dyn_vec.get(i)); + } + break; + } + + default: + break; + } + + assert(std_vec.empty() == dyn_vec.empty()); + assert(std_vec.size() == dyn_vec.size()); + } + } + cerr << "PackedVector (" << typeid(PackedVectorImpl).name() + << ") tests successful!" << endl; +} + +/** + * Generic iterator test function that works with any vector-like container + * (PackedVector, PagedVector, RobustPagedVector, PackedDeque). 
+ * + * Tests ForwardIterator, BidirectionalIterator, RandomAccessIterator, and + * iterator order comparison, but not OutputIterator. + */ +template void test_iterators() { + // ForwardIterator tests + + // Empty iteration + { + VectorLike vec; + assert(vec.begin() == vec.end()); + + size_t count = 0; + for (auto it = vec.begin(); it != vec.end(); ++it) { + count++; + } + assert(count == 0); + } + + // Single element + { + VectorLike vec; + vec.push_back(42); + + assert(vec.begin() != vec.end()); + + auto it = vec.begin(); + assert(*it == 42); + ++it; + assert(it == vec.end()); + } + + // Multiple elements - basic iteration + { + VectorLike vec; + vector expected = {10, 20, 30, 40, 50}; + + for (auto val : expected) { + vec.push_back(val); + } + + // Iterate and compare + size_t idx = 0; + for (auto it = vec.begin(); it != vec.end(); ++it) { + assert(idx < expected.size()); + assert(*it == expected[idx]); + idx++; + } + assert(idx == expected.size()); + } + + // Range-based for loop + { + VectorLike vec; + vector expected = {100, 200, 300, 400, 500, 600, 700, 800}; + + for (auto val : expected) { + vec.push_back(val); + } + + size_t idx = 0; + for (auto val : vec) { + assert(idx < expected.size()); + assert(val == expected[idx]); + idx++; + } + assert(idx == expected.size()); + } + + // Iterator equality and inequality + { + VectorLike vec; + vec.push_back(1); + vec.push_back(2); + vec.push_back(3); + + auto it1 = vec.begin(); + auto it2 = vec.begin(); + assert(it1 == it2); + + ++it2; + assert(it1 != it2); + + ++it1; + assert(it1 == it2); + } + + // std::distance compatibility + { + VectorLike vec; + for (size_t i = 0; i < 15; i++) { + vec.push_back(i); + } + + auto dist = std::distance(vec.begin(), vec.end()); + assert((size_t)dist == vec.size()); + assert((size_t)dist == 15); + } + + // std::find compatibility + { + VectorLike vec; + vec.push_back(10); + vec.push_back(20); + vec.push_back(30); + vec.push_back(40); + vec.push_back(50); + + auto it = 
std::find(vec.begin(), vec.end(), 30); + assert(it != vec.end()); + assert(*it == 30); + + auto it2 = std::find(vec.begin(), vec.end(), 999); + assert(it2 == vec.end()); + } + + // Const iterator + { + VectorLike vec; + vec.push_back(5); + vec.push_back(15); + vec.push_back(25); + + const VectorLike &const_vec = vec; + + size_t count = 0; + for (auto it = const_vec.begin(); it != const_vec.end(); ++it) { + count++; + } + assert(count == 3); + + auto it = const_vec.begin(); + assert(*it == 5); + ++it; + assert(*it == 15); + ++it; + assert(*it == 25); + } + + // Large container with various patterns + { + VectorLike vec; + random_device rd; + default_random_engine prng(rd()); + uniform_int_distribution val_distr(0, 10000); + + vector expected; + size_t num_elements = 200; + + for (size_t i = 0; i < num_elements; i++) { + uint64_t val = val_distr(prng); + expected.push_back(val); + vec.push_back(val); + } + + size_t idx = 0; + for (auto val : vec) { + assert(val == expected[idx]); + idx++; + } + assert(idx == expected.size()); + } + + // Iteration after modification + { + VectorLike vec; + vec.push_back(1); + vec.push_back(2); + vec.push_back(3); + + // First iteration + size_t count = 0; + for (auto it = vec.begin(); it != vec.end(); ++it) { + count++; + } + assert(count == 3); + + // Modify + vec.push_back(4); + vec.set(0, 100); + + // Second iteration + vector expected = {100, 2, 3, 4}; + size_t idx = 0; + for (auto val : vec) { + assert(val == expected[idx]); + idx++; + } + assert(idx == 4); + } + + // Iterator copy construction + { + VectorLike vec; + vec.push_back(10); + vec.push_back(20); + + auto it1 = vec.begin(); + auto it2(it1); // Copy constructor + + assert(it1 == it2); + assert(*it1 == *it2); + assert(*it1 == 10); + } + + // Iterator assignment + { + VectorLike vec; + vec.push_back(10); + vec.push_back(20); + vec.push_back(30); + + auto it1 = vec.begin(); + auto it2 = vec.begin(); + ++it2; + + assert(*it1 == 10); + assert(*it2 == 20); + + it1 = it2; // 
Assignment + assert(it1 == it2); + assert(*it1 == 20); + } + + // BidirectionalIterator tests. + { + VectorLike vec; + vec.push_back(10); + vec.push_back(20); + vec.push_back(30); + + auto it1 = vec.begin(); + auto it2 = it1; + ++it2; + auto also_decremented = --it2; + + assert(it2 == it1); + assert(also_decremented == it1); + + it2++; + auto not_decremented = it2--; + + assert(it2 == it1); + assert(not_decremented != it1); + assert(*not_decremented == 20); + + auto it3 = vec.end(); + it3--; + assert(it3 != vec.end()); + assert(*it3 == 30); + } + + // RandomAccessIterator tests + { + VectorLike vec; + vec.push_back(10); + vec.push_back(20); + vec.push_back(30); + + auto it1 = vec.begin(); + auto it2 = it1; + + it1 += 1; + assert(*it1 == 20); + + it1 += 2; + assert(it1 == vec.end()); + + it1 -= 1; + auto it3 = it2 + 2; + assert(it1 == it3); + assert(*it1 == 30); + assert(it2 == vec.begin()); + + auto it4 = it1 - 2; + assert(*it4 == 10); + + assert(*it1 == vec.begin()[2]); + assert(*it4 == vec.begin()[0]); + assert(it4[2] == *it1); + assert(it1[-2] == *it4); + + assert(it1 + -2 == it4); + assert(it4 - -2 == it1); + + it1 += -2; + assert(it1 == it4); + + it1 -= -1; + it4++; + assert(it1 == it4); + } + + // Iterator comparison tests + { + VectorLike vec; + vec.push_back(10); + vec.push_back(20); + vec.push_back(30); + + auto it1 = vec.begin(); + auto it2 = it1; + + assert(it1 >= it2); + assert(it1 <= it2); + assert(!(it1 < it2)); + assert(!(it1 > it2)); + it1++; + + assert(it1 >= it2); + assert(!(it1 <= it2)); + assert(!(it2 >= it1)); + assert(it2 <= it1); + assert(!(it1 < it2)); + assert(it1 > it2); + assert(it2 < it1); + assert(!(it2 > it1)); + } + + // Iterator distance tests + { + VectorLike vec; + vec.push_back(10); + vec.push_back(20); + vec.push_back(30); + + assert(vec.end() - vec.begin() == vec.size()); + + auto it1 = vec.begin(); + auto it2 = it1; + + it1 += 1; + it2 += 2; + + assert(it2 - it1 == 1); + assert(it1 - it2 == -1); + + it1--; + assert(it2 - it1 == 
2); + assert(it1 - it2 == -2); + } + + cerr << "Iterator (" << typeid(typename VectorLike::iterator).name() + << ") tests successful!" << endl; +} + +template void test_paged_vector() { + enum vec_op_t { SET = 0, GET = 1, APPEND = 2, POP = 3, SERIALIZE = 4 }; + std::random_device rd; + std::default_random_engine prng(rd()); + std::uniform_int_distribution op_distr(0, 4); + std::uniform_int_distribution val_distr(0, 100); + + int num_runs = 200; + int num_ops = 200; + int gets_per_op = 5; + int sets_per_op = 5; + int appends_per_op = 3; + int pops_per_op = 1; + + for (size_t i = 0; i < num_runs; i++) { + + uint64_t next_val = val_distr(prng); + + std::vector std_vec; + PagedVectorImpl dyn_vec; + + for (size_t j = 0; j < num_ops; j++) { + + vec_op_t op = (vec_op_t)op_distr(prng); + switch (op) { + case SET: + if (!std_vec.empty()) { + for (size_t k = 0; k < sets_per_op; k++) { + size_t idx = prng() % dyn_vec.size(); + std_vec[idx] = next_val; + dyn_vec.set(idx, next_val); + next_val = val_distr(prng); + } + } + + break; + + case GET: + if (!std_vec.empty()) { + for (size_t k = 0; k < gets_per_op; k++) { + size_t idx = prng() % dyn_vec.size(); + assert(std_vec[idx] == dyn_vec.get(idx)); + next_val = val_distr(prng); + } + } + + break; + + case APPEND: + for (size_t k = 0; k < appends_per_op; k++) { + std_vec.push_back(next_val); + dyn_vec.push_back(next_val); + next_val = val_distr(prng); + } + + break; + + case POP: + if (!std_vec.empty()) { + for (size_t k = 0; k < pops_per_op; k++) { + std_vec.pop_back(); + dyn_vec.pop_back(); + } + } + + break; + + case SERIALIZE: { + stringstream strm; + + dyn_vec.serialize(strm); + strm.seekg(0); + PagedVectorImpl copy_vec(strm); + + assert(copy_vec.size() == dyn_vec.size()); + for (size_t i = 0; i < copy_vec.size(); i++) { + assert(copy_vec.get(i) == dyn_vec.get(i)); + } + break; + } + + default: + break; + } + + assert(std_vec.empty() == dyn_vec.empty()); + assert(std_vec.size() == dyn_vec.size()); + } + } + cerr << 
"PagedVector (" << typeid(PagedVectorImpl).name() + << ") tests successful!" << endl; +} + +void test_packed_deque() { + enum deque_op_t { + SET = 0, + GET = 1, + APPEND_LEFT = 2, + POP_LEFT = 3, + APPEND_RIGHT = 4, + POP_RIGHT = 5, + SERIALIZE = 6 + }; + std::random_device rd; + std::default_random_engine prng(rd()); + std::uniform_int_distribution op_distr(0, 6); + + int num_runs = 1000; + int num_ops = 200; + int gets_per_op = 5; + int sets_per_op = 5; + int appends_per_op = 3; + int pops_per_op = 1; + + for (size_t i = 0; i < num_runs; i++) { + + uint64_t next_val = 0; + + std::deque std_deq; + PackedDeque<> suc_deq; + + for (size_t j = 0; j < num_ops; j++) { + + deque_op_t op = (deque_op_t)op_distr(prng); + switch (op) { + case SET: + if (!std_deq.empty()) { + for (size_t k = 0; k < sets_per_op; k++) { + size_t idx = prng() % std_deq.size(); + std_deq[idx] = next_val; + suc_deq.set(idx, next_val); + next_val++; + } + } + + break; + + case GET: + if (!std_deq.empty()) { + for (size_t k = 0; k < gets_per_op; k++) { + size_t idx = prng() % std_deq.size(); + assert(std_deq[idx] == suc_deq.get(idx)); + next_val++; + } + } + + break; + + case APPEND_LEFT: + for (size_t k = 0; k < appends_per_op; k++) { + std_deq.push_front(next_val); + suc_deq.push_front(next_val); + next_val++; + } + + break; + + case POP_LEFT: + for (size_t k = 0; k < pops_per_op && !std_deq.empty(); k++) { + std_deq.pop_front(); + suc_deq.pop_front(); + } + + break; + + case APPEND_RIGHT: + for (size_t k = 0; k < appends_per_op; k++) { + std_deq.push_back(next_val); + suc_deq.push_back(next_val); + next_val++; + } + + break; + + case POP_RIGHT: + for (size_t k = 0; k < pops_per_op && !std_deq.empty(); k++) { + std_deq.pop_back(); + suc_deq.pop_back(); + } + + break; + + case SERIALIZE: { + stringstream strm; + + suc_deq.serialize(strm); + strm.seekg(0); + PackedDeque<> copy_deq(strm); + + assert(copy_deq.size() == suc_deq.size()); + for (size_t i = 0; i < copy_deq.size(); i++) { + 
assert(copy_deq.get(i) == suc_deq.get(i)); + } + break; + } + + default: + break; + } + + assert(std_deq.empty() == suc_deq.empty()); + assert(std_deq.size() == suc_deq.size()); + } + } + cerr << "PackedDeque tests successful!" << endl; +} + +void test_packed_set() { + enum set_op_t { INSERT = 0, REMOVE = 1, FIND = 2 }; + + random_device rd; + default_random_engine prng(rd()); + uniform_int_distribution op_distr(0, 2); + + int num_runs = 1000; + int num_ops = 200; + int inserts_per_op = 2; + int prev_inserts_per_op = 1; + int removes_per_op = 1; + int finds_per_op = 5; + + for (size_t i = 0; i < num_runs; i++) { + uint64_t next_val = 0; + + unordered_set std_set; + PackedSet<> packed_set; + + for (size_t j = 0; j < num_ops; j++) { + set_op_t op = (set_op_t)op_distr(prng); + switch (op) { + case INSERT: + + for (size_t k = 0; k < inserts_per_op; ++k) { + packed_set.insert(next_val); + std_set.insert(next_val); + next_val++; + } + for (size_t k = 0; k < prev_inserts_per_op; ++k) { + uint64_t val = prng() % next_val; + packed_set.insert(val); + std_set.insert(val); + } + + break; + + case REMOVE: + if (next_val > 0) { + for (size_t k = 0; k < removes_per_op; ++k) { + uint64_t val = prng() % next_val; + packed_set.remove(val); + std_set.erase(val); + } + } else { + packed_set.remove(0); + packed_set.remove(1); + packed_set.remove(2); + std_set.erase(0); + std_set.erase(1); + std_set.erase(2); + } + + break; + + case FIND: + if (next_val) { + for (size_t k = 0; k < finds_per_op; k++) { + uint64_t val = prng() % next_val; + assert(packed_set.find(val) == (bool)std_set.count(val)); + } + } else { + assert(packed_set.find(0) == (bool)std_set.count(0)); + assert(packed_set.find(1) == (bool)std_set.count(1)); + assert(packed_set.find(2) == (bool)std_set.count(2)); + } + + break; + + // case SERIALIZE: + // { + // stringstream strm; + // + // dyn_vec.serialize(strm); + // strm.seekg(0); + // PackedVector<> copy_vec(strm); + // + // assert(copy_vec.size() == dyn_vec.size()); + 
// for (size_t i = 0; i < copy_vec.size(); i++) { + // assert(copy_vec.get(i) == dyn_vec.get(i)); + // } + // break; + // } + + default: + break; + } + + assert(std_set.empty() == packed_set.empty()); + assert(std_set.size() == packed_set.size()); + } + } + cerr << "PackedSet tests successful!" << endl; +} + +void test_packed_graph() { + + auto check_path = [&](MutablePathDeletableHandleGraph &graph, + const path_handle_t &p, const vector &steps) { + assert(graph.get_step_count(p) == steps.size()); + + step_handle_t step = graph.path_begin(p); + for (int i = 0; i < steps.size(); i++) { + + assert(graph.get_path_handle_of_step(step) == p); + assert(graph.get_handle_of_step(step) == steps[i]); + + if (graph.get_is_circular(p)) { + assert(graph.has_next_step(step)); + assert(graph.has_previous_step(step)); + } else { + assert(graph.has_next_step(step) == i + 1 < steps.size()); + assert(graph.has_previous_step(step) == i > 0); + } + + step = graph.get_next_step(step); + } + + if (graph.get_is_circular(p) && !graph.is_empty(p)) { + assert(step == graph.path_begin(p)); + } else { + assert(step == graph.path_end(p)); + } + + step = graph.path_back(p); + + for (int i = steps.size() - 1; i >= 0; i--) { + + assert(graph.get_path_handle_of_step(step) == p); + assert(graph.get_handle_of_step(step) == steps[i]); + + if (graph.get_is_circular(p)) { + assert(graph.has_next_step(step)); + assert(graph.has_previous_step(step)); + } else { + assert(graph.has_next_step(step) == i + 1 < steps.size()); + assert(graph.has_previous_step(step) == i > 0); + } + + step = graph.get_previous_step(step); + } + + if (graph.get_is_circular(p) && !graph.is_empty(p)) { + assert(step == graph.path_back(p)); + } else { + assert(step == graph.path_front_end(p)); + } + }; + + auto check_flips = [&](MutablePathDeletableHandleGraph &graph, + const path_handle_t &p, + const vector &steps) { + auto flipped = steps; + for (size_t i = 0; i < steps.size(); i++) { + 
graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); + flipped[i] = graph.flip(flipped[i]); + check_path(graph, p, flipped); + + graph.apply_orientation(graph.flip(graph.forward(flipped[i]))); + flipped[i] = graph.flip(flipped[i]); + check_path(graph, p, flipped); + } + }; + + // defragmentation + { + PackedGraph graph; + + handle_t h1 = graph.create_handle("ATGTAG"); + handle_t h2 = graph.create_handle("ACCCC"); + handle_t h3 = graph.create_handle("C"); + handle_t h4 = graph.create_handle("ATT"); + handle_t h5 = graph.create_handle("GGCA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h3); + graph.create_edge(h3, h5); + graph.create_edge(h3, h4); + graph.create_edge(h4, h5); + + path_handle_t p0 = graph.create_path_handle("0"); + path_handle_t p1 = graph.create_path_handle("1"); + path_handle_t p2 = graph.create_path_handle("2"); + + graph.append_step(p0, h3); + graph.append_step(p0, h4); + graph.append_step(p0, h5); + + graph.append_step(p1, h1); + graph.append_step(p1, h3); + graph.append_step(p1, h5); + + graph.append_step(p2, h1); + graph.append_step(p2, h2); + graph.append_step(p2, h3); + graph.append_step(p2, h4); + graph.append_step(p2, h5); + + graph.destroy_path(p0); + graph.destroy_path(p2); + graph.destroy_handle(h2); + graph.destroy_handle(h4); + + assert(graph.get_sequence(h1) == "ATGTAG"); + assert(graph.get_sequence(h3) == "C"); + assert(graph.get_sequence(h5) == "GGCA"); + + bool found = false; + graph.follow_edges(h1, false, [&](const handle_t &next) { + if (next == h3) { + found = true; + } else { + assert(false); + } + return true; + }); + assert(found); + + found = false; + graph.follow_edges(h3, false, [&](const handle_t &next) { + if (next == h5) { + found = true; + } else { + assert(false); + } + return true; + }); + assert(found); + + check_flips(graph, p1, {h1, h3, h5}); + } + + // tightening vector allocations + { + PackedGraph graph; + handle_t h1 = graph.create_handle("ATGTAG"); + 
handle_t h2 = graph.create_handle("ACCCC"); + handle_t h3 = graph.create_handle("C"); + handle_t h4 = graph.create_handle("ATT"); + handle_t h5 = graph.create_handle("GGCA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h3); + graph.create_edge(h3, h5); + graph.create_edge(h3, h4); + graph.create_edge(h4, h5); + + path_handle_t p0 = graph.create_path_handle("0"); + path_handle_t p1 = graph.create_path_handle("1"); + path_handle_t p2 = graph.create_path_handle("2"); + + graph.append_step(p0, h3); + graph.append_step(p0, h4); + graph.append_step(p0, h5); + + graph.append_step(p1, h1); + graph.append_step(p1, h3); + graph.append_step(p1, h5); + + graph.append_step(p2, h1); + graph.append_step(p2, h2); + graph.append_step(p2, h3); + graph.append_step(p2, h4); + graph.append_step(p2, h5); + + // delete some things, but not enough to trigger defragmentation + graph.destroy_path(p2); + graph.destroy_handle(h2); + // reallocate and compress down to the smaller size + graph.optimize(false); + + assert(graph.get_sequence(h1) == "ATGTAG"); + assert(graph.get_sequence(h3) == "C"); + assert(graph.get_sequence(h4) == "ATT"); + assert(graph.get_sequence(h5) == "GGCA"); + + int count = 0; + bool found1 = false, found2 = false; + graph.follow_edges(h1, false, [&](const handle_t &h) { + if (h == h3) { + found1 = true; + } + count++; + }); + assert(found1); + assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h1, true, [&](const handle_t &h) { count++; }); + assert(count == 0); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h3, false, [&](const handle_t &h) { + if (h == h4) { + found1 = true; + } + if (h == h5) { + found2 = true; + } + count++; + }); + assert(found1); + assert(found2); + assert(count == 2); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h3, true, [&](const handle_t &h) { + if (h == h1) { + found1 = true; + } + count++; + }); + assert(found1); + 
assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h4, false, [&](const handle_t &h) { + if (h == h5) { + found1 = true; + } + count++; + }); + assert(found1); + assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h4, true, [&](const handle_t &h) { + if (h == h3) { + found1 = true; + } + count++; + }); + assert(found1); + assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h5, false, [&](const handle_t &h) { count++; }); + assert(count == 0); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h5, true, [&](const handle_t &h) { + if (h == h3) { + found1 = true; + } else if (h == h4) { + found2 = true; + } + count++; + }); + assert(found1); + assert(found2); + assert(count == 2); + + check_flips(graph, p0, {h3, h4, h5}); + check_flips(graph, p1, {h1, h3, h5}); + } + + // optimizing with id reassignment + { + PackedGraph graph; + handle_t h1 = graph.create_handle("ATGTAG"); + handle_t h2 = graph.create_handle("ACCCC"); + handle_t h3 = graph.create_handle("C"); + handle_t h4 = graph.create_handle("ATT"); + handle_t h5 = graph.create_handle("GGCA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h3); + graph.create_edge(h3, h5); + graph.create_edge(h3, h4); + graph.create_edge(h4, h5); + + path_handle_t p0 = graph.create_path_handle("0"); + path_handle_t p1 = graph.create_path_handle("1"); + path_handle_t p2 = graph.create_path_handle("2"); + + graph.append_step(p0, h3); + graph.append_step(p0, h4); + graph.append_step(p0, h5); + + graph.append_step(p1, h1); + graph.append_step(p1, h3); + graph.append_step(p1, h5); + + graph.append_step(p2, h1); + graph.append_step(p2, h2); + graph.append_step(p2, h3); + graph.append_step(p2, h4); + graph.append_step(p2, h5); + + // delete some things, but not enough to trigger defragmentation + graph.destroy_path(p2); + graph.destroy_handle(h2); + // reallocate and 
compress down to the smaller size, reassigning IDs + graph.optimize(true); + set seen_ids; + + int count = 0; + bool found1 = false, found2 = false, found3 = false, found4 = false; + graph.for_each_handle([&](const handle_t &handle) { + if (graph.get_sequence(handle) == "ATGTAG") { + h1 = handle; + found1 = true; + } else if (graph.get_sequence(handle) == "C") { + h3 = handle; + found2 = true; + } else if (graph.get_sequence(handle) == "ATT") { + h4 = handle; + found3 = true; + } else if (graph.get_sequence(handle) == "GGCA") { + h5 = handle; + found4 = true; + } else { + assert(false); + } + count++; + + seen_ids.insert(graph.get_id(handle)); + + assert(graph.get_id(handle) >= 1); + assert(graph.get_id(handle) <= 4); + }); + + assert(found1); + assert(found2); + assert(found3); + assert(found4); + assert(count == 4); + assert(seen_ids.size() == 4); + + count = 0; + found1 = found2 = found3 = found4 = false; + + graph.follow_edges(h1, false, [&](const handle_t &h) { + if (h == h3) { + found1 = true; + } + count++; + }); + assert(found1); + assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h1, true, [&](const handle_t &h) { count++; }); + assert(count == 0); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h3, false, [&](const handle_t &h) { + if (h == h4) { + found1 = true; + } + if (h == h5) { + found2 = true; + } + count++; + }); + assert(found1); + assert(found2); + assert(count == 2); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h3, true, [&](const handle_t &h) { + if (h == h1) { + found1 = true; + } + count++; + }); + assert(found1); + assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h4, false, [&](const handle_t &h) { + if (h == h5) { + found1 = true; + } + count++; + }); + assert(found1); + assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h4, true, [&](const handle_t &h) { + if (h == h3) { + 
found1 = true; + } + count++; + }); + assert(found1); + assert(count == 1); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h5, false, [&](const handle_t &h) { count++; }); + assert(count == 0); + + count = 0; + found1 = false, found2 = false; + graph.follow_edges(h5, true, [&](const handle_t &h) { + if (h == h3) { + found1 = true; + } else if (h == h4) { + found2 = true; + } + count++; + }); + assert(found1); + assert(found2); + assert(count == 2); + + check_flips(graph, p0, {h3, h4, h5}); + check_flips(graph, p1, {h1, h3, h5}); + } + + cerr << "PackedGraph tests successful!" << endl; +} + +void test_multithreaded_overlay_construction() { + HashGraph graph; + + std::string node_content = "GATTACACATTAG"; + size_t node_count = 1000; + size_t true_path_length = node_count * node_content.size(); + size_t path_count = 10; + // We should coalesce 2 paths into each index. + size_t steps_per_index = node_count * 2; + + // Make a long linear graph + std::vector nodes; + for (size_t i = 0; i < node_count; i++) { + nodes.push_back(graph.create_handle(node_content)); + if (nodes.size() > 1) { + graph.create_edge(nodes[nodes.size() - 2], nodes[nodes.size() - 1]); + } + } + + // Make a bunch of paths and keep their names + std::vector paths; + for (size_t i = 0; i < path_count; i++) { + string path_name = "path" + std::to_string(i); + paths.push_back(path_name); + path_handle_t path_handle = graph.create_path_handle(path_name); + for (auto &visit : nodes) { + graph.append_step(path_handle, visit); + } + } + + // Back up the thread count we have been using. 
+ int backup_thread_count = omp_get_max_threads(); + for (int thread_count = 1; thread_count <= 4; thread_count++) { + // Try this number of threads + omp_set_num_threads(thread_count); + + // Make an overlay with this many threads for construction + PackedPositionOverlay overlay(&graph, {}, steps_per_index); + + // Make sure it is right + for (auto &path_name : paths) { + assert(overlay.has_path(path_name)); + path_handle_t path_handle = overlay.get_path_handle(path_name); + // Make sure they have the right name and length. + assert(overlay.get_path_name(path_handle) == path_name); + assert(overlay.get_path_length(path_handle) == true_path_length); + for (size_t i = 0; i < true_path_length; i++) { + // For each position + // Figure out what node and orientation it should have. + handle_t true_underlying_handle = nodes.at(i / node_content.size()); + // Find its step + step_handle_t seen_step = overlay.get_step_at_position(path_handle, i); + // Make sure it is on the right path + assert(overlay.get_path_handle_of_step(seen_step) == path_handle); + // Make sure it is the right node + handle_t observed_handle = overlay.get_handle_of_step(seen_step); + assert(overlay.get_underlying_handle(observed_handle) == + true_underlying_handle); + // Make sure the step is at the right place + size_t true_step_start = i - (i % node_content.size()); + assert(overlay.get_position_of_step(seen_step) == true_step_start); + } + } + } + // Go back to the default thread count. + omp_set_num_threads(backup_thread_count); + + cerr << "Multithreaded PackedPositionOverlay tests successful!" 
<< endl; +} + +void test_path_position_overlays() { + + vector implementations; + + HashGraph hg; + implementations.push_back(&hg); + + PackedGraph pg; + implementations.push_back(&pg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + for (MutablePathDeletableHandleGraph *implementation : implementations) { + + MutablePathDeletableHandleGraph &graph = *implementation; + + handle_t h1 = graph.create_handle("AAA"); + handle_t h2 = graph.create_handle("A"); + handle_t h3 = graph.create_handle("T"); + handle_t h4 = graph.create_handle("AAAAA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h4); + graph.create_edge(h3, h4); + + path_handle_t p1 = graph.create_path_handle("p1"); + step_handle_t s1 = graph.append_step(p1, h1); + step_handle_t s2 = graph.append_step(p1, h2); + step_handle_t s3 = graph.append_step(p1, h4); + + // static position overlays + { + vector overlays; + + PositionOverlay basic_overlay(&graph); + PackedPositionOverlay packed_overlay(&graph); + + overlays.push_back(&basic_overlay); + overlays.push_back(&packed_overlay); + + for (PathPositionHandleGraph *implementation : overlays) { + PathPositionHandleGraph &overlay = *implementation; + + assert(overlay.get_path_length(p1) == 9); + + assert(overlay.get_position_of_step(s1) == 0); + assert(overlay.get_position_of_step(s2) == 3); + assert(overlay.get_position_of_step(s3) == 4); + + assert(overlay.get_step_at_position(p1, 0) == s1); + assert(overlay.get_step_at_position(p1, 1) == s1); + assert(overlay.get_step_at_position(p1, 2) == s1); + assert(overlay.get_step_at_position(p1, 3) == s2); + assert(overlay.get_step_at_position(p1, 4) == s3); + assert(overlay.get_step_at_position(p1, 5) == s3); + assert(overlay.get_step_at_position(p1, 6) == s3); + assert(overlay.get_step_at_position(p1, 7) == s3); + assert(overlay.get_step_at_position(p1, 8) == s3); + assert(overlay.get_step_at_position(p1, 9) == overlay.path_end(p1)); + 
assert(overlay.get_step_at_position(p1, 10) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); + } + } + + // mutable position overlay + { + MutablePositionOverlay overlay(&graph); + + handle_t h5 = overlay.create_handle("AAAA"); + + overlay.create_edge(h4, h5); + overlay.create_edge(h5, h5); + + step_handle_t s4 = overlay.append_step(p1, h5); + + assert(overlay.get_path_length(p1) == 13); + + assert(overlay.get_position_of_step(s4) == 9); + + assert(overlay.get_step_at_position(p1, 9) == s4); + assert(overlay.get_step_at_position(p1, 10) == s4); + assert(overlay.get_step_at_position(p1, 11) == s4); + assert(overlay.get_step_at_position(p1, 12) == s4); + assert(overlay.get_step_at_position(p1, 13) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 14) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); + + step_handle_t s5 = overlay.append_step(p1, h5); + + assert(overlay.get_path_length(p1) == 17); + + assert(overlay.get_position_of_step(s5) == 13); + + assert(overlay.get_step_at_position(p1, 13) == s5); + assert(overlay.get_step_at_position(p1, 14) == s5); + assert(overlay.get_step_at_position(p1, 15) == s5); + assert(overlay.get_step_at_position(p1, 16) == s5); + assert(overlay.get_step_at_position(p1, 17) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 18) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); + + path_handle_t p2 = overlay.create_path_handle("p2"); + + assert(overlay.get_path_length(p2) == 0); + + step_handle_t s6 = overlay.prepend_step(p2, h3); + + assert(overlay.get_path_length(p2) == 1); + + assert(overlay.get_position_of_step(s6) == 0); + + assert(overlay.get_step_at_position(p2, 0) == s6); + assert(overlay.get_step_at_position(p2, 1) == overlay.path_end(p2)); + assert(overlay.get_step_at_position(p2, 2) == overlay.path_end(p2)); + 
assert(overlay.get_step_at_position(p2, 1000) == overlay.path_end(p2)); + + step_handle_t s7 = overlay.prepend_step(p2, h1); + + assert(overlay.get_path_length(p2) == 4); + + assert(overlay.get_position_of_step(s7) == 0); + assert(overlay.get_position_of_step(s6) == 3); + + assert(overlay.get_step_at_position(p2, 0) == s7); + assert(overlay.get_step_at_position(p2, 1) == s7); + assert(overlay.get_step_at_position(p2, 2) == s7); + assert(overlay.get_step_at_position(p2, 3) == s6); + assert(overlay.get_step_at_position(p2, 4) == overlay.path_end(p2)); + assert(overlay.get_step_at_position(p2, 5) == overlay.path_end(p2)); + assert(overlay.get_step_at_position(p2, 1000) == overlay.path_end(p2)); + + handle_t h2_flip = overlay.apply_orientation(overlay.flip(h2)); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 3)) == + overlay.flip(h2_flip)); + + vector offs_1{1}; + auto parts_1 = overlay.divide_handle(overlay.flip(h1), offs_1); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 0)) == + overlay.flip(parts_1[1])); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 1)) == + overlay.flip(parts_1[1])); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 2)) == + overlay.flip(parts_1[0])); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 3)) == + overlay.flip(h2_flip)); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 0)) == + overlay.flip(parts_1[1])); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 1)) == + overlay.flip(parts_1[1])); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 2)) == + overlay.flip(parts_1[0])); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p2, 3)) == + h3); + + vector offs_2{1, 3}; + auto parts_2 = overlay.divide_handle(h5, offs_2); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 9)) == + parts_2[0]); + 
assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 10)) == + parts_2[1]); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 11)) == + parts_2[1]); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 12)) == + parts_2[2]); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 13)) == + parts_2[0]); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 14)) == + parts_2[1]); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 15)) == + parts_2[1]); + assert(overlay.get_handle_of_step(overlay.get_step_at_position(p1, 16)) == + parts_2[2]); + assert(overlay.get_step_at_position(p1, 17) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 18) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); + } + } + cerr << "PathPositionOverlay tests successful!" << endl; +} + +void test_packed_reference_path_overlay() { + + vector implementations; + + HashGraph hg; + implementations.push_back(&hg); + + PackedGraph pg; + implementations.push_back(&pg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + for (MutablePathDeletableHandleGraph *implementation : implementations) { + + MutablePathDeletableHandleGraph &graph = *implementation; + + handle_t h1 = graph.create_handle("AAA"); + handle_t h2 = graph.create_handle("A"); + handle_t h3 = graph.create_handle("T"); + handle_t h4 = graph.create_handle("AAAAA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h4); + graph.create_edge(h3, h4); + + path_handle_t p1 = graph.create_path_handle("p1"); + step_handle_t s1 = graph.append_step(p1, h1); + step_handle_t s2 = graph.append_step(p1, h2); + step_handle_t s3 = graph.append_step(p1, h4); + + path_handle_t p2 = graph.create_path_handle("p2"); + step_handle_t s2_1 = graph.append_step(p2, graph.flip(h4)); + step_handle_t s2_2 = graph.append_step(p2, graph.flip(h3)); + 
step_handle_t s2_3 = graph.append_step(p2, graph.flip(h1)); + { - // Make a graph - MappedPackedGraph mpg; - // Give it a node - mpg.create_handle("GATTACA", 1); - // Save it to an FD - mpg.serialize(fd); - // Make sure write-back works - mpg.create_handle("CATTAG", 2); - - // Make sure it looks right now - check_graph(mpg); + + PackedReferencePathOverlay overlay(&graph); + + assert(overlay.get_path_length(p1) == 9); + + assert(overlay.get_position_of_step(s1) == 0); + assert(overlay.get_position_of_step(s2) == 3); + assert(overlay.get_position_of_step(s3) == 4); + + assert(overlay.get_step_at_position(p1, 0) == s1); + assert(overlay.get_step_at_position(p1, 1) == s1); + assert(overlay.get_step_at_position(p1, 2) == s1); + assert(overlay.get_step_at_position(p1, 3) == s2); + assert(overlay.get_step_at_position(p1, 4) == s3); + assert(overlay.get_step_at_position(p1, 5) == s3); + assert(overlay.get_step_at_position(p1, 6) == s3); + assert(overlay.get_step_at_position(p1, 7) == s3); + assert(overlay.get_step_at_position(p1, 8) == s3); + assert(overlay.get_step_at_position(p1, 9) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 10) == overlay.path_end(p1)); + assert(overlay.get_step_at_position(p1, 1000) == overlay.path_end(p1)); + + bool found1 = false; + bool found2 = false; + overlay.for_each_step_on_handle(h1, [&](const step_handle_t &s) { + if (s == s1) { + found1 = true; + } else if (s == s2_3) { + found2 = true; + } else { + assert(false); + } + }); + assert(found1); + assert(found2); + found1 = false; + found2 = false; + + overlay.for_each_step_on_handle(h2, [&](const step_handle_t &s) { + if (s == s2) { + found1 = true; + } else { + assert(false); + } + }); + assert(found1); + found1 = false; + + overlay.for_each_step_on_handle(h3, [&](const step_handle_t &s) { + if (s == s2_2) { + found1 = true; + } else { + assert(false); + } + }); + assert(found1); + found1 = false; + + overlay.for_each_step_on_handle(h4, [&](const step_handle_t &s) { + 
if (s == s3) { + found1 = true; + } else if (s == s2_1) { + found2 = true; + } else { + assert(false); + } + }); + assert(found1); + assert(found2); + found1 = false; + found2 = false; } + { - // Make a graph again - MappedPackedGraph mpg; - // Load it from the fd - mpg.deserialize(fd); - // Make sure it looks right - check_graph(mpg); + + // Make sure we can handle a lot of paths + for (size_t i = 0; i < 100; i++) { + path_handle_t pn = graph.create_path_handle("pn" + std::to_string(i)); + graph.append_step(pn, h1); + graph.append_step(pn, h2); + graph.append_step(pn, h4); + } + + // Split the paths up agross many indexes for testing + PackedReferencePathOverlay overlay(&graph, {}, 10); + + std::unordered_set seen_paths; + overlay.for_each_step_on_handle(h1, [&](const step_handle_t &s) { + seen_paths.insert( + overlay.get_path_name(overlay.get_path_handle_of_step(s))); + }); + // Should have the 2 original paths and the 100 new ones. + assert(seen_paths.size() == 102); } - assert(close(fd) == 0); + } + cerr << "PackedReferencePathOverlay tests successful!" 
<< endl; +} + +void test_reference_path_overlay() { + + vector implementations; + + HashGraph hg; + implementations.push_back(&hg); + + PackedGraph pg; + implementations.push_back(&pg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + for (MutablePathDeletableHandleGraph *implementation : implementations) { + + MutablePathDeletableHandleGraph &graph = *implementation; + + auto h1 = graph.create_handle("AAAA"); + auto h2 = graph.create_handle("AA"); + auto h3 = graph.create_handle("A"); + auto h4 = graph.create_handle("AAAAAA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h4); + graph.create_edge(h3, h4); + + auto p = graph.create_path_handle("p"); + auto s1 = graph.append_step(p, h1); + auto s2 = graph.append_step(p, h2); + auto s3 = graph.append_step(p, h4); + { - // Make a graph again - MappedPackedGraph mpg; - // Load it from the file - mpg.deserialize(filename); - // Make sure it looks right - check_graph(mpg); + ReferencePathOverlay ref_overlay(&graph); + + auto os1 = ref_overlay.path_begin(p); + auto os2 = ref_overlay.get_next_step(os1); + auto os3 = ref_overlay.get_next_step(os2); + + assert(ref_overlay.get_next_step(os3) == ref_overlay.path_end(p)); + assert(ref_overlay.get_previous_step(os1) == + ref_overlay.path_front_end(p)); + + assert(ref_overlay.has_next_step(os1)); + assert(ref_overlay.has_next_step(os2)); + assert(!ref_overlay.has_next_step(os3)); + + assert(!ref_overlay.has_previous_step(os1)); + assert(ref_overlay.has_previous_step(os2)); + assert(ref_overlay.has_previous_step(os3)); + + assert(ref_overlay.get_next_step(os1) == os2); + assert(ref_overlay.get_next_step(os2) == os3); + assert(ref_overlay.get_next_step(os3) == ref_overlay.path_end(p)); + assert(ref_overlay.get_previous_step(os1) == + ref_overlay.path_front_end(p)); + assert(ref_overlay.get_previous_step(os2) == os1); + assert(ref_overlay.get_previous_step(os3) == os2); + + assert(ref_overlay.get_step_count(p) == 3); + + 
assert(ref_overlay.get_path_length(p) == 12); + + assert(ref_overlay.get_position_of_step(os1) == 0); + assert(ref_overlay.get_position_of_step(os2) == 4); + assert(ref_overlay.get_position_of_step(os3) == 6); + + for (size_t i = 0; i < 25; ++i) { + if (i < 4) { + assert(ref_overlay.get_step_at_position(p, i) == os1); + } else if (i < 6) { + assert(ref_overlay.get_step_at_position(p, i) == os2); + } else if (i < 12) { + assert(ref_overlay.get_step_at_position(p, i) == os3); + } else { + assert(ref_overlay.get_step_at_position(p, i) == + ref_overlay.path_end(p)); + } + } + + int count = 0; + ref_overlay.for_each_step_on_handle(h1, [&](const step_handle_t &s) { + assert(s == os1); + ++count; + }); + assert(count == 1); + count = 0; + ref_overlay.for_each_step_on_handle(h2, [&](const step_handle_t &s) { + assert(s == os2); + ++count; + }); + assert(count == 1); + count = 0; + ref_overlay.for_each_step_on_handle( + h3, [&](const step_handle_t &s) { ++count; }); + assert(count == 0); + count = 0; + ref_overlay.for_each_step_on_handle(h4, [&](const step_handle_t &s) { + assert(s == os3); + ++count; + }); + assert(count == 1); + } + + random_device rd; + default_random_engine prng(12261988); //(rd()); + + uniform_int_distribution node_len_distr(1, 5); + + vector paths(1, p); + + paths.push_back(graph.create_path_handle(std::to_string(paths.size()))); + paths.push_back(graph.create_path_handle(std::to_string(paths.size()))); + + uniform_int_distribution path_distr(0, paths.size() - 1); + + std::vector handles; + + // add enough nodes to stress test the parallel code + for (size_t i = 0; i < 200000; ++i) { + auto p = paths[path_distr(prng)]; + string seq(node_len_distr(prng), 'A'); + auto h = graph.create_handle(seq); + handles.push_back(h); + if (graph.get_step_count(p) != 0) { + graph.create_edge(graph.get_handle_of_step(graph.path_back(p)), h); + } + graph.append_step(p, h); + } + + uniform_int_distribution handle_distr(0, handles.size() - 1); + + // add enough path 
steps that some nodes will have >= 3 path coverage + for (size_t i = 0; i < 100000; ++i) { + auto p = paths[path_distr(prng)]; + auto h1 = graph.get_handle_of_step(graph.path_back(p)); + auto h2 = handles[handle_distr(prng)]; + graph.create_edge(h1, h2); + graph.append_step(p, h2); } + { - // Make a graph again - MappedPackedGraph mpg; - // Load it from a stream - std::ifstream stream(filename); - mpg.deserialize(stream); - // Make sure it looks right - check_graph(mpg); + ReferencePathOverlay ref_overlay(&graph); + + assert(ref_overlay.get_path_count() == paths.size()); + + std::unordered_map> steps_on_handle; + + ref_overlay.for_each_path_handle([&](const path_handle_t &path) { + size_t walked_len = 0; + for (auto s = ref_overlay.path_begin(path), + end = ref_overlay.path_end(path); + s != end; s = ref_overlay.get_next_step(s)) { + assert(ref_overlay.get_path_handle_of_step(s) == path); + assert(ref_overlay.get_position_of_step(s) == walked_len); + auto h = ref_overlay.get_handle_of_step(s); + size_t len = ref_overlay.get_length(h); + for (size_t i = 0; i < len; ++i) { + auto s2 = ref_overlay.get_step_at_position(path, walked_len + i); + assert(s2 == s); + } + steps_on_handle[h].push_back(s); + walked_len += len; + } + assert(ref_overlay.get_path_length(path) == walked_len); + }); + + ref_overlay.for_each_handle([&](const handle_t &handle) { + // std::cerr << "check handles on " << + // ref_overlay.get_id(handle) << '\n'; + auto &direct = steps_on_handle[handle]; + std::sort(direct.begin(), direct.end()); + vector indexed; + ref_overlay.for_each_step_on_handle( + handle, + [&](const step_handle_t &step) { indexed.push_back(step); }); + std::sort(indexed.begin(), indexed.end()); + if (direct != indexed) { + std::cerr << "error on node " << ref_overlay.get_id(handle) << '\n'; + std::cerr << "direct\n"; + for (auto s : direct) { + std::cerr << '\t' << handlegraph::as_integers(s)[0] << '\t' + << handlegraph::as_integers(s)[1] << '\t' + << handlegraph::as_integer( + 
ref_overlay.get_path_handle_of_step(s)) + << '\t' + << ref_overlay.get_id(ref_overlay.get_handle_of_step(s)) + << '\n'; + } + std::cerr << "indexed\n"; + for (auto s : indexed) { + std::cerr << '\t' << handlegraph::as_integers(s)[0] << '\t' + << handlegraph::as_integers(s)[1] << '\t' + << handlegraph::as_integer( + ref_overlay.get_path_handle_of_step(s)) + << '\t' + << ref_overlay.get_id(ref_overlay.get_handle_of_step(s)) + << '\n'; + } + } + assert(direct == indexed); + }); } - unlink(filename); - - cerr << "MappedPackedGraph tests successful!" << endl; + } + + cerr << "ReferencePathOverlay tests successful!" << endl; +} + +void test_vectorizable_overlays() { + + vector implementations; + + HashGraph hg; + implementations.push_back(&hg); + + PackedGraph pg; + implementations.push_back(&pg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + for (MutablePathDeletableHandleGraph *implementation : implementations) { + + MutablePathDeletableHandleGraph &graph = *implementation; + + handle_t h1 = graph.create_handle("AAA"); + handle_t h2 = graph.create_handle("A"); + handle_t h3 = graph.create_handle("T"); + handle_t h4 = graph.create_handle("AAAAA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h4); + graph.create_edge(h3, h4); + + path_handle_t p1 = graph.create_path_handle("p1"); + step_handle_t s1 = graph.append_step(p1, h1); + step_handle_t s2 = graph.append_step(p1, h2); + step_handle_t s3 = graph.append_step(p1, h4); + + bdsg::VectorizableOverlay overlay(&graph); + + set edge_ranks; + size_t edge_count = 0; + graph.for_each_edge([&](edge_t edge) { + edge_ranks.insert(overlay.edge_index(edge)); + ++edge_count; + }); + + // every edge gets a unique rank + assert(edge_ranks.size() == edge_count); + + size_t node_count = 0; + map pos_to_node; + graph.for_each_handle([&](handle_t handle) { + pos_to_node[overlay.node_vector_offset(graph.get_id(handle))] = + graph.get_id(handle); + ++node_count; + }); + + // every 
node gets a unique rank + assert(pos_to_node.size() == node_count); + + auto pni = pos_to_node.begin(); + auto next = pni; + for (++next; next != pos_to_node.end(); ++pni, ++next) { + assert(next->first - pni->first == + graph.get_length(graph.get_handle(pni->second))); + } + + // check that node_at_vector_offset works + graph.for_each_handle([&](handle_t handle) { + size_t pos = overlay.node_vector_offset(graph.get_id(handle)); + for (size_t i = 0; i < graph.get_length(handle); ++i) { + assert(overlay.node_at_vector_offset(pos + i + 1) == + graph.get_id(handle)); + } + }); + } + cerr << "VectorizableOverlay tests successful!" << endl; +} + +void test_packed_subgraph_overlay() { + + vector implementations; + + HashGraph hg; + implementations.push_back(&hg); + + PackedGraph pg; + implementations.push_back(&pg); + + MappedPackedGraph mpg; + implementations.push_back(&mpg); + + for (MutablePathDeletableHandleGraph *implementation : implementations) { + + MutablePathDeletableHandleGraph &graph = *implementation; + + handle_t h1 = graph.create_handle("AAA"); + handle_t h2 = graph.create_handle("A"); + handle_t h3 = graph.create_handle("T"); + handle_t h4 = graph.create_handle("AAAAA"); + + graph.create_edge(h1, h2); + graph.create_edge(h1, h3); + graph.create_edge(h2, h4); + graph.create_edge(h3, h4); + + PackedSubgraphOverlay subgraph(&graph); + assert(subgraph.get_node_count() == 0); + subgraph.for_each_handle([&](const handle_t &h) { assert(false); }); + assert(!subgraph.has_node(graph.get_id(h1))); + assert(!subgraph.has_node(graph.get_id(h2))); + assert(!subgraph.has_node(graph.get_id(h3))); + assert(!subgraph.has_node(graph.get_id(h4))); + + subgraph.add_node(h1); + + assert(subgraph.get_node_count() == 1); + bool found1 = false; + subgraph.for_each_handle([&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h1)) { + found1 = true; + assert(graph.get_sequence(h) == graph.get_sequence(h1)); + } else { + assert(false); + } + }); + assert(found1); + 
found1 = false; + + assert(subgraph.has_node(graph.get_id(h1))); + assert(!subgraph.has_node(graph.get_id(h2))); + assert(!subgraph.has_node(graph.get_id(h3))); + assert(!subgraph.has_node(graph.get_id(h4))); + + subgraph.follow_edges(h1, true, [&](const handle_t &h) { assert(false); }); + subgraph.follow_edges(h1, false, [&](const handle_t &h) { assert(false); }); + + assert(subgraph.get_degree(h1, true) == 0); + assert(subgraph.get_degree(h1, false) == 0); + + subgraph.add_node(h4); + + assert(subgraph.get_node_count() == 2); + bool found2 = false; + subgraph.for_each_handle([&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h1)) { + found1 = true; + assert(graph.get_sequence(h) == graph.get_sequence(h1)); + } else if (subgraph.get_id(h) == graph.get_id(h4)) { + found2 = true; + assert(graph.get_sequence(h) == graph.get_sequence(h4)); + } else { + assert(false); + } + }); + assert(found1); + assert(found2); + found1 = false; + found2 = false; + + assert(subgraph.has_node(graph.get_id(h1))); + assert(!subgraph.has_node(graph.get_id(h2))); + assert(!subgraph.has_node(graph.get_id(h3))); + assert(subgraph.has_node(graph.get_id(h4))); + + subgraph.follow_edges(h1, true, [&](const handle_t &h) { assert(false); }); + subgraph.follow_edges(h1, false, [&](const handle_t &h) { assert(false); }); + subgraph.follow_edges(h4, true, [&](const handle_t &h) { assert(false); }); + subgraph.follow_edges(h4, false, [&](const handle_t &h) { assert(false); }); + + assert(subgraph.get_degree(h1, true) == 0); + assert(subgraph.get_degree(h1, false) == 0); + assert(subgraph.get_degree(h4, true) == 0); + assert(subgraph.get_degree(h4, false) == 0); + + subgraph.add_node(graph.flip(h2)); + + assert(subgraph.get_node_count() == 3); + bool found3 = false; + subgraph.for_each_handle([&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h1)) { + found1 = true; + assert(graph.get_sequence(h) == graph.get_sequence(h1)); + } else if (subgraph.get_id(h) == 
graph.get_id(h2)) { + found2 = true; + assert(graph.get_sequence(h) == graph.get_sequence(h2)); + } else if (subgraph.get_id(h) == graph.get_id(h4)) { + found3 = true; + assert(graph.get_sequence(h) == graph.get_sequence(h4)); + } else { + assert(false); + } + }); + assert(found1); + assert(found2); + assert(found3); + found1 = false; + found2 = false; + found3 = false; + + assert(subgraph.has_node(graph.get_id(h1))); + assert(subgraph.has_node(graph.get_id(h2))); + assert(!subgraph.has_node(graph.get_id(h3))); + assert(subgraph.has_node(graph.get_id(h4))); + + bool found4 = false; + subgraph.follow_edges(h1, true, [&](const handle_t &h) { assert(false); }); + subgraph.follow_edges(h1, false, [&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h2) && !graph.get_is_reverse(h)) { + found1 = true; + } else { + assert(false); + } + }); + subgraph.follow_edges(h2, true, [&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h1) && !graph.get_is_reverse(h)) { + found2 = true; + } else { + assert(false); + } + }); + subgraph.follow_edges(h2, false, [&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h4) && !graph.get_is_reverse(h)) { + found3 = true; + } else { + assert(false); + } + }); + subgraph.follow_edges(h4, true, [&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h2) && !graph.get_is_reverse(h)) { + found4 = true; + } else { + assert(false); + } + }); + subgraph.follow_edges(h4, false, [&](const handle_t &h) { assert(false); }); + + assert(subgraph.get_degree(h1, true) == 0); + assert(subgraph.get_degree(h1, false) == 1); + assert(subgraph.get_degree(h2, true) == 1); + assert(subgraph.get_degree(h2, false) == 1); + assert(subgraph.get_degree(h4, true) == 1); + assert(subgraph.get_degree(h4, false) == 0); + + assert(found1); + assert(found2); + assert(found3); + assert(found4); + found1 = false; + found2 = false; + found3 = false; + found4 = false; + + subgraph.remove_node(h1); + + 
assert(subgraph.get_node_count() == 2); + subgraph.for_each_handle([&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h2)) { + found1 = true; + assert(graph.get_sequence(h) == graph.get_sequence(h2)); + } else if (subgraph.get_id(h) == graph.get_id(h4)) { + found2 = true; + assert(graph.get_sequence(h) == graph.get_sequence(h4)); + } else { + assert(false); + } + }); + assert(found1); + assert(found2); + found1 = false; + found2 = false; + + assert(!subgraph.has_node(graph.get_id(h1))); + assert(subgraph.has_node(graph.get_id(h2))); + assert(!subgraph.has_node(graph.get_id(h3))); + assert(subgraph.has_node(graph.get_id(h4))); + + subgraph.follow_edges(h2, true, [&](const handle_t &h) { assert(false); }); + subgraph.follow_edges(h2, false, [&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h4) && !graph.get_is_reverse(h)) { + found1 = true; + } else { + assert(false); + } + }); + subgraph.follow_edges(h4, true, [&](const handle_t &h) { + if (subgraph.get_id(h) == graph.get_id(h2) && !graph.get_is_reverse(h)) { + found2 = true; + } else { + assert(false); + } + }); + subgraph.follow_edges(h4, false, [&](const handle_t &h) { assert(false); }); + + assert(subgraph.get_degree(h2, true) == 0); + assert(subgraph.get_degree(h2, false) == 1); + assert(subgraph.get_degree(h4, true) == 1); + assert(subgraph.get_degree(h4, false) == 0); + + assert(found1); + assert(found2); + found1 = false; + found2 = false; + } + + cerr << "PackedSubgraphOverlay tests successful!" 
<< endl; +} + +void test_mapped_packed_graph() { + auto check_graph = [](const MappedPackedGraph &mpg) { + // Dump it into this map + unordered_map graph_contents; + mpg.for_each_handle([&](const handle_t &h) { + graph_contents[mpg.get_id(h)] = mpg.get_sequence(h); + }); + + // Make sure it has the right things + assert(graph_contents.at(1) == "GATTACA"); + assert(graph_contents.at(2) == "CATTAG"); + }; + + char filename[] = "tmpXXXXXX"; + int fd = mkstemp(filename); + assert(fd != -1); + { + // Make a graph + MappedPackedGraph mpg; + // Give it a node + mpg.create_handle("GATTACA", 1); + // Save it to an FD + mpg.serialize(fd); + // Make sure write-back works + mpg.create_handle("CATTAG", 2); + + // Make sure it looks right now + check_graph(mpg); + } + { + // Make a graph again + MappedPackedGraph mpg; + // Load it from the fd + mpg.deserialize(fd); + // Make sure it looks right + check_graph(mpg); + } + assert(close(fd) == 0); + { + // Make a graph again + MappedPackedGraph mpg; + // Load it from the file + mpg.deserialize(filename); + // Make sure it looks right + check_graph(mpg); + } + { + // Make a graph again + MappedPackedGraph mpg; + // Load it from a stream + std::ifstream stream(filename); + mpg.deserialize(stream); + // Make sure it looks right + check_graph(mpg); + } + unlink(filename); + + cerr << "MappedPackedGraph tests successful!" 
<< endl; } void test_hash_graph() { - - // make sure the copy and moves work as expected - - HashGraph g; - - handle_t h1 = g.create_handle("A"); - handle_t h2 = g.create_handle("T"); - handle_t h3 = g.create_handle("G"); - - g.create_edge(h1, h2); - g.create_edge(h2, h3); - - path_handle_t p = g.create_path_handle("p"); - g.append_step(p, h1); - g.append_step(p, h2); - g.append_step(p, h3); - - HashGraph g_copy_1 = g; - HashGraph g_copy_2(g); - HashGraph g_copy_3(g); - HashGraph g_copy_4(g); - - HashGraph g_move_1 = std::move(g_copy_3); - HashGraph g_move_2(std::move(g_copy_4)); - - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_1, true)); - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_2, true)); - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_1, true)); - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_2, true)); - - // delete a handle on a path to trigger the occurrence index to be accessed - g_copy_1.destroy_handle(g_copy_1.get_handle(g.get_id(h2))); - g_copy_2.destroy_handle(g_copy_2.get_handle(g.get_id(h2))); - g_move_1.destroy_handle(g_move_1.get_handle(g.get_id(h2))); - g_move_2.destroy_handle(g_move_2.get_handle(g.get_id(h2))); - g.destroy_handle(h2); - - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_1, true)); - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_2, true)); - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_1, true)); - assert(handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_2, true)); - - cerr << "HashGraph tests successful!" 
<< endl; + + // make sure the copy and moves work as expected + + HashGraph g; + + handle_t h1 = g.create_handle("A"); + handle_t h2 = g.create_handle("T"); + handle_t h3 = g.create_handle("G"); + + g.create_edge(h1, h2); + g.create_edge(h2, h3); + + path_handle_t p = g.create_path_handle("p"); + g.append_step(p, h1); + g.append_step(p, h2); + g.append_step(p, h3); + + HashGraph g_copy_1 = g; + HashGraph g_copy_2(g); + HashGraph g_copy_3(g); + HashGraph g_copy_4(g); + + HashGraph g_move_1 = std::move(g_copy_3); + HashGraph g_move_2(std::move(g_copy_4)); + + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_1, true)); + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_2, true)); + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_1, true)); + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_2, true)); + + // delete a handle on a path to trigger the occurrence index to be accessed + g_copy_1.destroy_handle(g_copy_1.get_handle(g.get_id(h2))); + g_copy_2.destroy_handle(g_copy_2.get_handle(g.get_id(h2))); + g_move_1.destroy_handle(g_move_1.get_handle(g.get_id(h2))); + g_move_2.destroy_handle(g_move_2.get_handle(g.get_id(h2))); + g.destroy_handle(h2); + + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_1, true)); + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_copy_2, true)); + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_1, true)); + assert( + handlegraph::algorithms::are_equivalent_with_paths(&g, &g_move_2, true)); + + cerr << "HashGraph tests successful!" << endl; } -void test_snarl_distance_index() { +void test_hub_labeling() { + + // To make the tests easier to write we have a widget that does the full dance + // to build a packed label vector. 
+ auto get_packed_labels = [](const HashGraph &test_g) { + // test HashGraph -> Boost graph + CHOverlay bg = make_boost_graph(test_g); + + auto [edges_start, edges_end] = boost::edges(bg); + std::for_each(edges_start, edges_end, [&](auto e) { + cerr << source(e, bg) << " -> " << target(e, bg) << endl; + }); + + make_contraction_hierarchy(bg); + // cerr << " - made contraction hierarchy" << endl; + + vector> labels_fwd; + labels_fwd.resize(num_vertices(bg)); + vector> labels_back; + labels_back.resize(num_vertices(bg)); + + /* + for (auto v: labels_fwd) { + for (auto sz: v) { + cerr << "(" << sz.hub << "," << sz.dist << ") "; + } + cerr << " | "; + } + cerr << endl; + cerr<<"back:" << endl; + for (auto v: labels_back) { + for (auto sz: v) { + cerr << "(" << sz.hub << "," << sz.dist << ") "; + } + cerr << " | "; + } + cerr << endl; + cerr << "pack:" << endl; + for (auto sz: packed_labels) { + cerr << sz << " "; + } + cerr << endl; */ + + create_labels(labels_fwd, labels_back, bg); + + // linearization + return pack_labels(labels_fwd, labels_back); + }; + + // We also use this to let us write our tests in terms of index into the + // handles vector, and orientation. This converts that to a hub labeling + // rank. + auto rank = [](size_t node_index, bool is_reverse) -> size_t { + return node_index * 2 + (is_reverse ? 1 : 0); + }; + + { + // Simple stick graph of 3 nodes + HashGraph test_g; + vector handles; + handles.resize(3); + for (auto n : {0, 1, 2}) { + handles[n] = test_g.create_handle("A"); + } + test_g.create_edge(handles[0], handles[1]); + test_g.create_edge(handles[1], handles[2]); + + vector packed_labels = get_packed_labels(test_g); + + // 0th forward to 1st forward: no intervening bases + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, false)) == + 0); + + // When asking about the same node twice, we look for self loops. + // Here there aren't any. 
+ assert(hhl_query(packed_labels.begin(), rank(0, false), rank(0, false)) == + INF_INT); + + // 2nd reverse to 0th reverse: 1 intervening base + assert(hhl_query(packed_labels.begin(), rank(2, true), rank(0, true)) == 1); + + // 0th reverse to 1st forward: no connection + assert(hhl_query(packed_labels.begin(), rank(0, true), rank(1, false)) == + INF_INT); + + // TODO: check that error occurs when nodeside out of range is given + } + { + // Graph with several nodes but only one edge + HashGraph test_g; + vector handles; + handles.resize(8); + for (auto n : {0, 1, 2, 3, 4, 5, 6, 7}) { + handles[n] = test_g.create_handle(string(n + 1, 'A')); + } + vector> edges = {{1, 3}}; + for (auto e : edges) { + auto [s, t] = e; + test_g.create_edge(handles[s], handles[t]); + } - char filename[] = "tmpXXXXXX"; - int fd = -1; - { - // Make an empty index - SnarlDistanceIndex index; - - // Set it up for a completely empty graph. - vector empty_temp_indexes; - HashGraph empty_graph; - index.get_snarl_tree_records(empty_temp_indexes, &empty_graph); - - // It should be empty but working - assert(index.get_max_tree_depth() == 0); - - // Save it - fd = mkstemp(filename); - assert(fd != -1); - index.serialize(fd); + vector packed_labels = get_packed_labels(test_g); + + // 1st forward to 3rd forward: the only edge there is + assert(hhl_query(packed_labels.begin(), rank(1, false), rank(3, false)) == + 0); + // nonexistent path + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(7, false)) == + INF_INT); + } + { + // Graph with several nodes and several edges + HashGraph test_g; + vector handles; + handles.resize(8); + for (auto n : {0, 1, 2, 3, 4, 5, 6, 7}) { + handles[n] = test_g.create_handle(string(n + 1, 'A')); } - assert(close(fd) == 0); - - { - // Load it again - SnarlDistanceIndex index2; - index2.deserialize(filename); - - // It should be empty but working - assert(index2.get_max_tree_depth() == 0); + vector> edges = {{0, 1}, {0, 2}, {1, 0}, {2, 0}, + {1, 3}, {1, 4}, {4, 
1}, {5, 5}}; + for (auto e : edges) { + auto [s, t] = e; + test_g.create_edge(handles[s], handles[t]); } - - // Make the file un-writable. - assert(chmod(filename, S_IRUSR) == 0); - - { - // Load it a third time - SnarlDistanceIndex index2; - index2.deserialize(filename); - - // It should be empty but working - assert(index2.get_max_tree_depth() == 0); + + vector packed_labels = get_packed_labels(test_g); + + // 1st forward to 3rd forward: direct connection + assert(hhl_query(packed_labels.begin(), rank(1, false), rank(3, false)) == + 0); + // 0th forward to 7th forward: nonexistent path + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(7, false)) == + INF_INT); + + // check node lengths are taken into account + // 0th forward to 3rd forward: should need to go through 1st which has + // length 2 + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(3, false)) == + 2); + + // check u -> v and v -> u are different + // 3rd forward to 1st forward: shouldn't connect because nothing is + // downstream of 3rd + assert(hhl_query(packed_labels.begin(), rank(3, false), rank(1, false)) == + INF_INT); + // 1st forward to 3rd forward: direct connection + assert(hhl_query(packed_labels.begin(), rank(1, false), rank(3, false)) == + 0); + + // need to debug + for (size_t a = 0; a < handles.size() * 2; a++) { + cerr << hhl_query(packed_labels.begin(), rank(1, false), a) << endl; } - - // Make the file writable again - assert(chmod(filename, S_IRUSR | S_IWUSR) == 0); - - // And remove it - unlink(filename); - - cerr << "SnarlDistanceIndex tests successful!" 
<< endl; + + // node to itself in the same direction (edge exists) + assert(hhl_query(packed_labels.begin(), rank(5, false), rank(5, false)) == + 0); + // node to itself in the same direction (edge doesn't exist) + assert(hhl_query(packed_labels.begin(), rank(3, false), rank(3, false)) == + INF_INT); + } + { + // Test case: Cycle back to the same node with minimum distance > 0 + // Creates a triangle: 0 -> 1 -> 2 -> 0 + // Node lengths: 0=1, 1=2, 2=3 + HashGraph test_g; + vector handles; + handles.resize(3); + for (auto n : {0, 1, 2}) { + handles[n] = test_g.create_handle(string(n + 1, 'A')); + } + test_g.create_edge(handles[0], handles[1]); + test_g.create_edge(handles[1], handles[2]); + test_g.create_edge(handles[2], handles[0]); + + vector packed_labels = get_packed_labels(test_g); + + // Forward cycle: 0->1->2->0 + // 0_fwd to 0_fwd via cycle: intermediate nodes 1 and 2, lengths 2+3=5 + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(0, false)) == + 5); + // 1_fwd to 1_fwd via cycle: intermediate nodes 2 and 0, lengths 3+1=4 + assert(hhl_query(packed_labels.begin(), rank(1, false), rank(1, false)) == + 4); + // 2_fwd to 2_fwd via cycle: intermediate nodes 0 and 1, lengths 1+2=3 + assert(hhl_query(packed_labels.begin(), rank(2, false), rank(2, false)) == + 3); + + // The same cycle is visible in reverse. 
+ assert(hhl_query(packed_labels.begin(), rank(0, true), rank(0, true)) == 5); + assert(hhl_query(packed_labels.begin(), rank(1, true), rank(1, true)) == 4); + assert(hhl_query(packed_labels.begin(), rank(2, true), rank(2, true)) == 3); + } + { + // Test case: Forward and reverse orientations of different nodes reaching + // each other Node 0 (len 1) and Node 1 (len 2) Edges: 0_fwd -> 1_rev, 2_fwd + // -> 1_fwd This creates a "reversing" pattern where you enter one side and + // exit the other + HashGraph test_g; + vector handles; + handles.resize(3); + handles[0] = test_g.create_handle("A"); + handles[1] = test_g.create_handle("AA"); + handles[2] = test_g.create_handle("AAA"); + + test_g.create_edge(handles[0], test_g.flip(handles[1])); + test_g.create_edge(handles[2], handles[1]); + + vector packed_labels = get_packed_labels(test_g); + + // We see the 1st node attached the right way around + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, true)) == + 0); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, false)) == + INF_INT); + + // We see the 0th node connected to the 2nd node the right way around + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, true)) == + 2); + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, false)) == + INF_INT); + + // We see the same thing looking the other way + assert(hhl_query(packed_labels.begin(), rank(2, false), rank(0, true)) == + 2); + assert(hhl_query(packed_labels.begin(), rank(2, false), rank(0, false)) == + INF_INT); + } + { + // Test case: Diamond graph with multiple paths of different lengths + // Tests that we find the minimum distance, not just any path + // + // 1 (len 2) + // / \ + // / \ + // 0 3 + // \ / + // \ / + // 2 (len 10) + // + // Node lengths: 0=1, 1=2, 2=10, 3=1 + // Path 0->1->3 has intermediate length 2 + // Path 0->2->3 has intermediate length 10 + // Should find minimum = 2 + HashGraph test_g; + vector handles; + handles.resize(4); + 
handles[0] = test_g.create_handle("A"); + handles[1] = test_g.create_handle("AA"); + handles[2] = test_g.create_handle("AAAAAAAAAA"); + handles[3] = test_g.create_handle("A"); + + test_g.create_edge(handles[0], handles[1]); + test_g.create_edge(handles[0], handles[2]); + test_g.create_edge(handles[1], handles[3]); + test_g.create_edge(handles[2], handles[3]); + + vector packed_labels = get_packed_labels(test_g); + + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(3, false)) == + 2); + + // Same paths in reverse, should also be 2 + assert(hhl_query(packed_labels.begin(), rank(3, true), rank(0, true)) == 2); + } + { + // Test case: Graph requiring traversal through a node and back again + // + // 0_fwd -> 1_rev (entering 1 from the right) + // 1_fwd -> 2_fwd (exiting 1 from the right) + // 1_rev -> 1_fwd (turning around) + // This means you can go 0_fwd -> 1_rev -> (through 1) -> 1_fwd -> 2_fwd + HashGraph test_g; + vector handles; + handles.resize(3); + handles[0] = test_g.create_handle("A"); + handles[1] = test_g.create_handle("AAA"); + handles[2] = test_g.create_handle("AA"); + + test_g.create_edge(handles[0], test_g.flip(handles[1])); + test_g.create_edge(handles[1], handles[2]); + test_g.create_edge(test_g.flip(handles[1]), handles[1]); + + vector packed_labels = get_packed_labels(test_g); + + // Must go through 1, turn around, and come back through 1 + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, false)) == + 6); + } + { + // Test case: Graph requiring traversal through a node and back again, but + // without the hairpin edge. + HashGraph test_g; + vector handles; + handles.resize(3); + handles[0] = test_g.create_handle("A"); + handles[1] = test_g.create_handle("AAA"); + handles[2] = test_g.create_handle("AA"); + + test_g.create_edge(handles[0], test_g.flip(handles[1])); + test_g.create_edge(handles[1], handles[2]); + + vector packed_labels = get_packed_labels(test_g); + + // We can't turn around inside 1, so we can't make it. 
+ assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, false)) == + INF_INT); + } + { + // Test case: Disconnected components + // Nodes 0,1 are connected; Nodes 2,3 are connected; No edges between + // components + HashGraph test_g; + vector handles; + handles.resize(4); + handles[0] = test_g.create_handle("A"); + handles[1] = test_g.create_handle("AA"); + handles[2] = test_g.create_handle("AAA"); + handles[3] = test_g.create_handle("AAAA"); + + test_g.create_edge(handles[0], handles[1]); + test_g.create_edge(handles[2], handles[3]); + + vector packed_labels = get_packed_labels(test_g); + + // Within first component + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(1, false)) == + 0); // 0_fwd to 1_fwd + assert(hhl_query(packed_labels.begin(), rank(1, true), rank(0, true)) == + 0); // 1_rev to 0_rev + + // Within second component + assert(hhl_query(packed_labels.begin(), rank(2, false), rank(3, false)) == + 0); // 2_fwd to 3_fwd + assert(hhl_query(packed_labels.begin(), rank(3, true), rank(2, true)) == + 0); // 3_rev to 2_rev + + // Between components: no path + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(2, false)) == + INF_INT); // 0_fwd to 2_fwd + assert(hhl_query(packed_labels.begin(), rank(0, false), rank(3, false)) == + INF_INT); // 0_fwd to 3_fwd + assert(hhl_query(packed_labels.begin(), rank(1, false), rank(2, false)) == + INF_INT); // 1_fwd to 2_fwd + assert(hhl_query(packed_labels.begin(), rank(2, true), rank(0, true)) == + INF_INT); // 2_rev to 0_rev + } + + cerr << "HubLabeling tests successful!" << endl; +} + +void test_snarl_distance_index() { + + char filename[] = "tmpXXXXXX"; + int fd = -1; + { + // Make an empty index + SnarlDistanceIndex index; + + // Set it up for a completely empty graph. 
+ vector + empty_temp_indexes; + HashGraph empty_graph; + index.get_snarl_tree_records(empty_temp_indexes, &empty_graph); + + // It should be empty but working + assert(index.get_max_tree_depth() == 0); + + // Save it + fd = mkstemp(filename); + assert(fd != -1); + index.serialize(fd); + } + assert(close(fd) == 0); + + { + // Load it again + SnarlDistanceIndex index2; + index2.deserialize(filename); + + // It should be empty but working + assert(index2.get_max_tree_depth() == 0); + } + + // Make the file un-writable. + assert(chmod(filename, S_IRUSR) == 0); + + { + // Load it a third time + SnarlDistanceIndex index2; + index2.deserialize(filename); + + // It should be empty but working + assert(index2.get_max_tree_depth() == 0); + } + + // Make the file writable again + assert(chmod(filename, S_IRUSR | S_IWUSR) == 0); + + // And remove it + unlink(filename); + + cerr << "SnarlDistanceIndex tests successful!" << endl; } int main(void) { - test_reference_path_overlay(); - test_bit_packing(); - test_mapped_structs(); - test_int_vector(); - test_packed_vector>(); - test_packed_vector>(); - test_packed_vector>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_paged_vector>(); - test_paged_vector>(); - test_paged_vector>(); - test_paged_vector>(); - test_paged_vector>(); - test_paged_vector>(); - test_paged_vector>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_packed_deque(); - test_iterators>(); - test_iterators>(); - test_iterators>(); - test_packed_set(); - test_mutable_path_handle_graphs(); - test_deletable_handle_graphs(); - test_serializable_handle_graphs(); - test_packed_graph(); - test_path_position_overlays(); - test_packed_reference_path_overlay(); - test_vectorizable_overlays(); - 
test_packed_subgraph_overlay(); - test_multithreaded_overlay_construction(); - test_mapped_packed_graph(); - test_hash_graph(); - test_snarl_distance_index(); + test_reference_path_overlay(); + test_bit_packing(); + test_mapped_structs(); + test_int_vector(); + test_packed_vector>(); + test_packed_vector>(); + test_packed_vector>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_paged_vector>(); + test_paged_vector>(); + test_paged_vector>(); + test_paged_vector>(); + test_paged_vector>(); + test_paged_vector>(); + test_paged_vector>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_packed_deque(); + test_iterators>(); + test_iterators>(); + test_iterators>(); + test_packed_set(); + test_mutable_path_handle_graphs(); + test_deletable_handle_graphs(); + test_serializable_handle_graphs(); + test_packed_graph(); + test_path_position_overlays(); + test_packed_reference_path_overlay(); + test_vectorizable_overlays(); + test_packed_subgraph_overlay(); + test_multithreaded_overlay_construction(); + test_mapped_packed_graph(); + test_hash_graph(); + test_hub_labeling(); + test_snarl_distance_index(); } diff --git a/bdsg/src/vectorizable_overlays.cpp b/bdsg/src/vectorizable_overlays.cpp index 71da1ff9..d480f228 100644 --- a/bdsg/src/vectorizable_overlays.cpp +++ b/bdsg/src/vectorizable_overlays.cpp @@ -4,6 +4,8 @@ namespace bdsg { +const size_t VectorizableOverlay::MIN_ITEMS_PER_THREAD = 1024; + VectorizableOverlay::VectorizableOverlay(const HandleGraph* graph) : underlying_graph(graph) { assert(underlying_graph != nullptr); @@ -173,16 +175,21 @@ void VectorizableOverlay::index_nodes_and_edges() { } } + // We limit threading on small inputs. 
+ auto limited_threads = [&](size_t batch) { + return std::max(1, std::min(batch / MIN_ITEMS_PER_THREAD, get_thread_count())); + }; + // Make edge PMHF. Does its own threading. Do it first so we can drop the edge buffer. // note: we're mapping to 0-based rank, so need to add one after lookup edge_to_rank.reset(new boomphf::mphf, pair>, boomph_pair_pair_hash>( - edge_buffer.size(), edge_buffer, get_thread_count(), 2.0, false, false)); + edge_buffer.size(), edge_buffer, limited_threads(edge_buffer.size()), 2.0, false, false)); edge_buffer.clear(); // Make node PMHF. Does its own threading. // Note: we're mapping to 0-based rank, so need to add one after lookup node_to_rank.reset(new boomphf::mphf>(rank_to_node.size(), rank_to_node, - get_thread_count(), 2.0, false, false)); + limited_threads(rank_to_node.size()), 2.0, false, false)); // Add one slot to keep ranks in this table 1-based.