diff --git a/CMakeLists.txt b/CMakeLists.txt index 52fc59f..4f2fb12 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,11 +2,13 @@ cmake_minimum_required(VERSION 3.15) project(DynamicQueriesCC) include (FetchContent) +set(Boost_USE_STATIC_LIBS ON) +find_package(Boost REQUIRED COMPONENTS regex context) # Force IPO is enabled cmake_policy(SET CMP0069 NEW) set(CMAKE_POLICY_DEFAULT_CMP0069 NEW) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS ON) #set(INTERPROCEDURAL_OPTIMIZATION TRUE) @@ -20,9 +22,10 @@ if(NOT CMAKE_BUILD_TYPE) endif() message(STATUS "DynamicQueries Build Type: ${CMAKE_BUILD_TYPE}") +# controversial choice: return-type warnings should be promoted to errors if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") message("Adding GNU compiler flags") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wall") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wall -Werror=return-type") elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") message("Adding MSVC compiler flags") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Wall") @@ -30,6 +33,12 @@ else() message("${CMAKE_CXX_COMPILER_ID} not recognized, no flags added") endif() + +# add_compile_options(-fsanitize=address) +# add_link_options(-fsanitize=address) +# add_compile_options(-fsanitize=undefined) +# add_link_options(-fsanitize=undefined) + ###### # Get MPI for distributed communication ###### @@ -47,15 +56,32 @@ FetchContent_Declare( GraphZeppelinVerifyCC GIT_REPOSITORY https://github.com/GraphStreamingProject/GraphZeppelin - GIT_TAG db06e662aa7563716e49e3f5036e773a97a7dd64 #main + # GIT_TAG 2c633f5814f7edc79632cceb629280f6221b0281 #ksparse_recovery + # GIT_TAG 892b5b71b139100c309c79bb6ab94a5596fc606d + GIT_TAG c31add74bae9d826b07216acca0558747d34efc0 +) +add_compile_definitions(GLOG_USE_GLOG_EXPORT) + + +# NOTE - not sure if this is gonna work +# Install DynamicConnectivity Package +# it doesn't. 
the + +FetchContent_Declare( + dycon + GIT_REPOSITORY https://github.com/GraphStreamingProject/DynamicConnectivity + GIT_TAG 733d99e4a6985ef656df716ffe23719a1f29786b ) + +# TODO - THERE should be a better way to do this that doesn't rely on populate and add_subdirectory +FetchContent_Populate(dycon) +add_subdirectory(${dycon_SOURCE_DIR} ${dycon_BINARY_DIR} EXCLUDE_FROM_ALL) + FetchContent_MakeAvailable(GraphZeppelinVerifyCC) -#add_compile_options(-fsanitize=address) -#add_link_options(-fsanitize=address) -#add_compile_options(-fsanitize=undefined) -#add_link_options(-fsanitize=undefined) + +find_package(TBB REQUIRED) add_executable(dynamicCC_tests test/test_runner.cpp @@ -66,13 +92,18 @@ add_executable(dynamicCC_tests src/skiplist.cpp src/euler_tour_tree.cpp + src/sketchless_skiplist.cpp + src/sketchless_euler_tour_tree.cpp src/link_cut_tree.cpp src/graph_tiers.cpp + src/batch_tiers.cpp ) - target_include_directories(dynamicCC_tests PUBLIC include ${MPI_C_INCLUDE_PATH}) -add_dependencies(dynamicCC_tests GraphZeppelinVerifyCC) -target_link_libraries(dynamicCC_tests PRIVATE GraphZeppelinVerifyCC ${MPI_LIBRARIES}) +add_dependencies(dynamicCC_tests GraphZeppelinVerifyCC dycon) +target_link_libraries(dynamicCC_tests PRIVATE GraphZeppelinVerifyCC dycon ${MPI_LIBRARIES} TBB::tbb) + +target_compile_options(dynamicCC_tests PUBLIC -fopenmp) +target_link_options(dynamicCC_tests PUBLIC -fopenmp) add_executable(mpi_dynamicCC_tests test/mpi_test_runner.cpp @@ -86,10 +117,56 @@ add_executable(mpi_dynamicCC_tests src/input_node.cpp src/tier_node.cpp ) - +target_compile_definitions(mpi_dynamicCC_tests PUBLIC SKETCH_BUFFER_SIZE=${SKETCH_BUFFER_SIZE}) target_include_directories(mpi_dynamicCC_tests PUBLIC include ${MPI_C_INCLUDE_PATH}) -add_dependencies(mpi_dynamicCC_tests GraphZeppelinVerifyCC) -target_link_libraries(mpi_dynamicCC_tests PRIVATE GraphZeppelinVerifyCC ${MPI_LIBRARIES}) +add_dependencies(mpi_dynamicCC_tests GraphZeppelinVerifyCC dycon) +target_link_libraries(mpi_dynamicCC_tests PRIVATE GraphZeppelinVerifyCC dycon ${MPI_LIBRARIES}) + +target_compile_options(mpi_dynamicCC_tests PUBLIC -fopenmp) +target_link_options(mpi_dynamicCC_tests PUBLIC -fopenmp) + + +add_executable(hybrid_mpi_dynamicCC_tests + test/hybrid_test_runner.cpp + test/hybrid_tests.cpp + + src/skiplist.cpp + src/sketchless_skiplist.cpp + src/euler_tour_tree.cpp + src/sketchless_euler_tour_tree.cpp + src/link_cut_tree.cpp + src/input_node.cpp + src/tier_node.cpp +) +target_compile_definitions(hybrid_mpi_dynamicCC_tests PUBLIC SKETCH_BUFFER_SIZE=${SKETCH_BUFFER_SIZE}) +target_include_directories(hybrid_mpi_dynamicCC_tests PUBLIC include ${MPI_C_INCLUDE_PATH}) +add_dependencies(hybrid_mpi_dynamicCC_tests GraphZeppelinVerifyCC dycon) +target_link_libraries(hybrid_mpi_dynamicCC_tests PRIVATE GraphZeppelinVerifyCC dycon ${MPI_LIBRARIES}) + +target_compile_options(hybrid_mpi_dynamicCC_tests PUBLIC -fopenmp) +target_link_options(hybrid_mpi_dynamicCC_tests PUBLIC -fopenmp) + +add_executable(hybrid_shmem_dynamicCC_tests + test/hybrid_shmem_test_runner.cpp + test/hybrid_shmem_tests.cpp + + src/skiplist.cpp + src/graph_tiers.cpp + src/batch_tiers.cpp + src/sketchless_skiplist.cpp + src/euler_tour_tree.cpp + src/sketchless_euler_tour_tree.cpp + src/link_cut_tree.cpp + # src/input_node.cpp + # src/tier_node.cpp +) +target_compile_definitions(hybrid_shmem_dynamicCC_tests PUBLIC SKETCH_BUFFER_SIZE=${SKETCH_BUFFER_SIZE}) +target_include_directories(hybrid_shmem_dynamicCC_tests PUBLIC include ${MPI_C_INCLUDE_PATH}) 
+add_dependencies(hybrid_shmem_dynamicCC_tests GraphZeppelinVerifyCC dycon) +target_link_libraries(hybrid_shmem_dynamicCC_tests PRIVATE GraphZeppelinVerifyCC dycon ${MPI_LIBRARIES} TBB::tbb) + +target_compile_options(hybrid_shmem_dynamicCC_tests PUBLIC -fopenmp) +target_link_options(hybrid_shmem_dynamicCC_tests PUBLIC -fopenmp) ####### # TODO: Is MPI INCLUDE PATH necessary? diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..b58ad80 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,119 @@ +# build +FROM ubuntu:24.04 AS build +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + cmake \ + git \ + sudo \ + gnupg \ + # libboost-context-dev \ + # libboost-filesystem-dev \ + # libboost-program-options-dev \ + # libboost-system-dev \ + # libboost-thread-dev \ + libboost-all-dev \ + libdouble-conversion-dev \ + libfast-float-dev \ + libevent-dev \ + libfmt-dev \ + libgflags-dev \ + libgoogle-glog-dev \ + libjemalloc-dev \ + libmimalloc-dev \ + libssl-dev \ + libunwind-dev \ + libzstd-dev \ + ninja-build \ + openmpi-bin \ + libopenmpi-dev \ + python3 \ + python3-venv \ + wget \ + libtbb-dev \ + && rm -rf /var/lib/apt/lists/* + + +WORKDIR /opt/deps + +# install the latest version of fast-float + +# TODO - checkout a specific tag/release +RUN git clone https://github.com/fastfloat/fast_float.git \ + && cd fast_float \ + && cmake -B build -DFASTFLOAT_TEST=OFF \ + && sudo cmake --build build --target install # build folly + install dependencies we may have missed # TODO - would be good to enumerate them # RUN git clone https://github.com/facebook/folly.git # WORKDIR /opt/deps/folly # RUN python3 ./build/fbcode_builder/getdeps.py install-system-deps --recursive # ENV FOLLY_PREFIX=/opt/deps/folly/_build/opt/facebook # ENV CMAKE_PREFIX_PATH=${FOLLY_PREFIX}:${CMAKE_PREFIX_PATH} + + +# build DynamicQueriesCC in Release +WORKDIR /opt/dynamiccc +COPY . . +# remove old build directory +RUN rm -rf build +RUN cmake -S . 
-B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_PREFIX_PATH=${FOLLY_PREFIX} \ + -DSKETCH_BUFFER_SIZE=5 \ + -DPARLAY_TBB=On \ + && cmake --build build --target \ + dynamicCC_tests \ + mpi_dynamicCC_tests \ + hybrid_mpi_dynamicCC_tests \ + hybrid_shmem_dynamicCC_tests \ + -j "$(nproc)" + + +# ------------------------------ + +# runtime +FROM ubuntu:24.04 AS runtime +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates \ + git \ + libboost-context-dev \ + libboost-filesystem-dev \ + libboost-program-options-dev \ + libboost-system-dev \ + libboost-thread-dev \ + libdouble-conversion-dev \ + libevent-dev \ + libfmt-dev \ + libgflags-dev \ + libgoogle-glog-dev \ + libjemalloc-dev \ + libmimalloc-dev \ + libssl-dev \ + libunwind-dev \ + libzstd-dev \ + openmpi-bin \ + libopenmpi-dev \ + python3 \ + wget \ + libtbb-dev \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /opt/dynamiccc + +# project binaries and libs +COPY --from=build /opt/dynamiccc/build/*tests /opt/dynamiccc/bin/ +COPY scripts ./scripts + +ENV PATH="/opt/dynamiccc/bin:${PATH}" +ENV LD_LIBRARY_PATH="/opt/dynamiccc/lib:${LD_LIBRARY_PATH}" + + +ENTRYPOINT ["/bin/bash"] \ No newline at end of file diff --git a/include/batch_tiers.h b/include/batch_tiers.h new file mode 100644 index 0000000..1101056 --- /dev/null +++ b/include/batch_tiers.h @@ -0,0 +1,206 @@ +#pragma once +#include "types.h" +#include +#include +#include +#include +// #include +// #include +#include "parlay_hash/unordered_map.h" +
+#include "euler_tour_tree.h" +// #include "link_cut_tree.h" +#include "lct_v2.h" +#include "union_find_local.h" +#include "sketchless_euler_tour_tree.h" +// #include "parlay_hash/unordered_set.h" + +template requires(SketchColumnConcept) +class BatchTiers { + private: + size_t num_nodes; + uint64_t seed; + // size_t maximum_batch_size = 512; + // size_t maximum_batch_size = 100; + // size_t maximum_batch_size = 1 << 20; + size_t maximum_batch_size = 1 << 20; + // size_t maximum_batch_size = 200; + // size_t maximum_batch_size = 1 << 14; + // size_t maximum_batch_size = 1024; + size_t granularity = 1 << 11; // suggested number of tier-updates per thread + std::vector> ett; // one ETT for each tier + LinkCutTreeMaxAgg link_cut_tree; + SketchlessEulerTourTree<> query_ett; + std::mutex lct_and_query_ett_lock; + parlay::sequence _unique_update_ids; + + std::vector transaction_log; + + // TODO - add the sketchless ETT for querying + // + + // "root" nodes for each candidate component at each tier. + union_find_local _component_reps_dsu; + + // static thread_local parlay::sequence _deltas_buffer; + // static thread_local SketchClass _scratch_sketch; + // matrix of [num_tiers x ( batch_size * 2 )] + std::vector*>> _root_nodes; + + // jagged array: track isolated components/probably isolated components. + // why are we doing this instead of just using root_nodes? + + // a vector mapping each tier to the set of its components that need + // to be checked for isolation + // parlay::sequence*>> _updated_components; + // TODO - see if we can get rid of redundant checks + // and only do one PER component. ie if some components share the same + // root, we need not check them. + // parlay::sequence*>> _updated_components; + parlay::sequence> _updated_components; + + // tracks components that were already checked for isolation and had their + // associated link/cut instructions logged. 
+ // parlay::sequence*> _current_isolated_components; + + // key: a root node ptr (to identify same component at current tier) + // folly::ConcurrentHashMap _already_checked_components; + parlay::parlay_unordered_map_direct _already_checked_components; + + + // links to "broadcast" to all higher tiers + parlay::sequence _pending_links; + // cuts to "broadcast" to all higher tiers, plus the index of the first tier + // where the cut should be made + parlay::sequence> _pending_cuts; + + parlay::sequence update_buffer; + + + + public: + BatchTiers(node_id_t num_nodes, uint64_t seed); + BatchTiers(node_id_t num_nodes, uint32_t num_tiers, int batch_size, size_t seed); + ~BatchTiers(); + + bool is_initialized(node_id_t u) { + // no-op with vector implementation + return ett[0].is_initialized(u); + }; + + void initialize_node(node_id_t u) { + for (auto &tree: ett) { + tree.initialize_node(u); + } + query_ett.initialize_node(u); + link_cut_tree.initialize_node(u); + } + + void uninitialize_node(node_id_t u) { + for (auto &tree: ett) { + tree.uninitialize_node(u); + } + query_ett.uninitialize_node(u); + link_cut_tree.uninitialize_node(u); + } + + void initialize_all_nodes() { + // TODO - parallel_for? + for (auto &tree: ett) { + tree.initialize_all_nodes(num_nodes); + } + query_ett.initialize_all_nodes(num_nodes); + link_cut_tree.initialize_all_nodes(num_nodes); + } + + void flush_transaction_log() { + transaction_log.clear(); + } + + const std::vector& get_transaction_log() const { + return transaction_log; + } + + void process_all_updates() { + if (update_buffer.size() > 0) { + update_batch(update_buffer); + update_buffer.clear(); + } + } + + void update_batch(const parlay::sequence &updates); + + bool is_tree_edge(node_id_t a, node_id_t b) { + return query_ett.has_edge(a, b); + } + + void update(const GraphUpdate &update) { + // if (!is_initialized(update.edge.src) || !is_initialized(update.edge.dst)) { + // std::cout << "ruh oh" << std::endl; + // } + assert(this->is_initialized(update.edge.src)); + assert(this->is_initialized(update.edge.dst)); + // add to buffer: + update_buffer.push_back(update); + // bool is_tree_edge_deletion = (update.type == DELETE && + // is_tree_edge(update.edge.src, update.edge.dst)); + // if (update_buffer.size() >= maximum_batch_size || is_tree_edge_deletion) { + if (update_buffer.size() >= maximum_batch_size) { + // std::cout << "is_tree_edge_deletion: " << is_tree_edge_deletion << ", buffer size: " << update_buffer.size() << std::endl; + // process the batch + update_batch(update_buffer); + // clear the buffer + update_buffer.clear(); + } + } + + void flush_buffer() { + if (update_buffer.size() > 0) { + update_batch(update_buffer); + update_buffer.clear(); + } + } + size_t space_usage_bytes() { + size_t total = sizeof(BatchTiers); + for (auto &tree: ett) { + total += tree.space_usage_bytes(); + } + // total += query_ett.space_usage_bytes(); + // total += link_cut_tree.space_usage_bytes(); + // total += _component_reps_dsu.space_usage_bytes(); + // total += _already_checked_components.max_size() * (sizeof(size_t) + sizeof(node_id_t) + sizeof(void*)); // rough estimate + // total += ((parlay::unordered_map_internal) _already_checked_components).size(); + // TODO - measure the overhead of the actual batch_tiers class + return total; + } + + bool is_connected(node_id_t a, node_id_t b); + + // query for the connected components of the graph + std::vector> get_cc(); + + // return the number of tiers + size_t num_tiers() const { + return ett.size(); + } + // find the 
index of the highest everywhere-maximal tier. + + + private: + SkipListNode*& root_node(size_t tier, size_t update_idx, bool src_or_dst) { + return _root_nodes[tier][update_idx * 2 + (src_or_dst ? 0 : 1)]; + }; + void _process_sketch_aggs_only(const parlay::sequence &updates); + + + // same thing but separates by tiers. this avoids the need for atomics. + void _process_sketch_aggs_tier_sequential(const parlay::sequence &updates); + + // same thing but use our CAS tricks + void _process_sketch_aggs_with_cas(const parlay::sequence &updates); + + uint32_t _search_for_isolated_components(const parlay::sequence &updates); + + bool _fix_isolations_at_tier(const parlay::sequence &updates, uint32_t tier_idx); +}; + diff --git a/include/euler_tour_tree.h b/include/euler_tour_tree.h index 98d8ff1..59a26ca 100644 --- a/include/euler_tour_tree.h +++ b/include/euler_tour_tree.h @@ -3,64 +3,211 @@ #include #include +#include "sketch/sketch_concept.h" +#include "sketch_interfacing.h" +#include + + +template requires(SketchColumnConcept) class EulerTourNode { FRIEND_TEST(EulerTourTreeSuite, random_links_and_cuts); FRIEND_TEST(EulerTourTreeSuite, get_aggregate); FRIEND_TEST(SkipListSuite, join_split_test); FRIEND_TEST(GraphTiersSuite, mini_correctness_test); - std::unordered_map edges; + std::unordered_map*, SkipListNode*> edges; - Sketch* temp_sketch = nullptr; long seed = 0; - SkipListNode* make_edge(EulerTourNode* other, Sketch* temp_sketch); - void delete_edge(EulerTourNode* other, Sketch* temp_sketch); + SkipListNode* make_edge(EulerTourNode* other, SketchClass &temp_sketch); + SkipListNode* make_edge(EulerTourNode* other); + void delete_edge(EulerTourNode* other, SketchClass &temp_sketch); public: const node_id_t vertex = 0; const uint32_t tier = 0; - SkipListNode* allowed_caller = nullptr; + SkipListNode* allowed_caller = nullptr; EulerTourNode(long seed, node_id_t vertex, uint32_t tier); EulerTourNode(long seed); ~EulerTourNode(); - bool link(EulerTourNode& other, Sketch* temp_sketch); - bool cut(EulerTourNode& other, Sketch* temp_sketch); + bool link(EulerTourNode& other, SketchClass &temp_sketch); + bool cut(EulerTourNode& other, SketchClass &temp_sketch); bool isvalid() const; - Sketch* get_sketch(SkipListNode* caller); - SkipListNode* update_sketch(vec_t update_idx); + SketchClass& get_sketch(SkipListNode* caller); + SkipListNode* update_sketch(vec_t update_idx); + SkipListNode* update_sketch(const ColumnEntryDelta &delta); + SkipListNode* update_sketch(const ColumnEntryDeltas &deltas); + SkipListNode* update_sketch(const SketchClass &sketch); + SkipListNode* update_sketch_atomic(vec_t update_idx); + SkipListNode* update_sketch_atomic(const ColumnEntryDelta &delta); + SkipListNode* update_sketch_atomic(const ColumnEntryDeltas &deltas); + + SkipListNode* get_allowed_caller() { + return this->allowed_caller; + } + + // update just this node's sketch + // plus return the allowed caller + SkipListNode* update_sketch_noagg_atomic(const ColumnEntryDelta &delta); + // void update_sketch_noagg_atomic(const SketchClass &sketch); + SkipListNode* update_sketch_atomic_to_level(const ColumnEntryDelta &delta, uint32_t level); + + //recompute the parent aggregates + void recompute_aggregates_parallel(); + + const ColumnEntryDelta generate_entry_delta(vec_t update) const { + return this->allowed_caller->sketch_agg.generate_entry_delta(update); + } - SkipListNode* get_root(); + SkipListNode* get_root() const; - Sketch* get_aggregate(); + const SketchClass& get_aggregate(); uint32_t get_size(); - bool 
has_edge_to(EulerTourNode* other); + bool has_edge_to(EulerTourNode* other); - std::set get_component(); + std::set*> get_component(); long get_seed() {return seed;}; - friend std::ostream& operator<<(std::ostream& os, const EulerTourNode& ett); + template requires(SketchColumnConcept) + friend std::ostream& operator<<(std::ostream& os, const EulerTourNode& ett); }; + +using VectorContainer = std::vector>; +using HashmapContainer = absl::flat_hash_map*>; + +template >> +typename Container = absl::flat_hash_map*>> +requires(SketchColumnConcept) class EulerTourTree { - Sketch* temp_sketch; + SketchClass temp_sketch; +private: + size_t seed; + node_id_t max_num_nodes; + uint32_t tier_num; public: - std::vector ett_nodes; + // std::vector> ett_nodes; + // absl::flat_hash_map*> ett_nodes; + Container ett_nodes; + - EulerTourTree(node_id_t num_nodes, uint32_t tier_num, int seed); + EulerTourTree(node_id_t max_num_nodes, uint32_t tier_num, int seed); + + EulerTourNode& ett_node(node_id_t u) { + if constexpr (std::is_same_v>>) { + assert(u < ett_nodes.size()); + return ett_nodes[u]; + } else { + assert(ett_nodes.find(u) != ett_nodes.end()); + return *ett_nodes[u]; + } + } + + void initialize_node(node_id_t u) { + // no-op with vector implementation + if constexpr (!std::is_same_v>>) { + // assert(ett_nodes.find(u) == ett_nodes.end()); + // TODO - this is kinda gross - fix later + if (ett_nodes.find(u) == ett_nodes.end()) + ett_nodes[u] = new EulerTourNode(this->seed, u, this->tier_num); + } + }; + void uninitialize_node(node_id_t u) { + // no-op with vector implementation + if constexpr (!std::is_same_v>>) { + assert(ett_nodes.find(u) != ett_nodes.end()); + delete ett_nodes[u]; + // TODO - actually delete from ett + } + }; + + void initialize_all_nodes() { + for (node_id_t i = 0; i < max_num_nodes; ++i) { + initialize_node(i); + } + }; + void initialize_all_nodes(node_id_t until) { + assert(until <= max_num_nodes); + for (node_id_t i = 0; i < until; ++i) { + initialize_node(i); + } + } + bool is_initialized(node_id_t u) { + // no-op with vector implementation + if constexpr (std::is_same_v>>) { + return true; + } else { + return ett_nodes.find(u) != ett_nodes.end(); + } + }; void link(node_id_t u, node_id_t v); void cut(node_id_t u, node_id_t v); bool has_edge(node_id_t u, node_id_t v); - SkipListNode* update_sketch(node_id_t u, vec_t update_idx); - std::pair update_sketches(node_id_t u, node_id_t v, vec_t update_idx); - SkipListNode* get_root(node_id_t u); - Sketch* get_aggregate(node_id_t u); + SkipListNode* update_sketch(node_id_t u, vec_t update_idx); + SkipListNode* update_sketch(node_id_t u, const ColumnEntryDelta &delta); + SkipListNode* update_sketch(node_id_t u, const ColumnEntryDeltas &deltas); + SkipListNode* update_sketch(node_id_t u, const SketchClass &sketch); + SkipListNode* update_sketch_atomic(node_id_t u, vec_t update_idx); + SkipListNode* update_sketch_atomic(node_id_t u, const ColumnEntryDelta &delta); + SkipListNode* update_sketch_atomic(node_id_t u, const ColumnEntryDeltas &deltas); + + // returns the allowed caller + // SkipListNode* update_sketch_noagg_atomic(const ColumnEntryDelta &delta); + // void update_sketch_noagg_atomic(const SketchClass &sketch); + + //recompute the parent aggregates + void recompute_aggregates_parallel(); + + ColumnEntryDelta generate_entry_delta(node_id_t u, vec_t update) { + // TODO - the specific node isn't actually meaningful here. 
+ return ett_node(u).generate_entry_delta(update); + } + + std::pair*, SkipListNode*> update_sketches(node_id_t u, node_id_t v, vec_t update_idx); + SkipListNode* get_root(node_id_t u); + const SketchClass& get_aggregate(node_id_t u); uint32_t get_size(node_id_t u); + uint32_t num_components() { + std::set roots; + for (node_id_t i = 0; i < ett_nodes.size(); ++i) { + if (!is_initialized(i)) { + continue; + } + auto root = ett_node(i).get_root(); + roots.insert(root); + } + return roots.size(); + } + size_t space_usage_bytes() { + size_t total = 0; + if constexpr (std::is_same_v>>) { + total += sizeof(EulerTourNode) * ett_nodes.capacity(); + } else { + size_t num_buckets = ett_nodes.bucket_count(); + total += sizeof(std::pair>*) * num_buckets; + } + std::unordered_set*> roots; + for (node_id_t i = 0; i < ett_nodes.size(); ++i) { + if constexpr (!std::is_same_v>>) { + if (ett_nodes.find(i) == ett_nodes.end()) { + continue; + } + } + SkipListNode* root = ett_node(i).get_root(); + roots.insert(root); + } + for (SkipListNode* root : roots) { + total += root->compute_space_usage(); + } + return total; + } }; + diff --git a/include/graph_tiers.h b/include/graph_tiers.h index 2d71859..8b627ec 100644 --- a/include/graph_tiers.h +++ b/include/graph_tiers.h @@ -6,6 +6,7 @@ #include "euler_tour_tree.h" #include "link_cut_tree.h" +// #include "lct_v2.h" // Global variables for performance testing @@ -24,16 +25,18 @@ extern std::atomic num_sketch_batches; // maintains the tiers of the algorithm // and the spanning forest of the entire graph +template requires(SketchColumnConcept) class GraphTiers { - FRIEND_TEST(GraphTiersSuite, mini_correctness_test); + // FRIEND_TEST(GraphTiersSuite, mini_correctness_test); private: - std::vector ett; // one ETT for each tier - std::vector root_nodes; - LinkCutTree link_cut_tree; - void refresh(GraphUpdate update); + std::vector> ett; // one ETT for each tier + std::vector*> root_nodes; + // LinkCutTreeMaxAgg link_cut_tree; + LinkCutTree<> link_cut_tree; + void refresh(GraphUpdate update, bool did_cut); public: - GraphTiers(node_id_t num_nodes); + GraphTiers(node_id_t num_nodes, uint64_t seed); ~GraphTiers(); // apply an edge update diff --git a/include/lct_v2.h b/include/lct_v2.h new file mode 100644 index 0000000..fca4dc1 --- /dev/null +++ b/include/lct_v2.h @@ -0,0 +1,381 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "types.h" +#include "util.h" + +#include + + +template +class NodeMaxLCT { + public: + using NodePair = std::pair*, NodeMaxLCT*>; + // static_assert(std::is_integral_v, "WeightT must be an integral type"); + + NodeMaxLCT(node_id_t node_id); + + void link(NodeMaxLCT* child, WeightT weight); + void cut(NodeMaxLCT* neighbor); + void evert(); // reroot + node_id_t get_node_id() const { return node_id; }; + NodeMaxLCT* get_root(); + std::pair path_query(NodeMaxLCT* other); + + private: + + static constexpr WeightT sentinel() { + return std::numeric_limits::lowest(); + } + + NodeMaxLCT* par; // parent + NodeMaxLCT* c[2]; // children + WeightT w[2]; // store the weights of the up and down preferred edges + WeightT max; // maintain the maximum edge weight in the splay tree subtree rooted at this + node_id_t node_id; + bool head; // whether the node is a head of a path, so don't use value w[0] + bool flip; // whether children are reversed; used for evert() + + NodeMaxLCT* get_real_par(); + NodeMaxLCT* get_leftmost(); + NodeMaxLCT* get_predecessor(); + NodeMaxLCT* get_successor(); + NodePair 
get_edge_with_weight(WeightT weight); + void rot(); + void splay(); + NodeMaxLCT* expose(); + void fix_c(); + void recompute_max(); + void push_flip(); + + }; + + +template < +typename WeightT, +typename Container = absl::flat_hash_map*> +// typename Container = std::vector> +> +class LinkCutTreeMaxAgg { + public: + static_assert(std::is_integral_v, "WeightT must be an integral type"); + + explicit LinkCutTreeMaxAgg(int _num_verts); + LinkCutTreeMaxAgg(node_id_t n) : LinkCutTreeMaxAgg(static_cast(n)) {} + ~LinkCutTreeMaxAgg(); + + void link(node_id_t u, node_id_t v, WeightT weight = WeightT{}); + void link(node_id_t u, node_id_t v, std::pair weight) { link(u, v, weight.second); } + void cut(node_id_t u, node_id_t v); + bool connected(node_id_t u, node_id_t v); + std::pair path_query(node_id_t u, node_id_t v); + size_t space_usage_bytes() const; + private: + Container verts; + size_t num_verts; + NodeMaxLCT& vert(node_id_t id) { + if constexpr (std::is_same_v>>) { + assert(id < verts.size()); + return verts[id]; + } else { + assert(verts.find(id) != verts.end()); + return *verts[id]; + } + } + NodeMaxLCT* vert_ptr(node_id_t id) { + if constexpr (std::is_same_v>>) { + assert(id < verts.size()); + return &verts[id]; + } else { + assert(verts.find(id) != verts.end()); + return verts[id]; + } + } +public: + void initialize_node(node_id_t v) { + // no-op with vector implementation + if constexpr (!std::is_same_v>>) { + assert(verts.find(v) == verts.end()); + verts[v] = new NodeMaxLCT(v); + } + } + void uninitialize_node(node_id_t v) { + // no-op with vector implementation + if constexpr (!std::is_same_v>>) { + assert(verts.find(v) != verts.end()); + delete verts[v]; + } + } + void initialize_all_nodes() { + for (node_id_t i = 0; i < num_verts; ++i) { + initialize_node(i); + } + } + void initialize_all_nodes(node_id_t until) { + assert(until <= num_verts); + for (node_id_t i = 0; i < until; ++i) { + initialize_node(i); + } + } +}; + +template +NodeMaxLCT::NodeMaxLCT(node_id_t node_id) : par(nullptr), c{nullptr, nullptr}, w{sentinel(), sentinel()}, + max(sentinel()), node_id(node_id), head(true), flip(false) {} + +template +NodeMaxLCT* NodeMaxLCT::get_real_par() { + return par != nullptr && this != par->c[0] && this != par->c[1] ? 
nullptr : par; + } + +template +NodeMaxLCT* NodeMaxLCT::get_leftmost() { + NodeMaxLCT* left = this; + push_flip(); + while (left->c[0] != nullptr) { + left = left->c[0]; + left->push_flip(); + } + left->splay(); + return left; + } + +template +NodeMaxLCT* NodeMaxLCT::get_predecessor() { + push_flip(); + NodeMaxLCT* curr = c[0]; + curr->push_flip(); + while (curr->c[1] != nullptr) { + curr = curr->c[1]; + curr->push_flip(); + } + curr->splay(); + return curr; + } + +template +NodeMaxLCT* NodeMaxLCT::get_successor() { + push_flip(); + NodeMaxLCT* curr = c[1]; + curr->push_flip(); + while (curr->c[0] != nullptr) { + curr = curr->c[0]; + curr->push_flip(); + } + curr->splay(); + return curr; + } + +template +typename NodeMaxLCT::NodePair NodeMaxLCT::get_edge_with_weight(WeightT weight) { + NodeMaxLCT* node = this; + while (node->w[0] != weight && node->w[1] != weight) { + for (int i = 0; i < 2; i++) + if (node->c[i] != nullptr && node->c[i]->max == weight) + node = node->c[i]; + } + node->splay(); + if (node->w[0] == weight) + return {node, node->get_predecessor()}; + return {node, node->get_successor()}; + } + + +template +void NodeMaxLCT::fix_c() { + for (int i = 0; i < 2; i++) + if (c[i] != nullptr) + c[i]->par = this; + } + +template +void NodeMaxLCT::recompute_max() { + max = head ? w[1]: std::max(w[0], w[1]); + for (int i = 0; i < 2; i++) + if (c[i] != nullptr) + max = std::max(max, c[i]->max); + } + +template +void NodeMaxLCT::push_flip() { + if (flip) { + flip = false; + std::swap(c[0], c[1]); + std::swap(w[0], w[1]); + for (int i = 0; i < 2; i++) + if (c[i] != nullptr) + c[i]->flip = !c[i]->flip; + } + } + +template +void NodeMaxLCT::rot() { // rotate v towards its parent; v must have real parent + NodeMaxLCT* p = get_real_par(); + par = p->par; + if (par != nullptr) + for (int i = 0; i < 2; i++) + if (par->c[i] == p) { + par->c[i] = this; + par->fix_c(); + } + const bool rot_dir = this == p->c[0]; + p->c[!rot_dir] = c[rot_dir]; + c[rot_dir] = p; + p->fix_c(); + p->recompute_max(); + fix_c(); + recompute_max(); + } + +template +void NodeMaxLCT::splay() { + NodeMaxLCT* p, * gp; + push_flip(); // guarantee flip bit isn't set after calling splay() + while ((p = get_real_par()) != nullptr) { + gp = p->get_real_par(); + if (gp != nullptr) + gp->push_flip(); + p->push_flip(); + push_flip(); + if (gp != nullptr) + ((gp->c[0] == p) == (p->c[0] == this) ? 
p : this)->rot(); + rot(); + } + } + +// returns the root of the tree +template +NodeMaxLCT* NodeMaxLCT::expose() { + NodeMaxLCT* curr = this; + NodeMaxLCT* prev = nullptr; + while (curr) { + curr->splay(); + NodeMaxLCT* lower = curr->c[1]; + curr->c[1] = prev; + curr->w[1] = sentinel(); + if (prev) { + curr->w[1] = prev->w[0]; + prev->head = false; + prev->recompute_max(); + } + curr->recompute_max(); + if (lower) { + NodeMaxLCT* left = lower->get_leftmost(); + left->head = true; + left->recompute_max(); + } + prev = curr->get_leftmost(); + curr = prev->par; + } + return prev; + } + +template +void NodeMaxLCT::evert() { + NodeMaxLCT* head_node = expose(); + head_node->flip = !head_node->flip; + head_node->push_flip(); + } + +template +NodeMaxLCT* NodeMaxLCT::get_root() { + return expose(); + } + +template +auto NodeMaxLCT::path_query(NodeMaxLCT* other) -> std::pair*, NodeMaxLCT*>, WeightT> { + evert(); + other->expose(); + std::pair*, NodeMaxLCT*>, WeightT> max_edge; + max_edge.first = get_edge_with_weight(max); + max_edge.second = max; + return max_edge; + } + +template +void NodeMaxLCT::cut(NodeMaxLCT* neighbor) { + neighbor->evert(); + evert(); + neighbor->push_flip(); + push_flip(); + neighbor->c[0] = nullptr; + neighbor->w[0] = sentinel(); + neighbor->recompute_max(); + par = nullptr; + w[1] = sentinel(); + recompute_max(); + } + +template +void NodeMaxLCT::link(NodeMaxLCT* child, WeightT weight) { + child->evert(); + child->splay(); + child->par = this; + child->w[0] = weight; + child->head = true; + } + +template +LinkCutTreeMaxAgg::LinkCutTreeMaxAgg(int _num_verts) : num_verts(static_cast(_num_verts)) { + if constexpr (std::is_same_v>>) { + verts.resize(static_cast(_num_verts), NodeMaxLCT(0)); + for (node_id_t i = 0; i < num_verts; ++i) { + verts[static_cast(i)] = NodeMaxLCT(i); + } + } +} + +template +LinkCutTreeMaxAgg::~LinkCutTreeMaxAgg() { + if constexpr (std::is_same_v>>) { + verts.clear(); + } +} + +template +void LinkCutTreeMaxAgg::link(node_id_t u, node_id_t v, WeightT weight) { + const auto u_idx = static_cast(u); + const auto v_idx = static_cast(v); + vert(u_idx).link(vert_ptr(v_idx), weight); +} + +template +void LinkCutTreeMaxAgg::cut(node_id_t u, node_id_t v) { + const auto u_idx = static_cast(u); + const auto v_idx = static_cast(v); + vert(u_idx).cut(vert_ptr(v_idx)); +} + +template +bool LinkCutTreeMaxAgg::connected(node_id_t u, node_id_t v) { + const auto u_idx = static_cast(u); + const auto v_idx = static_cast(v); + return vert(u_idx).get_root() == vert(v_idx).get_root(); +} + +template +std::pair LinkCutTreeMaxAgg::path_query(node_id_t u, node_id_t v) { + const auto u_idx = static_cast(u); + const auto v_idx = static_cast(v); + auto pointer_edge = vert(u_idx).path_query(vert_ptr(v_idx)); + std::pair edge; + edge.first.src = static_cast(pointer_edge.first.first->get_node_id()); + edge.first.dst = static_cast(pointer_edge.first.second->get_node_id()); + edge.second = pointer_edge.second; + return edge; +} + +template +size_t LinkCutTreeMaxAgg::space_usage_bytes() const{ + size_t max_space = sizeof(LinkCutTreeMaxAgg) + (num_verts * (sizeof(NodeMaxLCT*) + sizeof(NodeMaxLCT))); + return max_space; +} + + +template class LinkCutTreeMaxAgg; \ No newline at end of file diff --git a/include/link_cut_tree.h b/include/link_cut_tree.h index bf4356a..a89e11e 100644 --- a/include/link_cut_tree.h +++ b/include/link_cut_tree.h @@ -1,12 +1,15 @@ #pragma once #include +#include #include "types.h" #include "util.h" +#include + #define MAX_UINT64 (std::numeric_limits::max()) -class 
LinkCutTree; -class SplayTree; +// class LinkCutTree<>; +// class SplayTree; class LinkCutNode { FRIEND_TEST(LinkCutTreeSuite, random_links_and_cuts); @@ -36,6 +39,10 @@ class LinkCutNode { void rotate_up(); public: + // delete copy constructor and assignment operator + // LinkCutNode(const LinkCutNode&) = delete; + // LinkCutNode& operator=(const LinkCutNode&) = delete; + LinkCutNode* splay(); void link_left(LinkCutNode* left); @@ -70,12 +77,16 @@ class LinkCutNode { bool get_reversed(); }; +template < +// typename Container = std::vector> +typename Container = absl::flat_hash_map> class LinkCutTree { FRIEND_TEST(LinkCutTreeSuite, join_split_test); FRIEND_TEST(LinkCutTreeSuite, expose_simple_test); FRIEND_TEST(LinkCutTreeSuite, random_links_and_cuts); - std::vector nodes; + Container nodes; + node_id_t max_nodes; // Concatenate the paths with aux trees rooted at v and w and return the root of the combined aux tree LinkCutNode* join(LinkCutNode* v, LinkCutNode* w); @@ -104,4 +115,60 @@ class LinkCutTree { // Query for the CC algorithm std::vector> get_cc(); + + LinkCutNode& node(node_id_t u) { + if constexpr (std::is_same_v>) { + assert(u < nodes.size()); + return nodes[u]; + } else { + assert(nodes.find(u) != nodes.end()); + return *nodes[u]; + } + } + + LinkCutNode* get_node_ptr(node_id_t u) { + if constexpr (std::is_same_v>) { + assert(u < nodes.size()); + return &nodes[u]; + } else { + assert(nodes.find(u) != nodes.end()); + return nodes[u]; + } + } + + void initialize_node(node_id_t u) { + // no-op with vector implementation + if constexpr (!std::is_same_v>) { + nodes[u] = new LinkCutNode(); + } + }; + void uninitialize_node(node_id_t u) { + // no-op with vector implementation + if constexpr (!std::is_same_v>) { + assert(nodes.find(u) != nodes.end()); + delete nodes[u]; + } + }; + + void initialize_all_nodes() { + for (node_id_t i = 0; i < max_nodes; ++i) { + initialize_node(i); + } + }; + + void initialize_all_nodes(node_id_t upto) { + for (node_id_t i = 0; i < upto; ++i) { + initialize_node(i); + } + }; + + bool is_initialized(node_id_t u) { + // no-op with vector implementation + if constexpr (std::is_same_v>) { + return true; + } else { + return nodes.find(u) != nodes.end(); + } + }; + }; diff --git a/include/mpi_hybrid_conn.h b/include/mpi_hybrid_conn.h new file mode 100644 index 0000000..5af1d0c --- /dev/null +++ b/include/mpi_hybrid_conn.h @@ -0,0 +1,559 @@ +#include "mpi_nodes.h" +#include "graph_tiers.h" +#include +#include "recovery.h" + +template +concept DynamicSketchConcept = requires(T t) { + { t.process_all_updates()} -> std::same_as; + { t.initialize_node( std::declval() ) } -> std::same_as; + { t.uninitialize_node( std::declval() ) } -> std::same_as; + { t.initialize_all_nodes() } -> std::same_as; + { t.get_transaction_log() } -> std::same_as&>; + { t.update( std::declval() ) } -> std::same_as; + { t.space_usage_bytes() } -> std::same_as; +}; + +template requires(DynamicSketchConcept) +class HybridConnectivityManager { + // TODO + public: + // TODO - make this not public + SketchAlgoClass sketching_algo; + SCCWN<> cf_algo; + + void set_threshold(size_t threshold) { + // TODO - do this in an aesthetically better way lol. 
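+    // For reference (inferred from update() below, not a new invariant):
+    // DENSE_THRESHOLD is the degree at which a vertex is promoted into the
+    // sketched (dense) set, and MOVE_TO_SKETCH bounds how many pending
+    // dense-incident edges may accumulate in the cluster forest before
+    // flush_edges_to_sketch() moves them into the sketches.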
+ DENSE_THRESHOLD = threshold; + } + node_id_t sketched_node_count() const { + return this->recovery_sketches.size(); + } + private: + // TODO - this aint a great way + size_t MOVE_TO_SKETCH = 40; + size_t DENSE_THRESHOLD = 2000; + // size_t MOVE_TO_SKETCH = 1000000; + + size_t seed; + node_id_t num_nodes; + // GraphTiers sketching_algo; + // TODO - move semantics for sparserecovery? + absl::flat_hash_map recovery_sketches; + + + // tracks which of our CF edges are from the sketching algo + absl::flat_hash_set edges_from_sketch; + + // tracks how many dense edges are still in the CF + // generate plot with varying batch size + // keeping a global buffer is likely sufficient + // doing vertex-level might make checkpointing harder - think about this + std::vector num_pending_dense_edges; + + // tracks total amount of edges incident. + // we WONT rely on the CF to track edges. + std::vector num_edges; + std::vector num_cf_edges; + + size_t total_num_edges = 0; + size_t total_sketched_edges = 0; + + // buffer for when we need to collect all neighbors + std::vector _neighbors_buffer; + + // non-tree deletion buffer + std::vector non_tree_deletion_buffer; + + // TODO - this might be replaced by something internal to modified-cupcake + // can also just be a vector probably + absl::flat_hash_set _is_vertex_sketched; + + + size_t count_explicit_neighbors(node_id_t vertex) { + // std::cout << "count_explicit_neighbors for vertex: " << vertex << std::endl; + localTree *cf_leaf = cf_algo.leaves[vertex]; + size_t count = 0; + for (auto &level_edges: cf_leaf->vertex->E) { + count += level_edges.second->size(); + } + // if (num_cf_edges[vertex] > 1000) { + // if (count > 1000) { + // std::cout << "THIS IS WAY TOO HIGH: " << num_cf_edges[vertex] << std::endl; + // std::cout << " counted as" << count << std::endl; + // std::cout << " total degree: " << num_edges[vertex] << std::endl; + // std::cout << " num pending dense edges: " << num_pending_dense_edges[vertex] << std::endl; + // // std::cout << "count_explicit_neighbors for vertex: " << vertex << " returning cached value: " << num_cf_edges[vertex] << std::endl; + // // return cf_algo.leaves[vertex]->getEdgeLevelCount(); + // // return cf_algo.leaves[vertex]->getEdgeLevelCount(); + // } + return num_cf_edges[vertex]; + // return count; + } + + inline void insert_to_sketch(node_id_t u, node_id_t v) { + node_id_t src = std::min(u, v); + node_id_t dst = std::max(u, v); + sketching_algo.update(GraphUpdate{Edge{u, v}, INSERT}); + auto edge_id = concat_pairing_fn(u, v); + recovery_sketches[u]->update(edge_id); + recovery_sketches[v]->update(edge_id); + total_sketched_edges++; + } + inline void delete_from_sketch(node_id_t u, node_id_t v) { + node_id_t src = std::min(u, v); + node_id_t dst = std::max(u, v); + sketching_algo.update(GraphUpdate{Edge{u, v}, DELETE}); + auto edge_id = concat_pairing_fn(u, v); + recovery_sketches[u]->update(edge_id); + recovery_sketches[v]->update(edge_id); + total_sketched_edges--; + } + + bool is_forest_edge_from_sketch(Edge edge) { + // TODO - watch out for performance penalty of this. + // might be a reason to use an alternate scheme + return edges_from_sketch.find(concat_pairing_fn(edge.src, edge.dst)) != edges_from_sketch.end(); + } + + bool is_edge_in_cf(Edge edge) { + // TODO - watch out for performance penalty of this. 
+ // might be a reason to use an alternate scheme + // return cf_algo.leaves[edge.src]->getEdgeLevel(edge.dst) != MAX_LEVEL + 2; + // auto ret = cf_algo.leaves[edge.src]->getEdgeLevel(edge.dst) <= MAX_LEVEL; + return cf_algo.leaves[edge.src]->getEdgeLevel(edge.dst) <= MAX_LEVEL; + } + + bool is_vertex_sketched(node_id_t vertex) { + return _is_vertex_sketched.find(vertex) != _is_vertex_sketched.end(); + } + + void initialize_vertex_sketch(node_id_t vertex) { + // std::cout << "Initializing sketch for vertex " << vertex << std::endl << " with neighbors count " + // << count_explicit_neighbors(vertex) << std::endl; + // TODO - is basically a no-op from the perspective of the sketching algo + if (is_vertex_sketched(vertex)) { + return; + } + sketching_algo.initialize_node(vertex); + _is_vertex_sketched.insert(vertex); + // TODO - realistically, we dont need THAT many recovery sketches + // i think 5 sample should be enough? + double cleanup_adjustment_factor = 5.0 / (log2(num_nodes)); + // double cleanup_adjustment_factor = 1.0; + recovery_sketches[vertex] = new SparseRecovery(num_nodes, 128, cleanup_adjustment_factor, seed); + + // update your neighbors' dense edge counts + for (auto &level_edges: cf_algo.leaves[vertex]->vertex->E) { + for (node_id_t neighbor: *level_edges.second) { + if (is_vertex_sketched(neighbor)) { + num_pending_dense_edges[neighbor]++; + } + } + } + // for (size_t level=0; level < MAX_LEVEL; level++) { + // auto edge_set = localTree::getEdgeSet(cf_algo.leaves[vertex], level); + // if (edge_set) { + // for (node_id_t neighbor: *edge_set) { + // if (is_vertex_sketched(neighbor)) { + // num_pending_dense_edges[neighbor]++; + // } + // } + // } + // } + } + + void uninitialize_vertex_sketch(node_id_t vertex) { + // WEIRD CASE - even though this doesnt put the edges into the CF from the sketch, + // it takes responsibility of updating dense edge counts. + // WHICH MEANS - it's gonna remove a pending dense edge that was NEVER counted. 
+ // unless uninitialize is called before flushing + // std::cout << "Uninitializing sketch for vertex " << vertex << std::endl; + unlikely_if (!is_vertex_sketched(vertex)) { + return; + } + _is_vertex_sketched.erase(vertex); + // TODO - for now, the cleanup sketch isn't deleted by destructing + delete recovery_sketches[vertex]->cleanup_sketch; + delete recovery_sketches[vertex]; + recovery_sketches.erase(vertex); + // std::cout << "Uninitialized sketch for vertex " << vertex << std::endl; + + //update your neighbors' dense edge counts + for (auto &level_edges: cf_algo.leaves[vertex]->vertex->E) { + for (node_id_t neighbor: *level_edges.second) { + // note that we do this for EVERY edge in the CF + // EXCEPT for the ones that are because of the sketching algo + if (!is_forest_edge_from_sketch(Edge{vertex, neighbor})) { + num_pending_dense_edges[neighbor]--; + } + } + } + sketching_algo.uninitialize_node(vertex); + } + + void flush_transaction_log() { + // std::cout << "Flushing transaction log of size: " << sketching_algo.get_transaction_log().size() << std::endl; + // TODO - maybe get rid of this line, but rn we need it for correctness potentially: + // sketching_algo.process_all_updates(); + for (auto &update: sketching_algo.get_transaction_log()) { + if (update.type == DELETE) { + remove_from_cf(update.edge.src, update.edge.dst); + edges_from_sketch.erase(concat_pairing_fn(update.edge.src, update.edge.dst)); + } + else { + insert_to_cf(update.edge.src, update.edge.dst); + edges_from_sketch.insert(concat_pairing_fn(update.edge.src, update.edge.dst)); + } + } + sketching_algo.flush_transaction_log(); + } + + public: + HybridConnectivityManager(node_id_t num_nodes, uint32_t num_tiers, int batch_size, size_t seed) + : num_nodes(num_nodes), sketching_algo(num_nodes, num_tiers, batch_size, seed), cf_algo(num_nodes), seed(seed) { + num_pending_dense_edges.resize(num_nodes, 0); + num_cf_edges.resize(num_nodes, 0); + num_edges.resize(num_nodes, 0); + } + + ~HybridConnectivityManager() {} + void flush_edges_to_sketch(node_id_t vertex_to_flush) { + // 1) find all edges incident to vertex_to_flush AND to a dense edge + _neighbors_buffer.clear(); + for (auto &level_edges: cf_algo.leaves[vertex_to_flush]->vertex->E) { + for (node_id_t neighbor: *level_edges.second) { + // TODO - double check if this is the right way to do this + if (is_vertex_sketched(neighbor) && !is_forest_edge_from_sketch(Edge{vertex_to_flush, neighbor})) { + // if the edge is not from the sketching algo, and it's connected to a dense vertex + // add it to the buffer and + // and increment the pending dense edge count + _neighbors_buffer.push_back(neighbor); + } + } + } + + // remove duplicates + // std::sort(_neighbors_buffer.begin(), _neighbors_buffer.end()); + // auto last = std::unique(_neighbors_buffer.begin(), _neighbors_buffer.end()); + // _neighbors_buffer.resize(std::distance(_neighbors_buffer.begin(), last)); + // reason for separate loops: see if improvements can be had from figuring out + // a bulk insertion strategy + + // 2) increment their pending_dense_edge counts (but don't flush them yourself) + // (since this vertex is about to densify) + // for (node_id_t neighbor: _neighbors_buffer) { + // num_pending_dense_edges[neighbor]++; + // } + // remove edges from the cluster forest + // NO longer doing step 2 since we initialized elsewhere + for (node_id_t neighbor: _neighbors_buffer) { + remove_from_cf(vertex_to_flush, neighbor); + } + + // 3) insert them into the sketching algo + // AND the recovery sketches + for 
(node_id_t neighbor: _neighbors_buffer) { + if (neighbor != vertex_to_flush) { + insert_to_sketch(vertex_to_flush, neighbor); + } + } + // clear pending_num_dense_edges for this vertex + num_pending_dense_edges[vertex_to_flush] = 0; + // apply the transaction log + // flush_transaction_log(); + // TODO - just do this in reads for now. + // we should think about this + + } + + bool check_and_perform_recovery(node_id_t vertex) { + // TODO - there is still a bug with updating pending dense edges + return false; + /* + Assumes the vertex is sketched + Checks if the recovery sketch is sufficiently sparse + If so, performs a recovery attempt + */ + // or use the explicit degree because of well-formed stream assumption + if (!is_vertex_sketched(vertex)) { + return false; + } + // std::cout << "Checking recovery for vertex " << vertex << std::endl; + // std::cout << "num edges for vertex " << vertex << " is " << num_edges[vertex] << std::endl; + likely_if (num_edges[vertex] > MOVE_TO_SKETCH / 4) { + return false; + } + // likely_if (!recovery_sketches[vertex]->worth_recovery_attempt()) { + // return false; + // } + auto recovery_attempt = recovery_sketches[vertex]->recover(); + unlikely_if (recovery_attempt.result == FAILURE) { + // TODO - handle failure case + return false; + } + // std::cout << "RECOVERY SUCCEEDED YA HURD" << std::endl; + // std::cout << "edge count for vertex " << vertex << " is " << num_edges[vertex] << std::endl; + // std::cout << "edge count in cf for vertex " << vertex << " is " << num_cf_edges[vertex] << std::endl; + // std::cout << "recovered: " << recovery_attempt.recovered_indices.size() << std::endl; + // then remove the edge from neighbors' recovery structures + for (vec_t &vec: recovery_attempt.recovered_indices) { + Edge edge = inv_concat_pairing_fn(vec); + node_id_t other_vertex = edge.src == vertex ? edge.dst : edge.src; + recovery_sketches[other_vertex]->update(vec); + } + // and flush the edges out of the sketching algo + for (vec_t &vec: recovery_attempt.recovered_indices) { + Edge edge = inv_concat_pairing_fn(vec); + sketching_algo.update(GraphUpdate{edge, DELETE}); + } + // before we flush the transaction log - uninitialize + // this has to happen here by current designs, since we only want to decrement + // pending_dense_edges for edges that WERE NOT already part of the recovery process + // std::cout << "Spooky: Uninitializing sketch for vertex " << vertex << std::endl; + uninitialize_vertex_sketch(vertex); + + // and apply the transaction log + flush_transaction_log(); + // and add the edges back to the cluster forest + // NOTE - WE KNOW THAT none of the edges are already in the cluster forest + // this is because we applied the transaction log, so any edges in the forest that + // came for a sketch forest were removed. + // TODO - it might be worth thinking about this and optimizing + // i.e. if we just apply the transaction log, we might delete an edge from the cf, + // and then put it right back here later. 
+ // NOTE - THERE MIGHT BE DOUBLE-DIPPED EDGES + // + for (vec_t &vec: recovery_attempt.recovered_indices) { + Edge edge = inv_concat_pairing_fn(vec); + insert_to_cf(edge.src, edge.dst); + } + return true; + } + + inline void insert_to_cf(node_id_t src, node_id_t dst) { + cf_algo.insert(src, dst); + num_cf_edges[src]++; + num_cf_edges[dst]++; + } + inline void remove_from_cf(node_id_t src, node_id_t dst) { + cf_algo.remove(src, dst); + num_cf_edges[src]--; + num_cf_edges[dst]--; + } + + void update(GraphUpdate update) { + // external guarantee: well-formed stream. a remove is only called if the edge exists + // would be nice to get rid of this assumption + if (update.edge.src == update.edge.dst) { + // no self-loops + std::cout << "WARNING: self-loop detected on vertex " << update.edge.src << std::endl; + return; + } + if (update.edge.src > update.edge.dst) { + std::swap(update.edge.src, update.edge.dst); + } + if (update.type == INSERT) { + num_edges[update.edge.src]++; + num_edges[update.edge.dst]++; + total_num_edges++; + + // if both endpoints are sketched AND the endpoints are connected in the cf + // we can shortcut and just insert into the sketching algo + if (is_vertex_sketched(update.edge.src) && is_vertex_sketched(update.edge.dst)) { + if (cf_algo.is_connected(update.edge.src, update.edge.dst)) { + // std::cout << "Inserting edge from sketching algo: " << update.edge.src << ", "<< update.edge.dst << std::endl; + insert_to_sketch(update.edge.src, update.edge.dst); + return; + } + } + + insert_to_cf(update.edge.src, update.edge.dst); + + // update num_pending_dense_edges to reflect the edge + // being inserted + if (is_vertex_sketched(update.edge.src)) { + num_pending_dense_edges[update.edge.dst]++; + } + if (is_vertex_sketched(update.edge.dst)) { + num_pending_dense_edges[update.edge.src]++; + } + // i.e. 
the state of this should be correct BEFORE we + // potentially initialize the sketches below + + // check to see if we densified the vertices enough to initialize their sketches + unlikely_if (!is_vertex_sketched(update.edge.src) && num_edges[update.edge.src] >= DENSE_THRESHOLD) { + // these functions should be no-ops on dense edges + // std::cout << "neighbor count for " << update.edge.src << " is " << count_explicit_neighbors(update.edge.src) << std::endl; + initialize_vertex_sketch(update.edge.src); + flush_edges_to_sketch(update.edge.src); + + } + unlikely_if (!is_vertex_sketched(update.edge.dst) && num_edges[update.edge.dst] >= DENSE_THRESHOLD) { + // std::cout << "neighbor count for " << update.edge.dst << " is " << count_explicit_neighbors(update.edge.dst) << std::endl; + initialize_vertex_sketch(update.edge.dst); + flush_edges_to_sketch(update.edge.dst); + } + + // logic for updating pending dense edge counts + potentially flushing out + // dense edges to the sketching structure + if (is_vertex_sketched(update.edge.dst)) { + if (num_pending_dense_edges[update.edge.dst] >= MOVE_TO_SKETCH) { + flush_edges_to_sketch(update.edge.dst); + } + } + if (is_vertex_sketched(update.edge.src)) { + if (num_pending_dense_edges[update.edge.src] >= MOVE_TO_SKETCH) { + flush_edges_to_sketch(update.edge.src); + } + } + } + else if (update.type == DELETE) { + num_edges[update.edge.src]--; + num_edges[update.edge.dst]--; + total_num_edges--; + + // TODO - eventually do more precise casework + // if edge exists in the CF (1): + // * a) edge originally comes from the sketch forest: update the sketch algo; apply transaction log + // * b) edge originally comes from the CF: remove it from the CF and you're done. + + // if not in cluster forest (2): + // TODO - this logic should check the cf for which edges exist in it + // if (cf_edges[update.edge.src].find(update.edge.dst) != cf_edges[update.edge.src].end()) { + // if (cf_algo.has_edge(update.edge.src, update.edge.dst)) { + if (this->is_edge_in_cf(update.edge)) { + edge_id_t edge_id = concat_pairing_fn(update.edge.src, update.edge.dst); + // if edge comes from sketching algo: + if (edges_from_sketch.find(edge_id) != edges_from_sketch.end()) { + // std::cout << "Connectivity edge from sketching algo: " << update.edge.src << ", "<< update.edge.dst << std::endl; + // case a) + // deleting from sketching algo + delete_from_sketch(update.edge.src, update.edge.dst); + // TODO - can we be lazier about this? + sketching_algo.process_all_updates(); + flush_transaction_log(); + check_and_perform_recovery(update.edge.src); + check_and_perform_recovery(update.edge.dst); + // can we do defered work: yes + // do we have to: ??? figure out + } + else { + //case b) edge does not come from sketching algo + remove_from_cf(update.edge.src, update.edge.dst); + + // TODO - same logic is needed as above to DECREMENT pending dense edges + // in the sparse part, if this were the case. + + if (is_vertex_sketched(update.edge.dst)) + { + num_pending_dense_edges[update.edge.src]--; + } + + if (is_vertex_sketched(update.edge.src)) + { + num_pending_dense_edges[update.edge.dst]--; + } + } + } + // 2) edge does not exist in the CF: + // * it must be in the sketch algo, so update the sketch algo and apply transaction log. 
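+      // NB, as currently written: the push_back into non_tree_deletion_buffer
+      // in the branch below is commented out, so the buffer never fills and
+      // these non-tree deletions are effectively dropped until that line is
+      // re-enabled.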
+      // 2) the edge does not exist in the CF:
+      //  * it must be in the sketch algo, so update the sketch algo and apply the transaction log.
+      else {
+        // we can buffer this deletion as long as:
+        // 1) we know the edge does not disconnect two components
+
+        non_tree_deletion_buffer.push_back(concat_pairing_fn(update.edge.src, update.edge.dst));
+        if (non_tree_deletion_buffer.size() >= 100) {
+          // std::cout << "Flushing non-tree deletion buffer of size: " << non_tree_deletion_buffer.size() << std::endl;
+          for (edge_id_t edge_id: non_tree_deletion_buffer) {
+            Edge edge = inv_concat_pairing_fn(edge_id);
+            delete_from_sketch(edge.src, edge.dst);
+            check_and_perform_recovery(edge.src);
+            check_and_perform_recovery(edge.dst);
+          }
+          non_tree_deletion_buffer.clear();
+          flush_transaction_log();
+        }
+        // sketching_algo.update(update);
+        // delete_from_sketch(update.edge.src, update.edge.dst);
+        // TODO - verify that we don't need to flush the transaction log
+        // flush_transaction_log();
+        // check_and_perform_recovery(update.edge.src);
+        // check_and_perform_recovery(update.edge.dst);
+      }
+      // TODO - eventually implement a check to see if we need to remove
+      // one of the vertices from the sketch algo and dump the edges out.
+    }
+  }
+
+  bool connectivity_query(node_id_t a, node_id_t b) {
+    sketching_algo.process_all_updates();
+    flush_transaction_log();
+    return cf_algo.is_connected(a, b);
+  }
+
+  std::vector<std::set<node_id_t>> cc_query() {
+    sketching_algo.process_all_updates();
+    flush_transaction_log();
+    // TODO - this ain't great.
+    std::vector<std::set<node_id_t>> ret;
+    std::unordered_map<uint64_t, std::set<node_id_t>> component_map;
+    for (node_id_t i=0; i < num_nodes; i++) {
+      localTree *root = localTree::getRoot(cf_algo.leaves[i]);
+      uint64_t root_id = (uint64_t) root;
+      // std::cout << "root_id " << root_id << " for node " << i << std::endl;
+      auto it = component_map.find(root_id);
+      if (it == component_map.end()) {
+        component_map[root_id] = std::set<node_id_t>();
+      }
+      component_map[root_id].insert(i);
+    }
+    for (auto &pair: component_map) {
+      ret.push_back(pair.second);
+    }
+    return ret;
+  }
+
+  size_t num_sketched_vertices() const {
+    return _is_vertex_sketched.size();
+  }
+  size_t total_edges() const {
+    return total_num_edges;
+  }
+  size_t num_sketched_edges() const {
+    return total_sketched_edges;
+  }
+
+  size_t get_space_usage_cf() {
+    return cf_algo.getMemUsage();
+  }
+  size_t get_space_usage_driver() {
+    // get the space usage of the driver itself
+    size_t total = sizeof(*this);
+
+    total += num_pending_dense_edges.capacity() * sizeof(uint16_t);
+    total += num_edges.capacity() * sizeof(uint32_t);
+    total += num_cf_edges.capacity() * sizeof(uint32_t);
+
+    total += _neighbors_buffer.capacity() * sizeof(node_id_t);
+    total += non_tree_deletion_buffer.capacity() * sizeof(edge_id_t);
+
+    total += _is_vertex_sketched.bucket_count() * sizeof(node_id_t);
+    total += edges_from_sketch.bucket_count() * sizeof(edge_id_t);
+
+    return total;
+  }
+  size_t space_usage_conn_sketch() {
+    return sketching_algo.space_usage_bytes();
+  }
+  size_t space_usage_recovery_sketch() {
+    size_t total = 0;
+    for (auto &pair: recovery_sketches) {
+      total += pair.second->space_usage_bytes();
+    }
+    total += recovery_sketches.bucket_count() * sizeof(decltype(recovery_sketches)::value_type);
+    return total;
+  }
+
+};
diff --git a/include/mpi_nodes.h b/include/mpi_nodes.h
index 3800365..be5873b 100644
--- a/include/mpi_nodes.h
+++ b/include/mpi_nodes.h
@@ -6,12 +6,16 @@
 #include "types.h"
 #include "euler_tour_tree.h"
 #include "sketchless_euler_tour_tree.h"
-#include "link_cut_tree.h"
+// #include "link_cut_tree.h"
+#include "lct_v2.h"
 #include "mpi_functions.h"
+#include "sketch/sketch_concept.h"
+#include "sketch/sketch_columns.h"
+#include "sketch_interfacing.h"
 
 enum TreeOperationType {
-  NOT_ISOLATED=0, ISOLATED=1, EMPTY, LINK, CUT, LCT_QUERY
+  NOT_ISOLATED=0, ISOLATED=1, EMPTY, LINK, CUT, LCT_QUERY, MAXIMIZED
 };
 
 typedef struct {
@@ -41,7 +45,7 @@ typedef struct {
 typedef struct {
   node_id_t v = 0;
   uint32_t prev_tier_size = 0;
-  SketchSample sketch_query_result;
+  SketchSample sketch_query_result;
 } RefreshEndpoint;
 
 typedef struct {
@@ -56,9 +60,14 @@ typedef struct {
 class InputNode {
   node_id_t num_nodes;
   uint32_t num_tiers;
-  LinkCutTree link_cut_tree;
-  SketchlessEulerTourTree query_ett;
+  // LinkCutTree<> link_cut_tree;
+  LinkCutTreeMaxAgg link_cut_tree;
+  SketchlessEulerTourTree<> query_ett;
   UpdateMessage* update_buffer;
+
+  std::vector transaction_log;
+
   int buffer_size;
   int buffer_capacity;
   int* split_revert_buffer;
@@ -70,15 +79,42 @@ class InputNode {
 public:
   InputNode(node_id_t num_nodes, uint32_t num_tiers, int batch_size, int seed);
   ~InputNode();
+  // TODO - in reality, the input node needs to communicate
+  // with its tier nodes to initialize data structures.
+  // in any hybrid tests, we're just gonna do this ahead of time.
+  void initialize_node(node_id_t u) {
+    query_ett.initialize_node(u);
+    link_cut_tree.initialize_node(u);
+  }; // no-op
+  void uninitialize_node(node_id_t u) {
+    query_ett.uninitialize_node(u);
+    link_cut_tree.uninitialize_node(u);
+  }; // no-op
+  void initialize_all_nodes() {
+    query_ett.initialize_all_nodes(num_nodes);
+    link_cut_tree.initialize_all_nodes(num_nodes);
+  }; // no-op
   void update(GraphUpdate update);
   void process_all_updates();
   bool connectivity_query(node_id_t a, node_id_t b);
   std::vector<std::set<node_id_t>> cc_query();
   void end();
+
+  void flush_transaction_log() {
+    transaction_log.clear();
+  };
+  size_t space_usage_bytes() const {
+    return 0; // TODO - implement
+  }
+
+  const std::vector& get_transaction_log() const {
+    return transaction_log;
+  }
+
 };
 
 class TierNode {
-  EulerTourTree ett;
+  EulerTourTree ett;
   uint32_t tier_num;
   uint32_t num_tiers;
   int batch_size;
@@ -88,6 +124,18 @@ class TierNode {
   SampleResult* query_result_buffer;
   bool* split_revert_buffer;
   bool using_sliding_window = false;
+  void initialize_node(node_id_t u) {
+    ett.initialize_node(u);
+  };
+  void uninitialize_node(node_id_t u) {
+    ett.uninitialize_node(u);
+  };
+  void initialize_all_nodes(node_id_t max_num_nodes) {
+    ett.initialize_all_nodes(max_num_nodes);
+  };
+  bool is_initialized(node_id_t u) {
+    return ett.is_initialized(u);
+  };
   void update_tier(GraphUpdate update);
   void ett_update_tier(EttUpdateMessage message);
   void refresh_tier(RefreshMessage message);
diff --git a/include/parlay_hash/BUILD b/include/parlay_hash/BUILD
new file mode 100644
index 0000000..67b6627
--- /dev/null
+++ b/include/parlay_hash/BUILD
@@ -0,0 +1,22 @@
+cc_library(
+    name = "epoch",
+    hdrs = ["epoch.h"],
+    deps = [
+        "@parlaylib//parlay:primitives",
+    ],
+)
+
+cc_library(
+    name = "lock",
+    hdrs = ["lock.h"],
+)
+
+cc_library(
+    name = "unordered_map",
+    hdrs = ["unordered_map.h"],
+    deps = [
+        ":epoch",
+        ":lock",
+    ],
+    visibility = ["//visibility:public"],
+)
diff --git a/include/parlay_hash/bigatomic.h b/include/parlay_hash/bigatomic.h
new file mode 100644
index 0000000..32e90f6
--- /dev/null
+++ b/include/parlay_hash/bigatomic.h
@@ -0,0 +1,95 @@
+// An implementation of big_atomic using a SeqLock.
+// +// Supports: +// - Blocking loads (loads never obstruct each-other, but can be indefinitely blocked by writers) +// - Blocking stores +// - Blocking CAS +// +// No additional space usage +// + +#ifndef PARLAYATOMIC_H_ +#define PARLAYATOMIC_H_ + +#include +#include +#include +#include +#include + +namespace parlay { + +template> +struct alignas(32) big_atomic { + + using vtype = long; + using tag = vtype; + + std::atomic version; + V val; + + big_atomic(const V& v) : version(0), val(v) {} + big_atomic() : version(0) {} + + void store_sequential(const V& v) { val = v; } + + V load() { + while (true) { + vtype ver = version.load(std::memory_order_acquire); + V v = val; + std::atomic_thread_fence(std::memory_order_acquire); + if ((ver & 1) == 0 && version.load(std::memory_order_relaxed) == ver) return v; + } + } + + std::pair ll_speculative() { + vtype ver = version.load(std::memory_order_acquire); + V v = val; + std::atomic_thread_fence(std::memory_order_acquire); + return std::pair(v, ver); + } + + std::pair ll() { + while (true) { + int delay = 100; + vtype ver = version.load(std::memory_order_acquire); + V v = val; + std::atomic_thread_fence(std::memory_order_acquire); + if ((ver & 1) == 0 && version.load(std::memory_order_relaxed) == ver) + return std::pair(v,ver); + for (volatile int i = 0; i < delay; i++); + delay = std::min(2 * delay, 1000); + } + } + + bool lv(tag tg) { + return version.load() == tg; + } + + bool sc(tag expected_tag, const V& v) { + bool result = true; + int delay = 100; + while (true) { + vtype ver = version.load(); + if (ver != expected_tag) return false; + if (get_locks().try_lock((long)this, [&] { + if (version.load(std::memory_order_acquire) != expected_tag) + result = false; + else { + version.store(ver + 1, std::memory_order_relaxed); + std::atomic_thread_fence(std::memory_order_release); + val = v; + version.store(ver + 2, std::memory_order_release); + } + return true; + })) + return result; + for (volatile int i = 0; i < delay; i++); + delay = std::min(2 * delay, 2000); + } + } + +}; + +} // namespace parlay +#endif // PARLAYATOMIC_H_ diff --git a/include/parlay_hash/parallel.h b/include/parlay_hash/parallel.h new file mode 100644 index 0000000..0394791 --- /dev/null +++ b/include/parlay_hash/parallel.h @@ -0,0 +1,36 @@ +#ifdef USE_PARLAY +#include +#include +#include +namespace parlay { +#define PARLAY_USE_STD_ALLOC 1 + + using scheduler_type = internal::scheduler_type; + + template + long tabulate_reduce(long n, const F& f) { + return parlay::reduce(parlay::delayed::tabulate(n, [&] (size_t i) { + return f(i);})); + } +} +#else +namespace parlay { + + struct scheduler_type { + scheduler_type(int num_procs) {} + }; + + template + long tabulate_reduce(long n, const F& f) { + long r = 0; + for (long i=0; i < n; i++) + r += f(i); + return r; + } + + template + void parallel_for(long n, const F& f) { + for (long i=0; i < n; i++) f(i); + } +} +#endif diff --git a/include/parlay_hash/parlay_hash.h b/include/parlay_hash/parlay_hash.h new file mode 100644 index 0000000..31ede06 --- /dev/null +++ b/include/parlay_hash/parlay_hash.h @@ -0,0 +1,1181 @@ +#ifndef PARLAY_HASH_H_ +#define PARLAY_HASH_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "bigatomic.h" +#include "parallel.h" + +constexpr bool PrintGrow = false; + +namespace parlay { + +template +struct parlay_hash { + using Entry = typename Entries::Entry; + using K = typename Entry::Key; + + // 
********************************************* + // Various parameters + // ********************************************* + + // set to grow by factor of 8 (2^3) + static constexpr int log_grow_factor = 2; + static constexpr int grow_factor = 1 << log_grow_factor; + + // groups of block_size buckets are copied over by a single thread + // the block size typically grows with size, but starts here + static constexpr long min_block_size = 4; + + // buffer_size is picked so state fits in a cache line (if it can) + static constexpr long buffer_size = (sizeof(Entry) > 24) ? 1 : 48 / sizeof(Entry); + + // log_2 of the expected number of entries in a bucket (<= buffer_size) + static constexpr long log_bucket_size = + (buffer_size == 1) ? 0 : ((buffer_size == 2) ? 1 : ((buffer_size <= 4) ? 2 : ((buffer_size <= 8) ? 3 : 3))); + + static long get_block_size(int num_bits) { + return num_bits < 16 ? 16 : 256; } + + // The size of a bucket that causes the table to grow, i.e. if any + // insert causes the bucket to reach the given size, then start + // growing. + // Technically this should be something like c log (n) / log(log n)) + // for a small constant c if each bucket is expected to hold 1 + // element, but.... each bucket can be expected to hold more than one. + static long get_overflow_size(int num_bits) { + if constexpr (log_bucket_size == 0) return num_bits < 18 ? 10 : 16; + else if constexpr (log_bucket_size == 1) return num_bits < 18 ? 11 : 18; + else if constexpr (log_bucket_size == 2) return num_bits < 18 ? 12 : 20; + else if constexpr (log_bucket_size == 3) return num_bits < 18 ? 14 : 22; + else return num_bits < 18 ? 20 : 24; + } + + // clear_at_end will cause the scheduler and epoch-based collector + // to clear their state on destruction + static constexpr bool default_clear_at_end = true; + bool clear_memory_and_scheduler_at_end; + + // a reference to the scheduler (null if not to be cleared) + parlay::scheduler_type* sched_ref; + + // ********************************************* + // The state structure for each bucket + // ********************************************* + + // for overflow lists for each bucket + struct link { + Entry entry; + link* next; + link(const Entry& entry, link* next) : entry(entry), next(next) { } + }; + + // for delayed reclamation of links using an epoch-based collector + epoch::memory_pool* link_pool; + + link* new_link(const Entry& entry, link* l) { + return link_pool->New(entry, l); } + void retire_link(link* l) { link_pool->Retire(l);} + + // Each bucket contains a "state", which consists of a fixed size + // buffer of entries (buffer_size) and an overflow list. The first + // buffer_size entries in the bucket are kept in the buffer, and any + // overflow goes to the list. The head stores both the pointer to + // the overflow list (lower 56 bits) and the number of elements in + // the buffer, or buffer_size+1 if overfull (top 8 bits). 
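// Standalone illustration of the head encoding described above: the
// overflow-list pointer lives in the low 48 bits of list_head and the
// buffer count in the bits above bit 48 (assumes 48-bit user pointers,
// as on current x86-64/ARM64 Linux). Names are local to the example.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct Link { int payload; };

static uint64_t make_head(Link* l, uint64_t count) {
  return ((uint64_t)l) | (count << 48);
}
static Link* head_ptr(uint64_t head) {
  return (Link*)(head & ((1ull << 48) - 1));
}
static uint64_t head_count(uint64_t head) {
  return (head >> 48) & 255ull;
}

int main() {
  Link l{42};
  uint64_t head = make_head(&l, 3);  // 3 entries in the buffer
  assert(head_ptr(head) == &l && head_count(head) == 3);
  std::printf("count=%llu payload=%d\n",
              (unsigned long long)head_count(head), head_ptr(head)->payload);
}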
+ struct state { + public: + size_t list_head; + Entry buffer[buffer_size]; + state() : list_head(0) {} + state(const Entry& e) : list_head(1ul << 48) { + buffer[0] = e; + } + static constexpr size_t forwarded_val = 1ul; + + size_t make_head(link* l, size_t bsize) { + return (((size_t) l) | (bsize << 48)); } + + // update overflow list with new ptr (assumes buffer is full) + state(const state& s, link* ptr) + : list_head(make_head(ptr, buffer_size + (ptr != nullptr))) { + for (int i=0; i < buffer_size; i++) + buffer[i] = s.buffer[i]; + } + + // add entry to the bucket state (in buffer if fits, otherwise at head of overflow list) + template + state(const state& s, Entry e, const NL& new_link) { + for (int i=0; i < std::min(s.buffer_cnt(), buffer_size); i++) + buffer[i] = s.buffer[i]; + if (s.buffer_cnt() < buffer_size) { + buffer[s.buffer_cnt()] = e; + list_head = make_head(nullptr, s.buffer_cnt() + 1); + } else { + link* l = new_link(e, s.overflow_list()); + list_head = make_head(l, buffer_size + 1); + } + } + + // add entry to buffer (assumes it fits) -- specialization of above + state(const state& s, Entry e) : list_head(make_head(nullptr, s.buffer_cnt() + 1)) { + for (int i=0; i < s.buffer_cnt(); i++) + buffer[i] = s.buffer[i]; + buffer[s.buffer_cnt()] = e; + } + + // remove buffer entry j, replace with first from overflow list (assumes there is overflow) + state(const state& s, link* ptr, int j) + : list_head(make_head(ptr->next, buffer_size + (ptr->next != nullptr))) { + for (int i=0; i < buffer_size; i++) + buffer[i] = s.buffer[i]; + buffer[j] = Entry{ptr->entry}; + } + + // remove buffer entry j, replace with last entry in buffer (assumes no overflow) + state(const state& s, int j) : list_head(make_head(nullptr, s.buffer_cnt() - 1)) { + if (s.overflow_list() != nullptr) abort(); + for (int i=0; i < s.buffer_cnt(); i++) + buffer[i] = s.buffer[i]; + buffer[j] = buffer[s.buffer_cnt() - 1]; + } + + state(bool x) : list_head(forwarded_val) {} + + bool is_forwarded() const {return list_head == forwarded_val ;} + + // number of entries in buffer, or buffer_size+1 if overflow + long buffer_cnt() const {return (list_head >> 48) & 255ul ;} + + // number of entries in bucket (includes those in the overflow list) + long size() const { + if (buffer_cnt() <= buffer_size) return buffer_cnt(); + return buffer_size + list_length(overflow_list()); + } + + // get the overflow list + link* overflow_list() const { + return (link*) (list_head & ((1ul << 48) - 1));} + }; + + // returns std::optional(f(entry)) for entry with given key + template + static auto find_in_list(const link* nxt, const K& k, const F& f) { + using rtype = typename std::invoke_result::type; + long cnt = 0; + while (nxt != nullptr && !nxt->entry.equal(k)) { + nxt = nxt->next; + cnt++; + } + if (nxt == nullptr) + return std::pair(std::optional(), cnt); + else + return std::pair(std::optional(f(nxt->entry)), 0l); + } + + // If k is found copies list elements up to k, and keeps the old + // tail past k. Returns the number of new nodes that will need to + // be reclaimed, the head of the new list, and the link that is removed. 
+ // Returns [0, nullptr, nullptr] if k is not found + std::tuple remove_from_list(link* nxt, const K& k) { + if (nxt == nullptr) + return std::tuple(0, nullptr, nullptr); + else if (nxt->entry.equal(k)) + return std::tuple(1, nxt->next, nxt); + else { + auto [len, ptr, removed] = remove_from_list(nxt->next, k); + if (len == 0) return std::tuple(0, nullptr, nullptr); + return std::tuple(len + 1, new_link(nxt->entry, ptr), removed); + } + } + + // update element with a given key in a list. Uses path copying. + // Returns a triple consisting of the position of the key in the list (1 based), + // the head of the new list with the key updated, and the old link that is replaced. + // If the key is not found, nothing is done, the last two results are nullptr, and + // the first result is the length of the list. + template + std::tuple update_list(link* nxt, const K& k, const Constr& constr) { + if (nxt == nullptr) + return std::tuple(0, nullptr, nullptr); + else if (nxt->entry.equal(k)) + return std::tuple(1, link_pool->New(constr(std::optional(nxt->entry)), nxt->next), nxt); + else { + auto [len, ptr, updated] = update_list(nxt->next, k, constr); + if (ptr == nullptr) return std::tuple(len + 1, nullptr, nullptr); + return std::tuple(len + 1, link_pool->New(nxt->entry, ptr), updated); + } + } + + // retires first n elements of a list, but not the entries + void retire_list_n(link* nxt, int n) { + while (n > 0) { + n--; + link* tmp = nxt->next; + retire_link(nxt); + nxt = tmp; + } + } + + // Retires full list and their entries. Used when destructing the + // table. + void retire_list_all(link* nxt) { + while (nxt != nullptr) { + link* tmp = nxt->next; + entries_->retire_entry(nxt->entry); + retire_link(nxt); + nxt = tmp; + } + } + + // Retires full list, but not their entries. Used when copying to a + // new list during expansion, i.e. the entries will be in the new + // list and don't need to be retired. + void retire_list(link* nxt) { + while (nxt != nullptr) { + link* tmp = nxt->next; + retire_link(nxt); + nxt = tmp; + } + } + + static long list_length(link* nxt) { + long len = 0; + while (nxt != nullptr) { + len++; + nxt = nxt->next; + } + return len; + } + + // Find key if it is in the buffer. Return index. + int find_in_buffer(const state& s, const K& k) { + long len = s.buffer_cnt(); + for (long i = 0; i < std::min(len, buffer_size); i++) + if (s.buffer[i].equal(k)) + return i; + return -1; + } + + // Apply f to all entries in the state. + template + void static for_each_in_state(const state& s, const F& f) { + for (long i = 0; i < std::min(s.buffer_cnt(), buffer_size); i++) + f(s.buffer[i]); + link* l = s.overflow_list(); + while (l != nullptr) { + f(l->entry); + l = l->next; + } + } + + // Find entry with given key if in the bucket (state). Return + // optional of f applied to the entry if found, otherwise + // std::nullopt. + template + auto find_in_state(const state& s, const K& k, const F& f) + -> std::optional::type> + { + long len = s.buffer_cnt(); + for (long i = 0; i < std::min(len, buffer_size); i++) + if (s.buffer[i].equal(k)) + return std::optional(f(s.buffer[i])); + if (len <= buffer_size) return std::nullopt; + return find_in_list(s.overflow_list(), k, f).first; + } + + // A bucket is just an "atomic" state. + // a big_atomic is sort of like an std::atomic but supports + // load-linked, store-conditional, and is efficient when the x does + // not fit in a machine word. 
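// A minimal seqlock in the style of big_atomic above: readers retry until
// they see an even version number that is unchanged across the read; a
// writer bumps the version to odd, writes, then bumps it back to even.
// (The real code also supports concurrent writers via a lock table; as in
// the original, the racy read of val is validated by re-checking the version.)
#include <atomic>
#include <cstdio>

template <typename V>
struct SeqLocked {
  std::atomic<long> version{0};
  V val{};

  V load() {
    while (true) {
      long v1 = version.load(std::memory_order_acquire);
      V copy = val;  // may be torn; validated below
      std::atomic_thread_fence(std::memory_order_acquire);
      long v2 = version.load(std::memory_order_relaxed);
      if ((v1 & 1) == 0 && v1 == v2) return copy;  // even and unchanged => clean
    }
  }

  void store(const V& v) {  // single-writer variant, for brevity
    long ver = version.load(std::memory_order_relaxed);
    version.store(ver + 1, std::memory_order_relaxed);  // odd: write in progress
    std::atomic_thread_fence(std::memory_order_release);
    val = v;
    version.store(ver + 2, std::memory_order_release);  // even again
  }
};

int main() {
  SeqLocked<int> x;
  x.store(7);
  std::printf("%d\n", x.load());
}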
+ using bckt = big_atomic; + + // used for load-linked, store-conditionals + using tag_type = typename big_atomic::tag; + + // wrapper to ensure alignment + struct alignas(64) bucket { bckt v; }; + + // initialize an uninitialized bucket + static void initialize(bucket& bck) { + new (&bck.v) big_atomic(state()); + } + + // ********************************************* + // The table structures + // Each version increases in size, by grow_factor + // ********************************************* + + // status of a block of buckets, used when initializing and when copying to a new version + enum status : char {Uninit, Initializing, Empty, Working, Done}; + + // A single version of the table. + // A version includes a sequence of "size" "buckets". + // New versions are added as the hash table grows, and each holds a + // pointer to the next larger version, if one exists. + struct table_version { + std::atomic next; // points to next version if created + std::atomic finished_block_count; //number of blocks finished copying + long num_bits; // log_2 of size + size_t size; // number of buckets + long block_size; // size of each block used for copying + int overflow_size; // size of bucket to trigger next expansion + bucket* buckets; // sequence of buckets + //sequence buckets; // sequence of buckets + std::atomic* block_status; // status of each block while copying + + // The index of a key is the highest num_bits of the lowest + // 48-bits of the hash value. Using the highest num_bits ensures + // that when growing, a bucket will go to grow_factor contiguous + // buckets in the next table. + long get_index(const K& k) { + size_t h = Entry::hash(k); + return (h >> (48 - num_bits)) & (size-1u);} + + bckt* get_bucket(const K& k) { + return &buckets[get_index(k)].v; } + + // initial table version, n indicating size + table_version(long n) + : next(nullptr), + finished_block_count(0), + num_bits(std::max((long) std::ceil(std::log2(min_block_size-1)), + (long) std::ceil(std::log2(1.5*n)) - log_bucket_size)), + size(1ul << num_bits), + block_size(num_bits < 10 ? min_block_size : get_block_size(num_bits)), + overflow_size(get_overflow_size(num_bits)) + { + //if (PrintGrow) std::cout << "initial size: " << size << std::endl; + buckets = (bucket*) malloc(sizeof(bucket)*size); + block_status = (std::atomic*) malloc(sizeof(std::atomic) * size/block_size); + parallel_for(size, [&] (long i) { initialize(buckets[i]);}); + parallel_for(size/block_size, [&] (long i) { block_status[i] = Empty;}); + } + + // expanded table version copied from smaller version t + table_version(table_version* t) + : next(nullptr), + finished_block_count(0), + num_bits(t->num_bits + log_grow_factor), + size(t->size * grow_factor), + block_size(get_block_size(num_bits)), + overflow_size(get_overflow_size(num_bits)) + { + buckets = (bucket*) malloc(sizeof(bucket)*size); + block_status = (std::atomic*) malloc(sizeof(std::atomic) * size/min_block_size); + } + + ~table_version() { + free(buckets); + free(block_status); + } + }; + + // the current table version + std::atomic current_table_version; + + // the initial table version, used for cleanup on destruction + table_version* initial_table_version; + + // ********************************************* + // Functions for expanding the table + // ********************************************* + + // Called when table should be expanded (i.e. when some bucket is too large). + // Allocates a new table version and links the old one to it. 
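// Why get_index above uses the *high* bits of the 48-bit hash: the index in
// a num_bits-bit table is a prefix of the index in a (num_bits +
// log_grow_factor)-bit table, so a bucket's entries always land in
// grow_factor contiguous buckets of the next version. A standalone check
// (constants local to the example):
#include <cstdint>
#include <cstdio>

static long index_of(uint64_t h, int num_bits) {
  return (h >> (48 - num_bits)) & ((1ul << num_bits) - 1);
}

int main() {
  const int bits = 10, log_grow = 2;       // grow by 4x
  uint64_t h = 0x0000ABCDEF123456ull;      // any 48-bit hash value
  long small = index_of(h, bits);
  long big = index_of(h, bits + log_grow);
  // big always falls in [small * 4, small * 4 + 3]
  std::printf("small=%ld big=%ld block=[%ld,%ld]\n",
              small, big, small * 4, small * 4 + 3);
}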
+ void expand_table(table_version* ht) { + table_version* htt = current_table_version.load(); + if (htt->next == nullptr) { + long n = ht->size; + // if fail on lock, someone else is working on it, so skip + get_locks().try_lock((long) ht, [&] { + if (ht->next == nullptr) { + ht->next = new table_version(ht); + //if (PrintGrow) + // std::cout << "expand to: " << n * grow_factor << std::endl; + } + return true;}); + } + } + + // Copies a bucket into grow_factor new buckets. + void copy_bucket(table_version* t, table_version* next, long i) { + long exp_start = i * grow_factor; + // Clear grow_factor buckets in the next table version to put them in. + for (int j = exp_start; j < exp_start + grow_factor; j++) + initialize(next->buckets[j]); + // copy bucket to grow_factor new buckets in next table version + while (true) { + // the bucket to copy + auto [s, tag] = t->buckets[i].v.ll(); + + // insert into grow_factor buckets (states) for next larger table + state hold[grow_factor]; + size_t mask = grow_factor-1; + for_each_in_state(s, [&] (const Entry& entry) { + size_t idx = next->get_index(entry.get_key()) & mask; + hold[idx] = state(hold[idx], entry, + [&] (const Entry& e, link* l) {return new_link(e,l);}); + }); + + // now store the buckets into table + for (int j = 0; j < grow_factor; j++) + next->buckets[grow_factor * i + j].v.store_sequential(hold[j]); + + // try to replace original bucket with forwarded marker + if (t->buckets[i].v.sc(tag, state(true))) { + retire_list(s.overflow_list()); + break; + } + + // If the attempt failed then someone updated bucket in the meantime so need to retry. + // Before retrying need to clear out already added buckets. + for (int j = exp_start; j < exp_start + grow_factor; j++) { + state ss = next->buckets[j].v.load(); + retire_list(ss.overflow_list()); + next->buckets[j].v.store_sequential(state()); + } + } + } + + // If copying is ongoing (i.e., next is not null), and if the the + // hash bucket given by hashid is not already copied, tries to copy + // the block_size buckets that containing hashid to the next larger + // table version. + void copy_if_needed(table_version* t, long hashid) { + table_version* next = t->next.load(); + if (next != nullptr) { + long num_blocks = t->size/t->block_size; + long block_num = hashid & (num_blocks -1); + long start = block_num * t->block_size; + status st = t->block_status[block_num]; + status old = Empty; + if (st == Done) return; + + // if data is uninitialized, need to initialize + // if (st == Uninit || st == Initializing) { + // status x = Uninit; + // if (t->block_status[block_num].compare_exchange_strong(x, Working)) { + // for (int i = start; i < start + t->block_size; i++) + // initialize(t->buckets[i]); + // t->block_status[block_num] = Empty; + // } else { + // while (t->block_status[block_num] == Initializing) + // for (volatile int i=0; i < 100; i++); + // } + // } + + // This is effectively a try lock on the block_num. + // It blocks other updates on the buckets associated with the block. + else if (st == Empty && + t->block_status[block_num].compare_exchange_strong(old, Working)) { + + // initialize block_status for next grow round + for (int i = 0; i < grow_factor; i++) + next->block_status[grow_factor*block_num + i] = Empty; + + // copy block_size buckets + for (int i = start; i < start + t->block_size; i++) { + copy_bucket(t, next, i); + } + t->block_status[block_num] = Done; + + // If all blocks have been copied then can set current table + // to next. 
Note: this atomic fetch-and-add can be a + // bottleneck and is the reason the block sizes are reasonably + // large (e.g. 256). A smarter combining tree could be used + // if smaller block sizes are needed. + if (++next->finished_block_count == num_blocks) { + //std::cout << "expand done" << std::endl; + current_table_version = next; + } + } else { + // If another thread is working on the block, wait until Done + while (t->block_status[block_num] == Working) { + for (volatile int i=0; i < 100; i++); + } + } + } + } + + // ********************************************* + // Construction and Destruction + // ********************************************* + + // Clear bucket, assuming it is not forwarded. + void clear_bucket(bckt* b) { + auto [s, tag] = b->ll(); + if (!s.is_forwarded() && b->sc(tag, state())) { + for (int j=0; j < std::min(s.buffer_cnt(), buffer_size); j++) { + entries_->retire_entry(s.buffer[j]); + } + retire_list_all(s.overflow_list()); + } + } + + // Clears bucket or if the bucket is forwarded (during copying) + // then clear the forwarded buckets. + void clear_bucket_rec(table_version* t, long i) { + bckt* b = &(t->buckets[i].v); + state head = b->load(); + if (!head.is_forwarded()) + clear_bucket(b); + else { + table_version* next = t->next.load(); + for (int j = 0; j < grow_factor; j++) + clear_bucket_rec(next, grow_factor * i + j); + } + } + + void clear_buckets() { + table_version* ht = current_table_version.load(); + // clear buckets from current and future versions + parallel_for(ht->size, [&] (size_t i) { + clear_bucket_rec(ht, i);}); + } + + // Clear all memory. + // Reinitialize to table of size 1 if specified, and by default. + void clear(bool reinitialize = true) { + clear_buckets(); + + // now reclaim the arrays + table_version* tv = initial_table_version; + while (tv != nullptr) { + table_version* tv_next = tv->next; + delete tv; + tv = tv_next; + } + // reinitialize + if (reinitialize) { + current_table_version = new table_version(1); + initial_table_version = current_table_version; + } + } + + Entries* entries_; + + // Creates initial table version for the given size. The + // clear_at_end allows to free up the epoch-based collector's + // memory, and the scheduler. + parlay_hash(long n, Entries* entries, bool clear_at_end = default_clear_at_end) + : entries_(entries), + clear_memory_and_scheduler_at_end(clear_at_end), + sched_ref(clear_at_end ? + new parlay::scheduler_type(std::thread::hardware_concurrency()) : + nullptr), + link_pool(clear_at_end ? + new epoch::memory_pool() : + &epoch::get_default_pool()), + current_table_version(new table_version(n)), + initial_table_version(current_table_version.load()) + { } + + ~parlay_hash() { + clear(false); + if (clear_memory_and_scheduler_at_end) { + delete sched_ref; + delete link_pool; + } + } + + // ********************************************* + // Operations + // ********************************************* + + // Updates b, s, tag, and idx to the correct bucket, state, tag and + // index if the the state s is forwarded. Is called recursively, + // but unlikely to go more than one level, and when not growing will + // return immediately. 
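// The per-block hand-off in copy_if_needed above is a try-lock built from a
// single compare_exchange on the block's status word. Minimal standalone
// version of that protocol (statuses as in the enum above):
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

enum Status : char { Empty, Working, Done };

int main() {
  std::atomic<Status> block_status{Empty};
  std::atomic<int> copies{0};

  auto copy_if_needed = [&] {
    Status old = Empty;
    if (block_status.compare_exchange_strong(old, Working)) {
      copies++;             // exactly one thread copies the block
      block_status = Done;
    } else {
      while (block_status.load() == Working)  // everyone else waits for Done
        std::this_thread::yield();
    }
  };

  std::vector<std::thread> ts;
  for (int i = 0; i < 8; i++) ts.emplace_back(copy_if_needed);
  for (auto& t : ts) t.join();
  std::printf("block copied %d time(s)\n", copies.load());  // always 1
}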
+  void check_bucket_and_state(table_version* t, const K& k,
+                              big_atomic<state>*& b, state& s, tag_type& tag, long& idx) {
+    if (s.is_forwarded()) {
+      table_version* nxt = t->next.load();
+      idx = nxt->get_index(k);
+      b = &(nxt->buckets[idx].v);
+      std::tie(s, tag) = b->ll();
+      check_bucket_and_state(nxt, k, b, s, tag, idx);
+    }
+  }
+
+  // find in the bucket, or if forwarded (during copying) then follow
+  // through to the next table, possibly repeatedly, although
+  // unlikely.
+  template <typename F>
+  auto find_in_bucket_rec(table_version* t, bckt* s, const K& k, const F& f)
+    -> std::optional<typename std::invoke_result<F,Entry>::type>
+  {
+    state x = s->load();
+    // if the bucket is forwarded, go to the next version
+    if (x.is_forwarded()) {
+      table_version* nxt = t->next.load();
+      return find_in_bucket_rec(nxt, nxt->get_bucket(k), k, f);
+    }
+    return find_in_state(x, k, f);
+  }
+
+  // Finds the entry with the key.
+  // Returns an optional which is empty if the key is not in the table,
+  // and contains f(e) otherwise, where e is the entry matching the key.
+  // NOTE: this is the most important function to optimize for performance.
+  // Hence the one hand-inlined buffer scan and the one prefetch (not used
+  // anywhere else in the code).
+  template <typename F>
+  auto Find(const K& k, const F& f)
+    -> std::optional<typename std::invoke_result<F,Entry>::type>
+  {
+    table_version* ht = current_table_version.load();
+    long idx = ht->get_index(k);
+    bckt* b = &(ht->buckets[idx].v);
+    // if entries are direct, then safe to scan the buffer without epoch protection
+    if constexpr (Entry::Direct) {
+      auto [s, tag] = b->ll();
+      if (s.is_forwarded())
+        check_bucket_and_state(ht, k, b, s, tag, idx);
+      for (long i = 0; i < std::min(s.buffer_cnt(), buffer_size); i++)
+        if (s.buffer[i].equal(k))
+          return std::optional(f(s.buffer[i]));
+      // if not found and not overfull, then done
+      if (s.buffer_cnt() <= buffer_size) return std::nullopt;
+      // otherwise need to search the overflow list, which requires protection
+      return epoch::with_epoch([&, tag=tag, &s = s] {
+        // if the state has not changed, then just search the list
+        if (b->lv(tag)) return find_in_list(s.overflow_list(), k, f).first;
+        return find_in_bucket_rec(ht, b, k, f);
+      });
+    } else { // if using indirection, always use protection
+      __builtin_prefetch(b); // allows the read to be pipelined with the epoch announcement
+      return epoch::with_epoch([&] () -> std::optional<typename std::invoke_result<F,Entry>::type> {
+        return find_in_bucket_rec(ht, b, k, f);});
+    }
+  }
+
+  // Inserts at key, and does nothing if the key is already in the table.
+  // The constr function constructs the entry to be inserted if needed.
+  // Returns an optional, which is empty if successfully inserted or
+  // contains f(e) if not, where e is the entry matching the key.
+  // (A miniature of this optional-return convention follows below.)
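// The optional-return convention documented above, in miniature: an empty
// optional means "inserted", an engaged one carries f(existing entry).
// std::unordered_map stands in for the concurrent table here.
#include <cstdio>
#include <optional>
#include <string>
#include <unordered_map>

template <typename F>
auto insert_like(std::unordered_map<int, std::string>& m, int k,
                 const std::string& v, const F& f)
    -> std::optional<decltype(f(v))> {
  auto [it, inserted] = m.try_emplace(k, v);
  if (inserted) return std::nullopt;  // success: empty optional
  return f(it->second);               // key present: f applied to old entry
}

int main() {
  std::unordered_map<int, std::string> m;
  auto len = [](const std::string& s) { return s.size(); };
  std::printf("first:  engaged=%d\n", (int)insert_like(m, 1, "abc", len).has_value());
  std::printf("second: f(old)=%zu\n", *insert_like(m, 1, "xy", len));
}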
+ template + auto Insert(const K& key, const Constr& constr, const F& f) + -> std::optional::type> + { + using rtype = std::optional::type>; + return epoch::with_epoch([&] () -> rtype { + auto [e, flag] = insert_(key, constr); + if (flag) return {}; + return rtype(f(e));}); + } + + template + auto insert_(const K& key, const Constr& constr) -> std::pair { + table_version* ht = current_table_version.load(); + long idx = ht->get_index(key); + auto b = &(ht->buckets[idx].v); + int delay = 200; + while (true) { + auto [s, tag] = b->ll(); + copy_if_needed(ht, idx); + check_bucket_and_state(ht, key, b, s, tag, idx); + long len = s.buffer_cnt(); + // if found in buffer then done + for (long i = 0; i < std::min(len, buffer_size); i++) + if (s.buffer[i].equal(key)) return std::pair(s.buffer[i], false); + if (len < buffer_size) { // buffer has space, insert to end of buffer + Entry new_e = constr(); + if (b->sc(tag, state(s, new_e))) return std::pair(new_e, true); + entries_->retire_entry(new_e); // if failed need to ty again + } else if (len == buffer_size) { // buffer full, insert new link + Entry new_e = constr(); + link* new_head = new_link(new_e, nullptr); + if (b->sc(tag, state(s, new_head))) + return std::pair(new_e, true); + entries_->retire_entry(new_head->entry); // if failed need to try again + retire_link(new_head); + } else { // buffer overfull, need to check if in list + auto [x, list_len] = find_in_list(s.overflow_list(), key, identity); + if (list_len + buffer_size > ht->overflow_size) expand_table(ht); + if (x.has_value()) return std::pair(*x, false); // if in list, then done + Entry new_e = constr(); + link* new_head = new_link(new_e, s.overflow_list()); + if (b->sc(tag, state(s, new_head))) // try to add to head of list + return std::pair(new_e, true); + entries_->retire_entry(new_head->entry); // if failed need to ty again + retire_link(new_head); + } + // delay before trying again, only marginally helps + for (volatile int i=0; i < delay; i++); + delay = std::min(2*delay, 5000); // 1000-10000 are about equally good + } + } + + template + auto Upsert(const K& key, const Constr& constr, G& g) + -> std::optional::type> + { + using rtype = std::optional::type>; + table_version* ht = current_table_version.load(); + long idx = ht->get_index(key); + auto b = &(ht->buckets[idx].v); + return epoch::with_epoch([&] () -> rtype { + int delay = 200; + while (true) { + auto [s, tag] = b->ll(); + state out_s = s; + copy_if_needed(ht, idx); + check_bucket_and_state(ht, key, b, s, tag, idx); + long len = s.buffer_cnt(); + bool cont = false; + for (long i = 0; i < std::min(len, buffer_size); i++) { + if (s.buffer[i].equal(key)) { + Entry new_e = constr(std::optional(s.buffer[i])); + out_s.buffer[i] = new_e; + if (b->sc(tag, out_s)) return g(s.buffer[i]); + else { + entries_->retire_entry(new_e); + cont = true; + break; + } + } + } + if (cont) continue; + if (len < buffer_size) { // buffer has space, insert to end of buffer + Entry new_e = constr(std::optional()); + if (b->sc(tag, state(s, new_e))) return std::nullopt; + entries_->retire_entry(new_e); // if failed need to ty again + } else if (len == buffer_size) { // buffer just full, insert new link + link* new_head = new_link(constr(std::optional()), nullptr); + if (b->sc(tag, state(s, new_head))) + return std::nullopt; + entries_->retire_entry(new_head->entry); // if failed need to try again + retire_link(new_head); + } else { // buffer overfull, need to check if in list + link* old_head = s.overflow_list(); + auto [list_len, new_head, 
updated] = update_list(old_head, key, constr); + if (new_head != nullptr) { + if (b->sc(tag, state(s, new_head))) {// try to add to head of list + rtype r = std::optional(g(updated->entry)); + retire_list_n(old_head, list_len); // retire old list + return r; + } else retire_list_n(new_head, list_len); + } else { + if (list_len + buffer_size > ht->overflow_size) expand_table(ht); + new_head = new_link(constr(std::optional()), old_head); + if (b->sc(tag, state(s, new_head))) // try to add to head of list + return std::nullopt; + entries_->retire_entry(new_head->entry); // if failed need to ty again + retire_link(new_head); + } + } + // delay before trying again, only marginally helps + for (volatile int i=0; i < delay; i++); + delay = std::min(2*delay, 5000); // 1000-10000 are about equally good + } + }); + } + + // Removes entry with given key + // Returns an optional which is empty if the key is not in the table, + // and contains f(e) otherwise, where e is the entry that is removed. + template + auto Remove(const K& key, const F& f) + -> std::optional::type> + { + using rtype = std::optional::type>; + table_version* ht = current_table_version.load(); + long idx = ht->get_index(key); + auto b = &(ht->buckets[idx].v); + // if entries are direct safe to scan the buffer without epoch protection + if constexpr (Entry::Direct) { + auto [s, tag] = b->ll(); + copy_if_needed(ht, idx); + check_bucket_and_state(ht, key, b, s, tag, idx); + if (s.buffer_cnt() <= buffer_size) { + int i = find_in_buffer(s, key); + if (i == -1) return std::nullopt; + if (b->sc(tag, state(s, i))) { + rtype r = f(s.buffer[i]); + entries_->retire_entry(s.buffer[i]); + return r; + } // if sc failed, will need to try again + } + } + // if buffer overfull, or indirect, then need to protect + return epoch::with_epoch([&] () -> rtype { + int delay = 200; + while (true) { + auto [s, tag] = b->ll(); + copy_if_needed(ht, idx); + check_bucket_and_state(ht, key, b, s, tag, idx); + int i = find_in_buffer(s, key); + if (i >= 0) { // found in buffer + if (s.buffer_cnt() > buffer_size) { // need to backfill from list + link* l = s.overflow_list(); + if (b->sc(tag, state(s, l, i))) { + rtype r = f(s.buffer[i]); + entries_->retire_entry(s.buffer[i]); + retire_link(l); + return r; + } // if sc failed, will need to try again + } else { // buffer not overfull, can backfill within buffer + if (b->sc(tag, state(s, i))) { + rtype r = f(s.buffer[i]); + entries_->retire_entry(s.buffer[i]); + return r; + } // if sc failed, will need to try again + } + } else { // not found in buffer + if (s.buffer_cnt() <= buffer_size) // if not overful, then done + return std::nullopt; + auto [cnt, new_list, removed] = remove_from_list(s.overflow_list(), key); + if (cnt == 0) // if not found in list then done + return std::nullopt; + // if found, try to update with the new list that has the element removed + if (b->sc(tag, state(s, new_list))) { + rtype r = f(removed->entry); + entries_->retire_entry(removed->entry); + retire_list_n(s.overflow_list(), cnt); // retire old list + return r; + } // if sc failed, will need to try again + retire_list_n(new_list, cnt - 1); // failed, retire new list + } + for (volatile int i=0; i < delay; i++); + delay = std::min(2*delay, 5000); // 1000-10000 are about equally good + } + }); + } + + // Size of bucket, or if forwarded, then sum sizes of all forwarded + // buckets, recursively. 
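// The buffer backfill used by Remove above, isolated: with no overflow
// list, slot j is deleted by moving the last live entry into it (order
// inside a bucket does not matter).
#include <cstdio>

int main() {
  int buffer[4] = {10, 20, 30, 40};
  int count = 4;
  int j = 1;                       // remove the entry in slot 1 (value 20)
  buffer[j] = buffer[count - 1];   // backfill with the last entry
  count--;
  for (int i = 0; i < count; i++) std::printf("%d ", buffer[i]);  // 10 40 30
  std::printf("\n");
}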
+  long bucket_size_rec(table_version* t, long i) {
+    state head = t->buckets[i].v.load();
+    if (!head.is_forwarded())
+      return head.size();
+    else {
+      long sum = 0;
+      table_version* next = t->next.load();
+      for (int j = 0; j < grow_factor; j++)
+        sum += bucket_size_rec(next, grow_factor * i + j);
+      return sum;
+    }
+  }
+
+  long size() {
+    table_version* ht = current_table_version.load();
+    return epoch::with_epoch([&] {
+      return parlay::tabulate_reduce(ht->size, [&] (size_t i) {
+        return bucket_size_rec(ht, i);});});
+  }
+
+  template <typename F>
+  void static for_each_bucket_rec(table_version* t, long i, const F& f) {
+    state s = t->buckets[i].v.load();
+    if (!s.is_forwarded())
+      for_each_in_state(s, f);
+    else {
+      table_version* next = t->next.load();
+      for (int j = 0; j < grow_factor; j++)
+        for_each_bucket_rec(next, grow_factor * i + j, f);
+    }
+  }
+
+  // Apply function f to all entries of the table.  Works while updates are going on, and guarantees that:
+  //   any element whose delete linearizes before the invocation will not be included
+  //   any element whose insert linearizes after the response will not be included
+  //   any element that is present from invocation to response will be included
+  // Elements that are inserted or deleted between the invocation and response might or might not appear.
+  // template <typename F>
+  // parlay::sequence entries(const F& f) {
+  //   table_version* ht = current_table_version.load();
+  //   return epoch::with_epoch([&] {
+  //     auto s = parlay::tabulate(ht->size, [&] (size_t i) {
+  //       parlay::sequence r;
+  //       for_each_in_bucket_rec(ht, i, [&] (const Entry& entry) {
+  //         r.push_back(f(entry));});
+  //       return r;});
+  //     return flatten(s);});
+  // }
+
+  // Applies f to all elements in the table.
+  // Same pseudo-linearizable guarantee as entries and size.
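// size() above sums per-bucket sizes with parlay::tabulate_reduce; when
// USE_PARLAY is not defined, parallel.h's fallback is exactly this
// sequential loop:
#include <cstdio>

template <typename F>
long tabulate_reduce(long n, const F& f) {
  long r = 0;
  for (long i = 0; i < n; i++) r += f(i);
  return r;
}

int main() {
  long bucket_sizes[] = {2, 0, 5, 1};
  long total = tabulate_reduce(4, [&](long i) { return bucket_sizes[i]; });
  std::printf("table size = %ld\n", total);  // 8
}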
+ template + void for_each(const F& f) { + table_version* ht = current_table_version.load(); + return epoch::with_epoch([&] { + parallel_for(ht->size, [&] (long i) { + for_each_bucket_rec(ht, i, f);});}); + } + + // ********************************************* + // Iterator + // ********************************************* + + struct Iterator { + public: + using value_type = typename Entries::Data; + using iterator_category = std::forward_iterator_tag; + using pointer = value_type*; + using reference = value_type&; + using difference_type = long; + + private: + std::vector entries; + Entry entry; + table_version* t; + int i; + long bucket_num; + bool single; + bool end; + void get_next_bucket() { + auto g = [&] (const Entry& e) {entries.push_back(e);}; + while (entries.size() == 0 && ++bucket_num < t->size) + for_each_bucket_rec(t, bucket_num, g); + if (bucket_num == t->size) end = true; + } + + public: + Iterator(bool end) : i(0), bucket_num(-2l), single(false), end(true) {} + Iterator(table_version* t) : t(t), + i(0), bucket_num(-1l), single(false), end(false) { + get_next_bucket(); + } + Iterator(Entry entry) : entry(entry), single(true), end(false) {} + Iterator& operator++() { + if (single) end = true; + else if (++i == entries.size()) { + i = 0; + entries.clear(); + get_next_bucket(); + } + return *this; + } + Iterator& operator++(int) { + Iterator tmp = *this; + if (single) end = true; + else if (++i == entries.size()) { + i = 0; + entries.clear(); + get_next_bucket(); + } + return tmp; + } + template = 0> + const value_type operator*() { + if (single) return entry.get_entry(); + return entries[i].get_entry();} + + template = 0> + const value_type& operator*() { + if (single) return entry.get_entry(); + return entries[i].get_entry();} + + bool operator!=(const Iterator& iterator) { + return !(end ? iterator.end : (bucket_num == iterator.bucket_num && + i == iterator.i)); + } + bool operator==(const Iterator& iterator) { + return !(*this != iterator);} + }; + + Iterator begin() { return Iterator(current_table_version.load());} + Iterator end() { return Iterator(true);} + + static constexpr auto identity = [] (const Entry& entry) {return entry;}; + static constexpr auto true_f = [] (const Entry& entry) {return true;}; + + + template + std::pair insert(const K& key, const Constr& constr) { + return epoch::with_epoch([&] { + auto [e,flag] = insert_(key, constr); + return std::pair(Iterator(e), flag);}); + } + + Iterator erase(Iterator pos) { + Remove(*pos.first, true_f); + return Iterator(true); + } + + size_t erase(const K& key) { + return Remove(key, true_f).has_value(); + } + + Iterator find(const K& k) { + auto r = Find(k, identity); + if (!r.has_value()) return Iterator(true); + auto x = Iterator(*r); + return x; + } + +}; + + static constexpr bool default_clear_at_end = true; + + // conditionally rehash if type Hash::avalanching is not defined + template + struct rehash { + size_t operator()(size_t h) { + size_t x = h * UINT64_C(0xbf58476d1ce4e5b9); // linear transform + return (x ^ (x >> 31)); // non-linear transform + }}; + + template + struct rehash { + size_t operator()(size_t i) {return i;}}; + + // Definition where entries of the hash table are stored indirectly + // through a pointer. This means the entries themselves will never + // move, but requires a level of indirection when accessing them. + // Tags the high-bits of pointers with part of the hash function so + // one can avoid the indirection if the tags do not match. 
+ // Currently used for all types that are not trivially copyable. + template + struct IndirectEntries { + using DataS = EntryData; + using Data = typename DataS::value_type; + using Hash = typename DataS::Hash; + using KeyEqual = typename DataS::KeyEqual; + + struct Entry { + using K = typename DataS::K; + using Key = std::pair; + static constexpr bool Direct = false; + Data* ptr; + static Data* tag_ptr(size_t hashv, Data* data) { + return (Data*) (((hashv >> 48) << 48) | ((size_t) data)); + } + Data* get_ptr() const { + return (Data*) (((size_t) ptr) & ((1ul << 48) - 1)); } + static unsigned long hash(const Key& k) { + return k.second;} + bool equal(const Key& k) const { + return (((k.second >> 48) == (((size_t) ptr) >> 48)) && + KeyEqual{}(DataS::get_key(*get_ptr()), *k.first)); } + Key get_key() const { return make_key(DataS::get_key(*get_ptr()));} + Data& get_entry() const { return *get_ptr();} + static Key make_key(const K& key) { + return Key(&key, rehash{}(Hash{}(key)));} + Entry(Key k, Data* data) : ptr(tag_ptr(hash(k), data)) {} + Entry() {} + }; + + bool clear_at_end; + using Key = typename Entry::Key; + + // a memory pool for the entries + epoch::memory_pool* data_pool; + + IndirectEntries(bool clear_at_end=false) + : clear_at_end(clear_at_end), + data_pool(clear_at_end ? + new epoch::memory_pool() : + &epoch::get_default_pool()) {} + ~IndirectEntries() { + if (clear_at_end) { delete data_pool;} + } + + // allocates memory for the entry + Entry make_entry(const Key& k, const Data& data) { + return Entry(k, data_pool->New(data)); } + + // retires the memory for the entry + void retire_entry(Entry& e) { + data_pool->Retire(e.get_ptr()); } + }; + + // Definition where entries of the hash table are stored directly. + // This means the entries might be moved during updates, including + // insersions, removals, and resizing. Currently used for trivially + // copyable types. 
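// Standalone illustration of the tag test IndirectEntries uses above: the
// top 16 bits of the hash are stored in the (otherwise unused) top 16 bits
// of the entry pointer, so a tag mismatch rules out a match without
// dereferencing. Assumes 48-bit user pointers; names are local to the
// example.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct Data { int x; };

static Data* tag_ptr(uint64_t hashv, Data* p) {
  return (Data*)(((hashv >> 48) << 48) | (uint64_t)p);
}
static Data* get_ptr(Data* tagged) {
  return (Data*)((uint64_t)tagged & ((1ull << 48) - 1));
}

int main() {
  Data d{7};
  uint64_t h = 0xBEEF000000001234ull;
  Data* tagged = tag_ptr(h, &d);
  bool maybe_equal = (h >> 48) == ((uint64_t)tagged >> 48);  // cheap pre-filter
  assert(maybe_equal && get_ptr(tagged)->x == 7);
  std::printf("tag matched, value = %d\n", get_ptr(tagged)->x);
}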
+ template + struct DirectEntries { + using DataS = EntryData; + using Data = typename DataS::value_type; + using Hash = typename DataS::Hash; + using KeyEqual = typename DataS::KeyEqual; + using K = typename DataS::K; + + struct Entry { + using K = typename DataS::K; + using Key = K; + static const bool Direct = true; + Data data; + static unsigned long hash(const Key& k) { + return rehash{}(Hash{}(k));} + bool equal(const Key& k) const { return KeyEqual{}(get_key(), k); } + static Key make_key(const K& k) {return k;} + const K& get_key() const {return DataS::get_key(data);} + const Data& get_entry() const { return data;} + Entry(const Data& data) : data(data) {} + Entry() {} + }; + + DirectEntries(bool clear_at_end=false) {} + Entry make_entry(const K& k, const Data& data) { + return Entry(data); } + + // retiring is a noop since no memory has been allocated for entries + void retire_entry(Entry& e) {} + }; + + // template + // struct DirectEntriesX { + // using DataS = EntryData; + // using Data = typename DataS::value_type; + // using Hash = typename DataS::Hash; + // using KeyEqual = typename DataS::KeyEqual; + // using K = typename DataS::K; + + // struct Entry { + // using K = typename DataS::K; + // using Key = K; + // static const bool Direct = true; + // std::array data; + // static unsigned long hash(const Key& k) { + // return rehash{}(Hash{}(k));} + // bool equal(const Key& k) const { return KeyEqual{}(get_key(), k); } + // static Key make_key(const K& k) {return k;} + // const K& get_key() const { return DataS::get_key(*((Data*) &data));} + // const Data& get_entry() const { return *((Data*) &data);} + // Entry(const Data& d) { new (&data) Data(d); } + // Entry() {} + // }; + + // bool clear_at_end; + + // // a memory pool for the entries + // epoch::retire_pool* data_pool; + + // DirectEntriesX(bool clear_at_end=false) + // : clear_at_end(clear_at_end), + // data_pool(clear_at_end ? + // new epoch::retire_pool() : + // &epoch::get_default_retire_pool()) + // {} + // ~DirectEntriesX() { + // if (clear_at_end) { delete data_pool;} + // } + + // // allocates memory for the entry + // Entry make_entry(const K& k, const Data& data) { + // return Entry(data);} + + // // retires the memory for the entry + // void retire_entry(Entry& e) { + // data_pool->Retire((Data*) &(e.data)); + // } + // }; + + +} // namespace parlay +#endif // PARLAY_HASH_H_ diff --git a/include/parlay_hash/unordered_map.h b/include/parlay_hash/unordered_map.h new file mode 100644 index 0000000..af6c1d3 --- /dev/null +++ b/include/parlay_hash/unordered_map.h @@ -0,0 +1,170 @@ +// Initial Author: Guy Blelloch +// Developed as part of the flock library +// +// A growable unordered_map using a hash table designed for scalability to large number of threads, and +// for high contention. On a key type K and value type V it supports: +// +// unordered_map, Equal=std::equal_to>(n) : +// constructor for table of initial size n +// +// Find(const K&) -> std::optional : +// returns value if key is found, and otherwise returns nullopt +// +// Insert(const K&, const V&) -> std::optional : +// if key not in the table it inserts the key with the given value +// and returns nullopt, otherwise it does not modify the table and +// returns the old value. +// +// Remove(const K&) -> std::optional : +// if key is in the table it removes the entry and returns its value. +// otherwise it does nothing and returns nullopt. +// +// size() -> long : returns the size of the table. 
Not linearizable with +// the other functions, and takes time proportional to the table size. +// +// clear() -> void : clears the table so its size is 0. +// +// for_each(F f) : applies functor f to each entry of the table. +// f should be of type (const std::pair&) -> void + +#ifndef PARLAY_UNORDERED_MAP_ +#define PARLAY_UNORDERED_MAP_ + +#include +#include +#include "parlay_hash.h" + +namespace parlay { + + // entries contain a key + template , class KeyEqual_ = std::equal_to> + struct MapData { + using K = K_; + using V = V_; + using Hash = Hash_; + using KeyEqual = KeyEqual_; + using value_type = std::pair; + static const K& get_key(const value_type& x) { return x.first;} + }; + + // Generic unordered_map that can be used with direct or indirect + // entries depending on the template argument. + template + struct unordered_map_internal { + using map = parlay_hash; + + Entries entries_; + map m; + + using Entry = typename Entries::Entry; + using K = typename Entries::DataS::K; + using V = typename Entries::DataS::V; + using key_type = K; + using mapped_type = V; + using value_type = std::pair; + using iterator = typename map::Iterator; + + static constexpr auto true_f = [] (const Entry& kv) {return true;}; + static constexpr auto identity = [] (const Entry& kv) {return kv;}; + static constexpr auto get_value = [] (const value_type& kv) {return kv.second;}; + + unordered_map_internal(long n, bool clear_at_end = default_clear_at_end) + : entries_(Entries(clear_at_end)), + m(map(n, &entries_, clear_at_end)) {} + + iterator begin() { return m.begin();} + iterator end() { return m.end();} + bool empty() { return size() == 0;} + bool max_size() { return (1ul << 47)/sizeof(Entry);} + void clear() { m.clear_buckets();} + long size() { return m.size();} + + template + //auto entries(const F& f = identity) { return m.entries(f);} + long count(const K& k) { return (contains(k)) ? 
1 : 0; }
+    bool contains(const K& k) { return Find(k, true_f).has_value();}
+
+    template <typename F = decltype(get_value)>
+    auto Find(const K& k, const F& f = get_value)
+      // -> std::optional<typename std::invoke_result<F,value_type>::type>
+    {
+      auto g = [&] (const Entry& e) {return f(e.get_entry());};
+      return m.Find(Entry::make_key(k), g);
+    }
+
+    auto Insert(const K& key, const V& value) -> std::optional<V>
+    {
+      auto k = Entry::make_key(key);
+      auto g = [&] (const Entry& e) {return get_value(e.get_entry());};
+      return m.Insert(k, [&] {return entries_.make_entry(k, value_type(key, value));}, g);
+    }
+
+    template <typename F>
+    auto Upsert(const K& key, const F& f) -> std::optional<V>
+    {
+      auto k = Entry::make_key(key);
+      auto g = [&] (const Entry& e) {return get_value(e.get_entry());};
+      auto constr = [&] (const std::optional<Entry>& e) -> Entry {
+        if (e.has_value())
+          return entries_.make_entry(k, value_type(key, f(std::optional(get_value((*e).get_entry())))));
+        return entries_.make_entry(k, value_type(key, f(std::optional<V>())));
+      };
+      return m.Upsert(k, constr, g);
+    }
+
+    template <typename F>
+    auto Insert(const K& key, const V& value, const F& f)
+      // -> std::optional<typename std::invoke_result<F,value_type>::type>
+    {
+      auto k = Entry::make_key(key);
+      auto g = [&] (const Entry& e) {return f(e.get_entry());};
+      return m.Insert(k, [&] {return entries_.make_entry(k, value_type(key, value));}, g);
+    }
+
+    auto Remove(const K& k) -> std::optional<V>
+    {
+      auto g = [&] (const Entry& e) {return get_value(e.get_entry());};
+      return m.Remove(Entry::make_key(k), g);
+    }
+
+    template <typename F>
+    auto Remove(const K& k, const F& f)
+      // -> std::optional<typename std::invoke_result<F,value_type>::type>
+    {
+      auto g = [&] (const Entry& e) {return f(e.get_entry());};
+      return m.Remove(Entry::make_key(k), g);
+    }
+
+    iterator find(const K& k) { return m.find(k); }
+
+    std::pair<iterator, bool> insert(const value_type& entry) {
+      auto k = Entry::make_key(entry.first);
+      return m.insert(k, [&] {return entries_.make_entry(k, entry);});}
+
+    iterator erase(iterator pos) { return m.erase(pos); }
+    size_t erase(const K& k) { return m.erase(k); }
+
+  };
+
+  // Entries are stored directly in the bucket, avoiding a cache miss
+  // for indirection.  Entries can be moved by updates even on
+  // different keys.
+  template <typename K, typename V, class Hash = std::hash<K>, class KeyEqual = std::equal_to<K>>
+  using parlay_unordered_map_direct = unordered_map_internal<DirectEntries<MapData<K, V, Hash, KeyEqual>>>;
+
+  // Entries are stored indirectly through a pointer.  Pointers to
+  // entries will remain valid until the entry is upserted or deleted
+  // (an upsert can be thought of as a deletion followed by an
+  // insertion).
+  template <typename K, typename V, class Hash = std::hash<K>, class KeyEqual = std::equal_to<K>>
+  using parlay_unordered_map_indirect = unordered_map_internal<IndirectEntries<MapData<K, V, Hash, KeyEqual>>>;
+
+  // specialization of unordered_map to use either direct or indirect
+  // entries depending on whether K and V are trivially copyable.
+  // (A short usage example follows below.)
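// Sketch of how the map declared above is used (Find/Insert/Remove all
// return optionals). Assumes this header and its dependencies (parlaylib,
// epoch.h, lock.h) are on the include path; an explicit lambda is passed
// to Find for portability.
#include "parlay_hash/unordered_map.h"
#include <cstdio>

int main() {
  parlay::parlay_unordered_map<long, long> m(16);  // initial size hint
  auto prev = m.Insert(1, 100);                    // empty: newly inserted
  auto again = m.Insert(1, 999);                   // engaged: old value, no overwrite
  auto found = m.Find(1, [](const auto& kv) { return kv.second; });
  auto gone = m.Remove(1);                         // engaged: removed value
  std::printf("%d %ld %ld %ld\n",
              (int)prev.has_value(), *again, *found, *gone);
}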
+ template , class KeyEqual = std::equal_to> + using parlay_unordered_map = std::conditional_t && + std::is_trivially_copyable_v, + parlay_unordered_map_direct, + parlay_unordered_map_indirect>; +} // namespace parlay +#endif // PARLAY_BIGATOMIC_HASH_LIST diff --git a/include/parlay_hash/unordered_set.h b/include/parlay_hash/unordered_set.h new file mode 100644 index 0000000..fbad87a --- /dev/null +++ b/include/parlay_hash/unordered_set.h @@ -0,0 +1,87 @@ +#ifndef PARLAY_UNORDERED_SET_ +#define PARLAY_UNORDERED_SET_ + +#include +#include +#include "parlay_hash.h" +#include + +namespace parlay { + + // entries just contain a key + template , class KeyEqual_ = std::equal_to> + struct SetData { + using K = K_; + using Hash = Hash_; + using KeyEqual = KeyEqual_; + using value_type = K; + static const K& get_key(const value_type& x) { return x;} + }; + + // Generic unordered_set that can be used with direct or indirect + // entries depending on the template argument. + template + struct unordered_set_internal { + using set = parlay_hash; + + Entries entries_; + set m; + + using Entry = typename Entries::Entry; + using K = typename Entries::DataS::K; + using key_type = K; + using value_type = K; + using iterator = typename set::Iterator; + + static constexpr auto true_f = [] (const Entry& kv) {return true;}; + static constexpr auto identity = [] (const Entry& kv) {return kv;}; + + unordered_set_internal(long n, bool clear_at_end = default_clear_at_end) + : entries_(Entries(clear_at_end)), + m(set(n, &entries_, clear_at_end)) {} + + iterator begin() { return m.begin();} + iterator end() { return m.end();} + bool empty() { return size() == 0;} + bool max_size() { return (1ul << 47)/sizeof(Entry);} + void clear() { m.clear_buckets();} + long size() { return m.size();} + + template + auto entries(const F& f = identity) { return m.entries(f);} + long count(const K& k) { return (contains(k)) ? 
1 : 0; } + bool contains(const K& k) { return find(k, true_f).has_value();} + + bool Find(const K& k) { return m.Find(Entry::make_key(k), true_f).has_value(); } + bool Insert(const K& key) + { + auto k = Entry::make_key(key); + return !m.Insert(k, [&] {return entries_.make_entry(k, key);}, true_f).has_value(); + } + + bool Remove(const K& k) + { return m.Remove(Entry::make_key(k), true_f).has_value(); } + + iterator find(const K& k) { return m.find(k); } + + std::pair insert(const value_type& entry) { + return m.insert(entries_.make_entry(make_key(entry.first), entry)); } + + iterator erase(iterator pos) { return m.erase(pos); } + size_t erase(const K& k) { return m.erase(k); } + + }; + + template , class KeyEqual = std::equal_to> + using parlay_unordered_set_direct = unordered_set_internal>>; + + template , class KeyEqual = std::equal_to> + using parlay_unordered_set_indirect = unordered_set_internal>>; + + template , class KeyEqual = std::equal_to> + using parlay_unordered_set = std::conditional_t, + parlay_unordered_set_direct, + parlay_unordered_set_indirect>; +} // namespace parlay +#endif // PARLAY_BIGATOMIC_HASH_LIST + diff --git a/include/sketch_interfacing.h b/include/sketch_interfacing.h new file mode 100644 index 0000000..746c948 --- /dev/null +++ b/include/sketch_interfacing.h @@ -0,0 +1,8 @@ +#pragma once +#include "sketch.h" +#include "sketch/sketch_columns.h" +#include "sketch/sketch_concept.h" + + +// using DefaultSketchColumn = FixedSizeSketchColumn; +using DefaultSketchColumn = ResizeableSketchColumn; diff --git a/include/sketchless_euler_tour_tree.h b/include/sketchless_euler_tour_tree.h index 7bc0f61..ce742b2 100644 --- a/include/sketchless_euler_tour_tree.h +++ b/include/sketchless_euler_tour_tree.h @@ -6,6 +6,9 @@ #include #include "types.h" + +#include + class SketchlessEulerTourNode { std::unordered_map edges; @@ -39,12 +42,19 @@ class SketchlessEulerTourNode { friend std::ostream& operator<<(std::ostream& os, const SketchlessEulerTourNode& ett); }; + +template < +// typename Container = std::vector> +typename Container = absl::flat_hash_map> class SketchlessEulerTourTree { - long seed = 0; + // TODO - packing order fixes + size_t seed = 0; + uint32_t tier_num = 0; public: - std::vector ett_nodes; + node_id_t max_num_nodes; + Container ett_nodes; - SketchlessEulerTourTree(node_id_t num_nodes, uint32_t tier_num, int seed); + SketchlessEulerTourTree(node_id_t max_num_nodes, uint32_t tier_num, size_t seed); void link(node_id_t u, node_id_t v); void cut(node_id_t u, node_id_t v); @@ -52,4 +62,51 @@ class SketchlessEulerTourTree { SketchlessSkipListNode* get_root(node_id_t u); bool is_connected(node_id_t u, node_id_t v); std::vector> cc_query(); + + SketchlessEulerTourNode& ett_node(node_id_t u) { + if constexpr (std::is_same_v>) { + assert(u < ett_nodes.size()); + return ett_nodes[u]; + } else { + // if (ett_nodes.find(u) == ett_nodes.end()) { + // std::cout << "ruh oh" << std::endl; + // } + assert(ett_nodes.find(u) != ett_nodes.end()); + return *ett_nodes[u]; + } + } + + void initialize_node(node_id_t u) { + // no-op with vector implementation + if constexpr (!std::is_same_v>) { + ett_nodes[u] = new SketchlessEulerTourNode(this->seed, u, this->tier_num); + } + }; + void uninitialize_node(node_id_t u) { + // no-op with vector implementation + if constexpr (!std::is_same_v>) { + assert(ett_nodes.find(u) != ett_nodes.end()); + delete ett_nodes[u]; + } + }; + + void initialize_all_nodes() { + for (node_id_t i = 0; i < max_num_nodes; ++i) { + initialize_node(i); + } + }; + 
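// The `if constexpr` container dispatch above, in miniature: vector-backed
// storage needs no per-node initialization, while map-of-pointer storage
// allocates on demand. Names are local to the example.
#include <cstdio>
#include <type_traits>
#include <unordered_map>
#include <vector>

struct Node { int payload = 0; };

template <typename Container>
struct NodeStore {
  Container nodes;
  void initialize_node(int u) {
    if constexpr (!std::is_same_v<Container, std::vector<Node>>)
      nodes[u] = new Node();  // map of pointers: allocate on demand
    // vector of values: no-op, the slot already exists
  }
  Node& node(int u) {
    if constexpr (std::is_same_v<Container, std::vector<Node>>)
      return nodes[u];
    else
      return *nodes[u];
  }
};

int main() {
  NodeStore<std::vector<Node>> dense{std::vector<Node>(4)};
  NodeStore<std::unordered_map<int, Node*>> sparse{};
  sparse.initialize_node(2);
  dense.node(1).payload = 5;
  sparse.node(2).payload = 7;
  std::printf("%d %d\n", dense.node(1).payload, sparse.node(2).payload);
  delete sparse.nodes[2];
}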
+  void initialize_all_nodes(node_id_t until) {
+    assert(until <= max_num_nodes);
+    for (node_id_t i = 0; i < until; ++i) {
+      initialize_node(i);
+    }
+  }
+  bool is_initialized(node_id_t u) {
+    // always true with the vector implementation
+    if constexpr (std::is_same_v<Container, std::vector<SketchlessEulerTourNode>>) {
+      return true;
+    } else {
+      return ett_nodes.find(u) != ett_nodes.end();
+    }
+  };
 };
diff --git a/include/skiplist.h b/include/skiplist.h
index deafa22..81ec679 100644
--- a/include/skiplist.h
+++ b/include/skiplist.h
@@ -2,82 +2,353 @@
 #include <set>
 #include "sketch.h"
+#include "sketch/sketch_columns.h"
+#include "sketch_interfacing.h"
+#include <parlay/sequence.h>
+#include <tbb/task_group.h>
+
+// using ColumnEntryDeltas = parlay::sequence<ColumnEntryDelta>::const_view_type;
+using ColumnEntryDeltas = parlay::sequence<ColumnEntryDelta>::view_type;
+
+
+
+#ifndef SKETCH_BUFFER_SIZE
+  #define SKETCH_BUFFER_SIZE 25
+#endif
+
+enum AggUpdateState {
+  NORMAL = 0,
+  // needs to be updated (normal CAS logic)
+  NEEDS_UPDATE = 1,
+  // this node was updated, but its parent needs to be FULLY recomputed,
+  // since the node changed in some non-trackable way (i.e. it was updated atomically)
+  PARENT_IS_STALE = 2,
+  // applied to nodes whose aggregate contribution does not need to be reapplied
+  LEAVE_ALONE = 3
+};
+
+template <typename SketchClass> requires(SketchColumnConcept<SketchClass>)
 class EulerTourNode;
-constexpr int skiplist_buffer_cap = 25;
 extern long skiplist_seed;
 extern double height_factor;
 extern vec_t sketch_len;
 extern vec_t sketch_err;
 
+template <typename SketchClass> requires(SketchColumnConcept<SketchClass>)
 class SkipListNode {
+  friend class EulerTourNode<SketchClass>;
 
-  SkipListNode* left = nullptr;
-  SkipListNode* right = nullptr;
-  SkipListNode* up = nullptr;
-  SkipListNode* down = nullptr;
+  SkipListNode<SketchClass>* left = nullptr;
+  SkipListNode<SketchClass>* right = nullptr;
+  SkipListNode<SketchClass>* up = nullptr;
+  SkipListNode<SketchClass>* down = nullptr;
   // Store the first node to the left on the next level up
-  SkipListNode* parent = nullptr;
+  SkipListNode<SketchClass>* parent = nullptr;
 
-  vec_t update_buffer[skiplist_buffer_cap];
   int buffer_size = 0;
   int buffer_capacity;
+  vec_t update_buffer[SKETCH_BUFFER_SIZE];
+  int8_t needs_update = AggUpdateState::NORMAL;
 
  public:
-  Sketch* sketch_agg = nullptr;
+  EulerTourNode<SketchClass>* node;
+  SketchClass sketch_agg;
   uint32_t size = 1;
+
 
-  EulerTourNode* node;
-
-  SkipListNode(EulerTourNode* node, long seed, bool has_sketch);
+  SkipListNode(EulerTourNode<SketchClass>* node, long seed, bool has_sketch);
   ~SkipListNode();
 
-  static SkipListNode* init_element(EulerTourNode* node, bool is_allowed_caller);
+  static SkipListNode<SketchClass>* init_element(EulerTourNode<SketchClass>* node, bool is_allowed_caller);
   void uninit_element(bool delete_bdry);
   void uninit_list();
   // Returns the closest node on the next level up at or left of the current
-  SkipListNode* get_parent();
+  SkipListNode<SketchClass>* get_parent() const;
   // Returns the top left root node of the skiplist
-  SkipListNode* get_root();
+  SkipListNode<SketchClass>* get_root() const;
   // Returns the bottom left boundary node of the skiplist
-  SkipListNode* get_first();
+  SkipListNode<SketchClass>* get_first() const;
   // Returns the bottom right node of the skiplist
-  SkipListNode* get_last();
+  SkipListNode<SketchClass>* get_last() const;
   // Return the aggregate size at the root of the list
   uint32_t get_list_size();
   // Return the aggregate sketch at the root of the list
-  Sketch* get_list_aggregate();
+  const SketchClass& get_list_aggregate();
   // Update all the aggregate sketches with the input vector from the current node to its root
-  SkipListNode* update_path_agg(vec_t update_idx);
+  SkipListNode<SketchClass>* update_path_agg(vec_t update_idx);
+  // same, but atomically
+  SkipListNode<SketchClass>* update_path_agg_atomic(vec_t update_idx);
   // Add the given sketch to all aggregate sketches from the current node to its root
-  SkipListNode* update_path_agg(Sketch* sketch);
+  SkipListNode<SketchClass>* update_path_agg(const SketchClass &sketch);
+  SkipListNode<SketchClass>* update_path_agg(SketchClass &sketch);
+
+  SkipListNode<SketchClass>* update_path_agg(const ColumnEntryDelta &delta);
+  SkipListNode<SketchClass>* update_path_agg(const ColumnEntryDeltas &deltas);
+
+  SkipListNode<SketchClass>* update_path_agg_atomic(const ColumnEntryDelta &delta);
+  SkipListNode<SketchClass>* update_path_agg_atomic(const ColumnEntryDeltas &deltas);
   // Update just this node's aggregate sketch
   void update_agg(vec_t update_idx);
+  // Same, but atomically
+  void update_agg_atomic(vec_t update_idx);
+  // Just apply the delta
+  void update_agg_entry_delta(const ColumnEntryDelta& delta) {
+    if (!this->sketch_agg.is_initialized()) // Only do something if this node has a sketch
+      return;
+    this->sketch_agg.apply_entry_delta(delta);
+  }
+
+  void update_agg_entry_deltas(const ColumnEntryDeltas &deltas) {
+    if (!this->sketch_agg.is_initialized()) // Only do something if this node has a sketch
+      return;
+    for (const auto& delta : deltas)
+      this->sketch_agg.apply_entry_delta(delta);
+  }
+  // and the atomic versions:
+  void update_agg_atomic_entry_delta(const ColumnEntryDelta &delta) {
+    if (!this->sketch_agg.is_initialized()) // Only do something if this node has a sketch
+      return;
+    this->sketch_agg.atomic_apply_entry_delta(delta);
+  }
+  void update_agg_atomic_entry_deltas(const ColumnEntryDeltas &deltas) {
+    if (!this->sketch_agg.is_initialized()) // Only do something if this node has a sketch
+      return;
+    for (const auto& delta : deltas)
+      this->sketch_agg.atomic_apply_entry_delta(delta);
+  }
 
   // Apply all the sketch updates currently in the update buffer
   void process_updates();
+
+  bool _needs_full_recompute() {
+    if (this->down == nullptr)
+      return false;
+    SkipListNode<SketchClass>* current = this->down;
+    do {
+      if (current->needs_update == AggUpdateState::PARENT_IS_STALE) {
+        return true;
+      }
+      current = current->right;
+    } while (current != nullptr && current != this->down && current->up == nullptr);
+    return false;
+  }
+
+  void _do_full_prefetch() {
+    if (this->down == nullptr)
+      return;
+    SkipListNode<SketchClass>* current = this->down;
+    do {
+      if (current->sketch_agg.is_initialized()) {
+        // prefetch each child's aggregate so the full re-merge hits warm cache lines
+        current->sketch_agg.prefetch();
+      }
+      current = current->right;
+    } while (current != nullptr && current != this->down && current->up == nullptr);
+  }
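+  // Note on "subtracting" with merge() (stated assumption: the sketch columns
+  // used here merge by XOR-ing buckets, so a merge is its own inverse; the
+  // two-line sequence below is illustrative, not real code):
+  //
+  //   parent.sketch_agg.merge(child.sketch_agg);  // adds child's contribution
+  //   parent.sketch_agg.merge(child.sketch_agg);  // cancels it again, x ^ x == 0
+  //
+  // _subtract_stale_children() below leans on this self-inverse property to
+  // remove a stale child aggregate before the child is recomputed and re-merged.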
+  void _subtract_stale_children() {
+    // "subtract" the aggregate of any child that is about to be recomputed
+    // by re-merging it (merge is self-inverse; see the note above)
+    if (this->down == nullptr)
+      return;
+    SkipListNode<SketchClass>* current = this->down;
+    do {
+      assert(current->needs_update != AggUpdateState::PARENT_IS_STALE);
+      if (current->needs_update == AggUpdateState::NEEDS_UPDATE) {
+        if (current->sketch_agg.is_initialized()) {
+          this->sketch_agg.merge(current->sketch_agg);
+        }
+      }
+      else {
+        current->needs_update = AggUpdateState::LEAVE_ALONE;
+      }
+      current = current->right;
+    } while (current != nullptr && current != this->down && current->up == nullptr);
+  }
+
+  void _do_full_reagg() {
+    if (this->down == nullptr)
+      return;
+    this->sketch_agg.clear();
+    SkipListNode<SketchClass>* current = this->down;
+    do {
+      if (current->sketch_agg.is_initialized()) {
+        this->sketch_agg.merge(current->sketch_agg);
+      }
+      current->needs_update = AggUpdateState::NORMAL;
+      current = current->right;
+    } while (current != nullptr && current != this->down && current->up == nullptr);
+  }
+
+  void _full_recompute_aggs_topdown(int fork_levels) {
+    if (!this->sketch_agg.is_initialized())
+      return;
+    if (this->down == nullptr)
+      return;
+    SkipListNode<SketchClass>* current = this->down;
+    this->sketch_agg.clear();
+    if (fork_levels > 0) {
+      tbb::task_group tg;
+      do {
+        if (current->needs_update == AggUpdateState::NEEDS_UPDATE) {
+          tg.run([current, fork_levels]() {
+            current->recompute_aggs_topdown(fork_levels-1);
+          });
+        }
+        current = current->right;
+      } while (current != nullptr && current != this->down && current->up == nullptr);
+      tg.wait();
+      // _do_full_prefetch();
+      _do_full_reagg();
+    }
+    else {
+      do {
+        if (current->needs_update == AggUpdateState::NEEDS_UPDATE) {
+          current->recompute_aggs_topdown(fork_levels - 1);
+        }
+        current = current->right;
+      } while (current != nullptr && current != this->down && current->up == nullptr);
+      // _do_full_prefetch();
+      _do_full_reagg();
+    }
+    this->needs_update = AggUpdateState::NORMAL;
+  }
+
+  void _recursive_recompute_children(int fork_levels) {
+    if (this->down == nullptr)
+      return;
+    SkipListNode<SketchClass>* current = this->down;
+    if (fork_levels > 0) {
+      tbb::task_group tg;
+      do {
+        if (current->needs_update == AggUpdateState::NEEDS_UPDATE) {
+          tg.run([current, fork_levels]() {
+            current->recompute_aggs_topdown(fork_levels-1);
+          });
+        }
+        current = current->right;
+      } while (current != nullptr && current != this->down && current->up == nullptr);
+      tg.wait();
+    }
+    else {
+      do {
+        // recompute any child marked NEEDS_UPDATE
+        if (current->needs_update == AggUpdateState::NEEDS_UPDATE) {
+          current->recompute_aggs_topdown(fork_levels - 1);
+        }
+        current = current->right;
+      } while (current != nullptr && current != this->down && current->up == nullptr);
+    }
+  }
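+  // The fork_levels pattern above, reduced to a standalone sketch
+  // (tbb::task_group is the real TBB API; Node and visit are placeholders):
+  // recursion spawns TBB tasks only for the top fork_levels levels, then
+  // degrades to a serial walk, so task-spawn overhead is not paid on the
+  // wide, cheap levels near the bottom.
+  //
+  //   void visit(Node* n, int fork_levels) {
+  //     if (fork_levels > 0) {
+  //       tbb::task_group tg;
+  //       for (Node* c : n->children())
+  //         tg.run([c, fork_levels] { visit(c, fork_levels - 1); });
+  //       tg.wait();   // children finish before the caller re-aggregates
+  //     } else {
+  //       for (Node* c : n->children())
+  //         visit(c, fork_levels - 1);
+  //     }
+  //   }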
+
+  // Recompute this node's aggregate from its children.
+  void recompute_aggs_topdown(int fork_levels) {
+    assert(this != nullptr);
+    if (!this->sketch_agg.is_initialized())
+      return;
+    // do not recompute for bottom level nodes
+    if (this->down == nullptr)
+      return;
+    // _full_recompute_aggs_topdown(fork_levels);
+    if (_needs_full_recompute()) {
+      // prefetch all the children
+      _full_recompute_aggs_topdown(fork_levels);
+    }
+    else {
+      _subtract_stale_children();
+      _recursive_recompute_children(fork_levels);
+      SkipListNode<SketchClass>* current = this->down;
+      do {
+        if (current->needs_update == AggUpdateState::LEAVE_ALONE) {
+          // contribution already present; nothing to re-merge
+        }
+        else {
+          if (current->sketch_agg.is_initialized()) {
+            this->sketch_agg.merge(current->sketch_agg);
+          }
+        }
+        current->needs_update = AggUpdateState::NORMAL;
+        current = current->right;
+      } while (current != nullptr && current != this->down && current->up == nullptr);
+    }
+    this->needs_update = AggUpdateState::NORMAL;
+  }
+  size_t compute_space_usage() {
+    size_t total = sizeof(SkipListNode<SketchClass>);
+    if (this->sketch_agg.is_initialized())
+      total += sketch_agg.space_usage_bytes();
+    if (this->down != nullptr) {
+      SkipListNode<SketchClass>* current = this->down;
+      do {
+        total += current->compute_space_usage();
+        current = current->right;
+      } while (current != nullptr && current != this->down && current->up == nullptr);
+    }
+    return total;
+  }
+
+  // we have to barrier on all of these finishing
+  SkipListNode<SketchClass>* find_root_with_cas() {
+    SkipListNode<SketchClass>* current = this;
+    while (current->parent != nullptr) {
+      current = current->parent;
+      std::atomic_ref<int8_t> atomic_needs_update(current->needs_update);
+      int8_t expected = static_cast<int8_t>(AggUpdateState::NORMAL);
+      bool cas_succeed = atomic_needs_update.compare_exchange_strong(
+        expected,
+        static_cast<int8_t>(AggUpdateState::NEEDS_UPDATE),
+        std::memory_order_seq_cst
+      );
+      // __sync_bool_compare_and_swap(
+      //   (bool *)&current->needs_update,
+      //   false,
+      //   true
+      // );
+      if (!cas_succeed) {
+        // someone else already set needs_update, so we can stop
+        return nullptr;
+      }
+    }
+    // TODO - don't make this hard-coded
+    return current;
+  }
+
+  void clear_cas_flags() {
+    assert(this != nullptr);
+    this->needs_update = AggUpdateState::NORMAL;
+    if (this->down == nullptr)  // bottom-level nodes have no children to clear
+      return;
+    SkipListNode<SketchClass>* current = this->down;
+    do {
+      if (current->needs_update == AggUpdateState::NEEDS_UPDATE) {
+        current->clear_cas_flags();
+      }
+      current = current->right;
+    } while (current != nullptr && current != this->down && current->up == nullptr);
+  }
 
-  std::set get_component();
+  std::set<SkipListNode<SketchClass>*> get_component();
 
   // Returns the root of a new skiplist formed by joining the lists containing left and right
-  static SkipListNode* join(SkipListNode* left, SkipListNode* right);
-  template <class... T>
-  static SkipListNode* join(SkipListNode* head, T*... tail);
+  static SkipListNode<SketchClass>* join(SkipListNode<SketchClass>* left, SkipListNode<SketchClass>* right);
+
+  template <typename... Tail> requires((std::is_same_v<SkipListNode<SketchClass>*, Tail> && ...))
+  static SkipListNode<SketchClass>* join(SkipListNode<SketchClass>* head, Tail... tail) {
+    return join(head, join(tail...));
+  };
   // Returns the root of the left list after splitting to the left of the given node
-  static SkipListNode* split_left(SkipListNode* node);
+  static SkipListNode<SketchClass>* split_left(SkipListNode<SketchClass>* node);
   // Returns the root of the right list after splitting to the right of the given node
-  static SkipListNode* split_right(SkipListNode* node);
+  static SkipListNode<SketchClass>* split_right(SkipListNode<SketchClass>* node);
 
   bool isvalid();
-  SkipListNode* next();
+  SkipListNode<SketchClass>* next();
   int print_list();
 };
 
-template <class... T>
-SkipListNode* SkipListNode::join(SkipListNode* head, T*... tail) {
-  return join(head, join(tail...));
-}
+// template <typename... Tail> requires((std::is_same_v<SkipListNode<SketchClass>*, Tail> && ...))
+// SkipListNode<SketchClass>* SkipListNode<SketchClass>::join(SkipListNode<SketchClass>* head, Tail... tail) {
+//   return join(head, join(tail...));
+// }
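// A minimal driver sketch for the marking protocol declared above (comment
// only; find_root_with_cas, update_agg_atomic, and recompute_aggs_topdown are
// the members from skiplist.h, while touched_leaves, update_idx, and the fork
// depth are made up):
//
//   std::vector<SkipListNode<DefaultSketchColumn>*> roots;
//   for (auto* leaf : touched_leaves) {
//     leaf->update_agg_atomic(update_idx);          // apply the leaf-level change
//     if (auto* root = leaf->find_root_with_cas())  // nullptr => path already marked
//       roots.push_back(root);
//   }
//   // barrier here: every marking walk must finish before any recompute starts
//   for (auto* root : roots)
//     root->recompute_aggs_topdown(/*fork_levels=*/3);
//
// Exactly one walker per root comes back with a non-null result, so each root
// is recomputed once even when many leaves under it were updated concurrently.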
diff --git a/include/ufo_tree/types.h b/include/ufo_tree/types.h
new file mode 100644
index 0000000..2eb1e0c
--- /dev/null
+++ b/include/ufo_tree/types.h
@@ -0,0 +1,59 @@
+#pragma once
+#include <cstdint>
+#include <parlay/sequence.h>
+
+
+namespace ufo {
+
+typedef uint32_t vertex_t;
+static vertex_t NONE = -1;
+
+struct empty_t {
+};
+static empty_t empty;
+
+typedef uint64_t edge_t;
+
+enum UpdateType {
+  INSERT,
+  DELETE
+};
+
+struct Edge {
+public:
+  vertex_t src;
+  vertex_t dst;
+
+  bool operator==(const Edge& other) const {
+    return src == other.src && dst == other.dst;
+  }
+};
+
+struct Update {
+  UpdateType type;
+  Edge edge;
+};
+
+struct UpdateBatch {
+  UpdateType type;
+  parlay::sequence> edges;
+};
+
+struct UpdateBatchWithWeights {
+  UpdateType type;
+  parlay::sequence> insert_edges;
+  parlay::sequence> delete_edges;
+};
+
+enum QueryType {
+  CONNECTIVITY,
+  PATH,
+  SUBTREE
+};
+
+struct Query {
+  vertex_t u;
+  vertex_t v;
+};
+
+}
diff --git a/include/ufo_tree/ufo_cluster.h b/include/ufo_tree/ufo_cluster.h
new file mode 100644
index 0000000..6a8ee93
--- /dev/null
+++ b/include/ufo_tree/ufo_cluster.h
@@ -0,0 +1,229 @@
+#pragma once
+#include "ufo_tree/types.h"
+#include "ufo_tree/util.h"
+#include <absl/container/flat_hash_map.h>
+#include <map>
+
+/* This constant determines the maximum size of the array of neighbors stored
+inline in each UFOCluster. Any additional neighbors are stored in the hash
+set for efficiency. The minimum value is 3 for queries to function
+correctly. */
+#define UFO_ARRAY_MAX 3
+
+// #define COLLECT_ROOT_CLUSTER_STATS
+#ifdef COLLECT_ROOT_CLUSTER_STATS
+  static std::map<size_t, size_t> root_clusters_histogram;
+#endif
+
+
+namespace ufo {
+
+template <typename v_t, typename e_t>
+class UFOCluster {
+using Cluster = UFOCluster<v_t, e_t>;
+using NeighborSet = absl::flat_hash_map<Cluster*, e_t>;
+public:
+  // Query fields; note that the [[no_unique_address]] fields must be declared first
+  [[no_unique_address]] e_t edge_value1;
+  [[no_unique_address]] e_t edge_value2;
+  [[no_unique_address]] e_t edge_value3;
+  [[no_unique_address]] v_t value;
+  // Parent pointer
+  Cluster* parent = nullptr;
+  /* We tag the last neighbor pointer in the array with information about the degree of the cluster.
+  If it is 1, 2, or 3, that is the degree of the cluster. If it is 4, then the cluster has degree 4
+  or higher and the last neighbor pointer is actually a pointer to the NeighborSet object containing
+  the remaining neighbors of the cluster. */
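+  /* Tagged-pointer mechanics in isolation (an illustrative aside; TAG, UNTAG,
+  and GET_TAG are the macros from ufo_tree/util.h, p is a stand-in pointer).
+  Clusters are at least 8-byte aligned, so the low 3 bits of a Cluster* are
+  always zero and can carry the tag:
+
+    Cluster* tagged = TAG(p, 3);      // pack tag value 3 into the low bits
+    int tag = GET_TAG(tagged);        // tag == 3
+    Cluster* plain = UNTAG(tagged);   // mask the low 3 bits; plain == p
+
+  Tags 0-3 encode the degree directly; tag 4 means the slot actually holds a
+  NeighborSet* (see get_degree() and has_neighbor_set() below). */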
+  Cluster* neighbors[UFO_ARRAY_MAX];
+  int degree = 0;
+  int fanout = 0;
+  // Constructors
+  UFOCluster() : parent(), neighbors(), degree(), fanout(), edge_value1(), edge_value2(), edge_value3(), value() {};
+  UFOCluster(v_t val) : parent(), neighbors(), degree(), fanout(), edge_value1(), edge_value2(), edge_value3(), value(val) {};
+  // Helper functions
+  Cluster* get_root();
+  bool contracts();
+  int get_degree();
+  bool has_neighbor_set();
+  NeighborSet* get_neighbor_set();
+  bool parent_high_fanout();
+  bool contains_neighbor(Cluster* c);
+  void insert_neighbor(Cluster* c);
+  void insert_neighbor_with_value(Cluster* c, e_t value);
+  void remove_neighbor(Cluster* c);
+  void set_edge_value(int index, e_t value);
+  e_t get_edge_value(int index);
+  size_t calculate_size();
+};
+
+template <typename v_t, typename e_t>
+UFOCluster<v_t, e_t>* UFOCluster<v_t, e_t>::get_root() {
+  Cluster* curr = this;
+  while (curr->parent) curr = curr->parent;
+  return curr;
+}
+
+template <typename v_t, typename e_t>
+bool UFOCluster<v_t, e_t>::contracts() {
+  assert(get_degree() <= UFO_ARRAY_MAX);
+  for (auto neighborp : neighbors) {
+    auto neighbor = UNTAG(neighborp);
+    if (neighbor && neighbor->parent == parent) return true;
+  }
+  return false;
+}
+
+template <typename v_t, typename e_t>
+int UFOCluster<v_t, e_t>::get_degree() {
+  int tag = GET_TAG(neighbors[UFO_ARRAY_MAX-1]);
+  if (tag <= 3) [[likely]] return tag;
+  return 2 + get_neighbor_set()->size();
+}
+
+template <typename v_t, typename e_t>
+bool UFOCluster<v_t, e_t>::has_neighbor_set() {
+  int tag = GET_TAG(neighbors[UFO_ARRAY_MAX-1]);
+  if (tag <= 3) [[likely]] return false;
+  return true;
+}
+
+template <typename v_t, typename e_t>
+absl::flat_hash_map<UFOCluster<v_t, e_t>*, e_t>* UFOCluster<v_t, e_t>::get_neighbor_set() {
+  return (NeighborSet*) UNTAG(neighbors[UFO_ARRAY_MAX-1]);
+}
+
+template <typename v_t, typename e_t>
+bool UFOCluster<v_t, e_t>::parent_high_fanout() {
+  assert(parent);
+  int parent_degree = parent->get_degree();
+  if (get_degree() == 1) {
+    auto neighbor = neighbors[0];
+    if (neighbor->parent == parent)
+      if (neighbor->get_degree() - parent_degree > 2) return true;
+  } else {
+    if (get_degree() - parent_degree > 2) return true;
+  }
+  return false;
+}
+
+template <typename v_t, typename e_t>
+bool UFOCluster<v_t, e_t>::contains_neighbor(Cluster* c) {
+  for (auto neighbor : neighbors) if (UNTAG(neighbor) == c) return true;
+  if (has_neighbor_set() && get_neighbor_set()->find(c) != get_neighbor_set()->end()) return true;
+  return false;
+}
+
+template <typename v_t, typename e_t>
+void UFOCluster<v_t, e_t>::insert_neighbor(Cluster* c) {
+  assert(!contains_neighbor(c));
+  // degree++;
+  for (int i = 0; i < UFO_ARRAY_MAX; ++i) {
+    if (UNTAG(neighbors[i]) == nullptr) [[likely]] {
+      int deg = GET_TAG(neighbors[UFO_ARRAY_MAX-1]);
+      neighbors[i] = c;
+      neighbors[UFO_ARRAY_MAX-1] = TAG(UNTAG(neighbors[UFO_ARRAY_MAX-1]), deg+1);
+      return;
+    }
+  }
+  if (!has_neighbor_set()) {
+    auto neighbor_set = new NeighborSet();
+    std::pair<Cluster*, e_t> insert_pair;
+    insert_pair.first = UNTAG(neighbors[UFO_ARRAY_MAX-1]);
+    neighbor_set->insert(insert_pair);
+    neighbors[UFO_ARRAY_MAX-1] = TAG(neighbor_set, 4);
+  }
+  std::pair<Cluster*, e_t> insert_pair;
+  insert_pair.first = c;
+  get_neighbor_set()->insert(insert_pair);
+}
+
+template <typename v_t, typename e_t>
+void UFOCluster<v_t, e_t>::insert_neighbor_with_value(Cluster* c, e_t value) {
+  if constexpr (!std::is_same<e_t, empty_t>::value) {
+    assert(!contains_neighbor(c));
+    // degree++;
+    for (int i = 0; i < UFO_ARRAY_MAX; ++i) {
+      if (UNTAG(neighbors[i]) == nullptr) [[likely]] {
+        int deg = GET_TAG(neighbors[UFO_ARRAY_MAX-1]);
+        neighbors[i] = c;
+        set_edge_value(i, value);
+        neighbors[UFO_ARRAY_MAX-1] = TAG(UNTAG(neighbors[UFO_ARRAY_MAX-1]), deg+1);
+        return;
+      }
+    }
+    if (!has_neighbor_set()) {
+      auto neighbor_set = new NeighborSet();
+      neighbor_set->insert({UNTAG(neighbors[UFO_ARRAY_MAX-1]),
get_edge_value(UFO_ARRAY_MAX-1)}); + neighbors[UFO_ARRAY_MAX-1] = TAG(neighbor_set, 4); + } + get_neighbor_set()->insert({c,value}); + } +} + +template +void UFOCluster::remove_neighbor(Cluster* c) { + assert(contains_neighbor(c)); + // degree--; + for (int i = 0; i < UFO_ARRAY_MAX; ++i) { + if (UNTAG(neighbors[i]) == c) { + neighbors[i] = TAG(nullptr, GET_TAG(neighbors[i])); + if (has_neighbor_set()) [[unlikely]] { // Put an element from the set into the array + auto neighbor_set = get_neighbor_set(); + auto replacement = *neighbor_set->begin(); + neighbors[i] = replacement.first; + if constexpr (!std::is_same::value) + set_edge_value(i, replacement.second); + neighbor_set->erase(replacement.first); + if (neighbor_set->size() == 1) { + auto temp = *neighbor_set->begin(); + delete neighbor_set; + neighbors[UFO_ARRAY_MAX-1] = TAG(temp.first, 3); + if constexpr (!std::is_same::value) + set_edge_value(UFO_ARRAY_MAX-1, temp.second); + } + } else [[likely]] { + for (int j = UFO_ARRAY_MAX-1; j > i; --j) { + if (UNTAG(neighbors[j])) [[unlikely]] { + neighbors[i] = UNTAG(neighbors[j]); + neighbors[j] = TAG(nullptr, GET_TAG(neighbors[j])); + if constexpr (!std::is_same::value) + set_edge_value(i, get_edge_value(j)); + break; + } + } + neighbors[UFO_ARRAY_MAX-1] = TAG(UNTAG(neighbors[UFO_ARRAY_MAX-1]), GET_TAG(neighbors[UFO_ARRAY_MAX-1])-1); + } + return; + } + } + auto neighbor_set = get_neighbor_set(); + neighbor_set->erase(c); + if (neighbor_set->size() == 1) { + auto temp = *neighbor_set->begin(); + delete neighbor_set; + neighbors[UFO_ARRAY_MAX-1] = TAG(temp.first, 3); + if constexpr (!std::is_same::value) + set_edge_value(UFO_ARRAY_MAX-1, temp.second); + } +} + +template +void UFOCluster::set_edge_value(int index, e_t value) { + e_t* address = &edge_value1 + index; + *address = value; +} + +template +e_t UFOCluster::get_edge_value(int index) { + e_t* address = &edge_value1 + index; + return *address; +} + +template +size_t UFOCluster::calculate_size() { + size_t memory = sizeof(UFOCluster); + if (has_neighbor_set()) memory += get_neighbor_set()->bucket_count() * sizeof(std::pair); + return memory; +} + +} diff --git a/include/ufo_tree/ufo_tree.h b/include/ufo_tree/ufo_tree.h new file mode 100644 index 0000000..0aa6acf --- /dev/null +++ b/include/ufo_tree/ufo_tree.h @@ -0,0 +1,806 @@ +#pragma once +#include "ufo_tree/types.h" +#include "ufo_tree/util.h" +#include "ufo_tree/ufo_cluster.h" +#include +#include + + +namespace ufo { + +template +class UFOTree { +using Cluster = UFOCluster; +public: + // UFO tree interface + UFOTree( + vertex_t n, QueryType q = CONNECTIVITY, + std::function f_v = [](v_t x, v_t y) -> v_t {return x;}, + std::function f_e = [](e_t x, e_t y) -> e_t {return x;}); + UFOTree( + vertex_t n, QueryType q, + std::function f_v, std::function f_e, + v_t id_v, e_t id_e, v_t dval_v, e_t dval_e); + UFOTree(int n, QueryType q, std::function f, v_t id, v_t d_val); + ~UFOTree(); + void link(vertex_t u, vertex_t v); + void link(vertex_t u, vertex_t v, e_t value); + void cut(vertex_t u, vertex_t v); + bool connected(vertex_t u, vertex_t v); + e_t path_query(vertex_t u, vertex_t v); + // Testing helpers + size_t space(); + size_t count_nodes(); + size_t get_height(); + bool is_valid(); + void print_tree(); +private: + // Class data and parameters + std::vector leaves; + std::vector> root_clusters; + int max_level; + std::vector> lower_deg[2]; // lower_deg helps to identify clusters who became low degree during a deletion update + QueryType query_type; + std::function f_v; + v_t 
identity_v; + v_t default_v; + std::function f_e; + e_t identity_e; + e_t default_e; + // We preallocate UFO clusters and store unused clusters in free_clusters + std::vector free_clusters; + Cluster* allocate_cluster(); + void free_cluster(Cluster* c); + // Helper functions + void remove_ancestors(Cluster* c, int start_level = 0); + void recluster_tree(); + bool is_high_degree_or_high_fanout(Cluster* cluster, Cluster* child, int level); + void disconnect_siblings(Cluster* c, int level); + void insert_adjacency(Cluster* u, Cluster* v); + void insert_adjacency(Cluster* u, Cluster* v, e_t value); + void remove_adjacency(Cluster* u, Cluster* v); +}; + +template +UFOTree::UFOTree(vertex_t n, QueryType q, + std::function f_v, std::function f_e) + : query_type(q), f_v(f_v), f_e(f_e) { + leaves.resize(n); + root_clusters.resize(max_tree_height(n)); + for (int i = 0; i < n; ++i) + free_clusters.push_back(new Cluster()); +} + +template +UFOTree::UFOTree(vertex_t n, QueryType q, + std::function f_v, std::function f_e, + v_t id_v, e_t id_e, v_t dval_v, e_t dval_e) + : query_type(q), f_v(f_v), f_e(f_e), identity_v(id_v), identity_e(id_e), + default_v(dval_v), default_e(dval_e) { + leaves.resize(n, default_v); + root_clusters.resize(max_tree_height(n)); + for (int i = 0; i < n; ++i) + free_clusters.push_back(new Cluster()); +} + +template +UFOTree::UFOTree(int n, QueryType q, + std::function f, v_t id, v_t d_val) + : query_type(q), f_v(f), identity_v(id), default_v(d_val) { + if constexpr (std::is_same::value) { + f_e = f; + identity_e = id; + default_e = d_val; + } + leaves.resize(n, default_v); + root_clusters.resize(max_tree_height(n)); + for (int i = 0; i < n; ++i) + free_clusters.push_back(new Cluster()); +} + +template +UFOTree::~UFOTree() { + // Clear all memory + std::unordered_set clusters; + for (auto leaf : leaves) { + auto curr = leaf.parent; + while (curr) { + clusters.insert(curr); + curr = curr->parent; + } + } + for (auto cluster : clusters) delete cluster; + for (auto cluster : free_clusters) delete cluster; + #ifdef COLLECT_ROOT_CLUSTER_STATS + std::cout << "Number of root clusters: Frequency" << std::endl; + for (auto entry : root_clusters_histogram) + std::cout << entry.first << "\t" << entry.second << std::endl; + #endif +} + +template +UFOCluster* UFOTree::allocate_cluster() { + if (!free_clusters.empty()) { + auto c = free_clusters.back(); + free_clusters.pop_back(); + return c; + } + return new Cluster(); +} + +template +void UFOTree::free_cluster(UFOCluster* c) { + c->parent = nullptr; + if (c->has_neighbor_set()) [[unlikely]] delete c->get_neighbor_set(); + for (int i = 0; i < UFO_ARRAY_MAX; ++i) + c->neighbors[i] = nullptr; + c->degree = 0; + c->fanout = 0; + free_clusters.push_back(c); +} + +template +size_t UFOTree::space() { + std::unordered_set visited; + size_t memory = sizeof(UFOTree); + for (auto cluster : leaves) { + memory += cluster.calculate_size(); + auto parent = cluster.parent; + while (parent != nullptr && visited.count(parent) == 0) { + memory += parent->calculate_size(); + visited.insert(parent); + parent = parent->parent; + } + } + return memory; +} + +template +size_t UFOTree::count_nodes() { + std::unordered_set visited; + size_t node_count = 0; + for(auto cluster : leaves){ + node_count += 1; + auto parent = cluster.parent; + while(parent != nullptr && visited.count(parent) == 0){ + node_count += 1; + visited.insert(parent); + parent = parent->parent; + } + } + return node_count; +} + +template +size_t UFOTree::get_height() { + size_t max_height = 0; + 
for (vertex_t v = 0; v < leaves.size(); ++v) { + size_t height = 0; + Cluster* curr = &leaves[v]; + while (curr) { + height++; + curr = curr->parent; + } + max_height = std::max(max_height, height); + } + return max_height; +} + +/* Link vertex u and vertex v in the tree. Optionally include an +augmented value for the new edge (u,v). If no augmented value is +provided, the default value is 1. */ +template +void UFOTree::link(vertex_t u, vertex_t v) { + assert(u >= 0 && u < leaves.size() && v >= 0 && v < leaves.size()); + assert(u != v && !connected(u,v)); + max_level = 0; + remove_ancestors(&leaves[u]); + remove_ancestors(&leaves[v]); + insert_adjacency(&leaves[u], &leaves[v]); + recluster_tree(); +} +template +void UFOTree::link(vertex_t u, vertex_t v, e_t value) { + assert(u >= 0 && u < leaves.size() && v >= 0 && v < leaves.size()); + assert(u != v && !connected(u,v)); + max_level = 0; + remove_ancestors(&leaves[u]); + remove_ancestors(&leaves[v]); + insert_adjacency(&leaves[u], &leaves[v], value); + recluster_tree(); +} + +/* Cut vertex u and vertex v in the tree. */ +template +void UFOTree::cut(vertex_t u, vertex_t v) { + assert(u >= 0 && u < leaves.size() && v >= 0 && v < leaves.size()); + assert(leaves[u].contains_neighbor(&leaves[v])); + max_level = 0; + auto curr_u = &leaves[u]; + auto curr_v = &leaves[v]; + while (curr_u != curr_v) { + lower_deg[0].push_back({curr_u, curr_u->get_degree()-1}); + lower_deg[1].push_back({curr_v, curr_v->get_degree()-1}); + curr_u->degree = curr_u->get_degree()-1; + curr_v->degree = curr_v->get_degree()-1; + curr_u = curr_u->parent; + curr_v = curr_v->parent; + } + remove_ancestors(&leaves[u]); + remove_ancestors(&leaves[v]); + for (auto cluster: lower_deg[0]) cluster.first->degree = 0; + for (auto cluster: lower_deg[1]) cluster.first->degree = 0; + lower_deg[0].clear(); + lower_deg[1].clear(); + remove_adjacency(&leaves[u], &leaves[v]); + recluster_tree(); +} + +/* Removes the ancestors of cluster c that are not high degree nor +high fan-out and add them to root_clusters. 
*/ +template +void UFOTree::remove_ancestors(Cluster* c, int start_level) { + int level = start_level; // level is always the level of cluster prev, 0 being the leaves + auto prev = c; + auto curr = c->parent; + bool del = false; + while (curr) { + // Different cases for if curr will or will not be deleted later + if (!is_high_degree_or_high_fanout(curr, prev, level)) [[likely]] { // We will delete curr next round + disconnect_siblings(prev, level); + if (del) [[likely]] { // Possibly delete prev + assert(prev->get_degree() <= UFO_ARRAY_MAX); + for (auto neighborp : prev->neighbors) { + auto neighbor = UNTAG(neighborp); + if (neighbor) neighbor->remove_neighbor(prev); // Remove prev from adjacency + } + auto position = std::find(root_clusters[level].begin(), root_clusters[level].end(), prev); + if (position != root_clusters[level].end()) root_clusters[level].erase(position); + free_cluster(prev); + } else [[unlikely]] { + prev->parent = nullptr; + curr->fanout--; + root_clusters[level].push_back(prev); + } + del = true; + } else [[unlikely]] { // We will not delete curr next round + if (del) [[likely]] { // Possibly delete prev + assert(prev->get_degree() <= UFO_ARRAY_MAX); + for (auto neighborp : prev->neighbors) { + auto neighbor = UNTAG(neighborp); + if (neighbor) neighbor->remove_neighbor(prev); // Remove prev from adjacency + } + auto position = std::find(root_clusters[level].begin(), root_clusters[level].end(), prev); + if (position != root_clusters[level].end()) root_clusters[level].erase(position); + free_cluster(prev); + curr->fanout--; + } else [[unlikely]] if (prev->get_degree() <= 1) { + prev->parent = nullptr; + curr->fanout--; + root_clusters[level].push_back(prev); + } + del = false; + } + // Update pointers + prev = curr; + curr = prev->parent; + level++; + } + // DO LAST DELETIONS + if (del) [[likely]] { // Possibly delete prev + assert(prev->get_degree() <= UFO_ARRAY_MAX); + for (auto neighborp : prev->neighbors) { + auto neighbor = UNTAG(neighborp); + if (neighbor) neighbor->remove_neighbor(prev); // Remove prev from adjacency + } + auto position = std::find(root_clusters[level].begin(), root_clusters[level].end(), prev); + if (position != root_clusters[level].end()) root_clusters[level].erase(position); + free_cluster(prev); + } else [[unlikely]] root_clusters[level].push_back(prev); + if (level > max_level) max_level = level; +} + +template +void UFOTree::recluster_tree() { + for (int level = 0; level <= max_level; level++) { + if (root_clusters[level].empty()) [[unlikely]] continue; + // Update root cluster stats if we are collecting them + #ifdef COLLECT_ROOT_CLUSTER_STATS + if (root_clusters_histogram.find(root_clusters[level].size()) == root_clusters_histogram.end()) + root_clusters_histogram[root_clusters[level].size()] = 1; + else + root_clusters_histogram[root_clusters[level].size()] += 1; + #endif + // Merge deg 3-5 root clusters with all of its deg 1 neighbors + for (auto cluster : root_clusters[level]) { + if (!cluster->parent && cluster->get_degree() > 2) [[unlikely]] { + assert(cluster->get_degree() <= 5); + auto parent = allocate_cluster(); + if constexpr (!std::is_same::value) { + parent->value = identity_v; + } + parent->fanout = 1; + cluster->parent = parent; + root_clusters[level+1].push_back(parent); + assert(UFO_ARRAY_MAX >= 3); + if (!cluster->has_neighbor_set()) [[likely]] { + for (int i = 0; i < UFO_ARRAY_MAX; ++i) { + auto neighbor = UNTAG(cluster->neighbors[i]); + if (neighbor->get_degree() == 1) [[unlikely]] { + auto curr = neighbor->parent; + 
int lev = level+1; + while (curr) { + auto temp = curr; + curr = curr->parent; + auto position = std::find(root_clusters[lev].begin(), root_clusters[lev].end(), temp); + if (position != root_clusters[lev].end()) root_clusters[lev].erase(position); + free_cluster(temp); + lev++; + } + neighbor->parent = cluster->parent; + parent->fanout++; + } else if (neighbor->parent) { // Populate new parent's neighbors + if constexpr (std::is_same::value) { + parent->insert_neighbor(neighbor->parent); + neighbor->parent->insert_neighbor(parent); + } else { + parent->insert_neighbor_with_value(neighbor->parent, cluster->get_edge_value(i)); + neighbor->parent->insert_neighbor_with_value(parent, cluster->get_edge_value(i)); + } + } + } + } else [[unlikely]] { + for (int i = 0; i < UFO_ARRAY_MAX-1; ++i) { + auto neighbor = cluster->neighbors[i]; + if (neighbor->get_degree() == 1) [[unlikely]] { + auto curr = neighbor->parent; + int lev = level+1; + while (curr) { + auto temp = curr; + curr = curr->parent; + auto position = std::find(root_clusters[lev].begin(), root_clusters[lev].end(), temp); + if (position != root_clusters[lev].end()) root_clusters[lev].erase(position); + free_cluster(temp); + lev++; + } + neighbor->parent = cluster->parent; + parent->fanout++; + } else if (neighbor->parent) { // Populate new parent's neighbors + if constexpr (std::is_same::value) { + parent->insert_neighbor(neighbor->parent); + neighbor->parent->insert_neighbor(parent); + } else { + parent->insert_neighbor_with_value(neighbor->parent, cluster->get_edge_value(i)); + neighbor->parent->insert_neighbor_with_value(parent, cluster->get_edge_value(i)); + } + } + } + for (auto neighbor_pair : *cluster->get_neighbor_set()) { + auto neighbor = neighbor_pair.first; + if (neighbor->get_degree() == 1) [[unlikely]] { + auto curr = neighbor->parent; + int lev = level+1; + while (curr) { + auto temp = curr; + curr = curr->parent; + auto position = std::find(root_clusters[lev].begin(), root_clusters[lev].end(), temp); + if (position != root_clusters[lev].end()) root_clusters[lev].erase(position); + free_cluster(temp); + lev++; + } + neighbor->parent = cluster->parent; + parent->fanout++; + } else if (neighbor->parent) { // Populate new parent's neighbors + if constexpr (std::is_same::value) { + parent->insert_neighbor(neighbor->parent); + neighbor->parent->insert_neighbor(parent); + } else { + parent->insert_neighbor_with_value(neighbor->parent, neighbor_pair.second); + neighbor->parent->insert_neighbor_with_value(parent, neighbor_pair.second); + } + } + } + } + } + } + // This loop handles all deg 2 and 1 root clusters + for (auto cluster : root_clusters[level]) { + // Combine deg 2 root clusters with deg 2 root clusters + if (!cluster->parent && cluster->get_degree() == 2) [[unlikely]] { + assert(UFO_ARRAY_MAX >= 2); + for (int i = 0; i < 2; ++i) { + auto neighbor = cluster->neighbors[i]; + if (!neighbor->parent && (neighbor->get_degree() == 2)) [[unlikely]] { + auto parent = allocate_cluster(); + cluster->parent = parent; + neighbor->parent = parent; + parent->fanout = 2; + if constexpr (!std::is_same::value) { // Path query + parent->value = f_e(cluster->value, f_e(neighbor->value, cluster->get_edge_value(i))); + } + root_clusters[level+1].push_back(parent); + for (int i = 0; i < 2; ++i) { // Populate new parent's neighbors + if (cluster->neighbors[i]->parent && cluster->neighbors[i]->parent != parent) { + if constexpr (std::is_same::value) { + parent->insert_neighbor(cluster->neighbors[i]->parent); + 
cluster->neighbors[i]->parent->insert_neighbor(parent); + } else { + parent->insert_neighbor_with_value(cluster->neighbors[i]->parent, cluster->get_edge_value(i)); + cluster->neighbors[i]->parent->insert_neighbor_with_value(parent, cluster->get_edge_value(i)); + } + } + if (neighbor->neighbors[i]->parent && neighbor->neighbors[i]->parent != parent) { + if constexpr (std::is_same::value) { + parent->insert_neighbor(neighbor->neighbors[i]->parent); + neighbor->neighbors[i]->parent->insert_neighbor(parent); + } else { + parent->insert_neighbor_with_value(neighbor->neighbors[i]->parent, neighbor->get_edge_value(i)); + neighbor->neighbors[i]->parent->insert_neighbor_with_value(parent, neighbor->get_edge_value(i)); + } + } + } + break; + } + } + // Combine deg 2 root clusters with deg 1 or 2 non-root clusters + if (!cluster->parent) [[unlikely]] { + assert(UFO_ARRAY_MAX >= 2); + for (int i = 0; i < 2; ++i) { + auto neighbor = cluster->neighbors[i]; + if (neighbor->parent && (neighbor->get_degree() == 1 || neighbor->get_degree() == 2)) [[unlikely]] { + if (neighbor->contracts()) continue; + cluster->parent = neighbor->parent; + neighbor->parent->fanout++; + if constexpr (!std::is_same::value) { // Path query + cluster->parent->value = f_e(cluster->value, f_e(neighbor->value, cluster->get_edge_value(i))); + } + remove_ancestors(cluster->parent, level+1); // Recursive remove ancestor call + auto other_neighbor = cluster->neighbors[!i]; // Popoulate neighbors + // if (other_neighbor->parent && (long) other_neighbor->parent->parent != 1) { + if (other_neighbor->parent) { + if constexpr (std::is_same::value) { + insert_adjacency(cluster->parent, other_neighbor->parent); + } else { + insert_adjacency(cluster->parent, other_neighbor->parent, cluster->get_edge_value(!i)); + } + } + break; + } + } + } + // Always combine deg 1 root clusters with its neighboring cluster + } else if (!cluster->parent && cluster->get_degree() == 1) [[unlikely]] { + auto neighbor = cluster->neighbors[0]; + if (neighbor->parent) { + if (neighbor->get_degree() == 2 && neighbor->contracts()) continue; + cluster->parent = neighbor->parent; + neighbor->parent->fanout++; + remove_ancestors(cluster->parent, level+1); + } else { + auto parent = allocate_cluster(); + cluster->parent = parent; + neighbor->parent = parent; + parent->fanout = 2; + if constexpr (!std::is_same::value) { // Path query + parent->value = identity_v; + } + for (int i = 0; i < 2; ++i) { // Populate new parent's neighbors + if (neighbor->neighbors[i] && neighbor->neighbors[i] != cluster && neighbor->neighbors[i]->parent) { + if constexpr (std::is_same::value) { + parent->insert_neighbor(neighbor->neighbors[i]->parent); + neighbor->neighbors[i]->parent->insert_neighbor(parent); + } else { + parent->insert_neighbor_with_value(neighbor->neighbors[i]->parent, neighbor->get_edge_value(i)); + neighbor->neighbors[i]->parent->insert_neighbor_with_value(parent, neighbor->get_edge_value(i)); + } + } + } + root_clusters[level+1].push_back(parent); + } + } + } + // Add remaining uncombined clusters to the next level + for (auto cluster : root_clusters[level]) { + if (!cluster->parent && cluster->get_degree() > 0) [[unlikely]] { + auto parent = allocate_cluster(); + cluster->parent = parent; + parent->fanout = 1; + if constexpr (!std::is_same::value) { // Path query + parent->value = cluster->value; + } + for (int i = 0; i < 2; ++i) { // Populate new parent's neighbors + if (cluster->neighbors[i] && cluster->neighbors[i]->parent) { + if constexpr (std::is_same::value) { + 
parent->insert_neighbor(cluster->neighbors[i]->parent); + cluster->neighbors[i]->parent->insert_neighbor(parent); + } else { + parent->insert_neighbor_with_value(cluster->neighbors[i]->parent, cluster->get_edge_value(i)); + cluster->neighbors[i]->parent->insert_neighbor_with_value(parent, cluster->get_edge_value(i)); + } + } + } + root_clusters[level+1].push_back(parent); + } + } + // Clear the contents of this level + root_clusters[level].clear(); + if (level == max_level && !root_clusters[max_level+1].empty()) max_level++; + } +} + +template +bool UFOTree::is_high_degree_or_high_fanout(Cluster* cluster, Cluster* child, int level) { + int cluster_degree = cluster->degree > 0 ? cluster->degree : cluster->get_degree(); + if (cluster_degree > 2) [[unlikely]] return true; + if (!child->neighbors[1] && cluster->fanout > 2) [[unlikely]] return true; + int child_degree = child->degree > 0 ? child->degree : child->get_degree(); + if (child_degree - cluster_degree > 2) [[unlikely]] return true; + return false; +} + +/* Helper function which takes a cluster c and the level of that cluster. The function +should find every cluster that shares a parent with c, disconnect it from their parent +and add it as a root cluster to be processed. */ +template +void UFOTree::disconnect_siblings(Cluster* c, int level) { + if (c->get_degree() == 1) { + auto center = c->neighbors[0]; + if (center->parent && c->parent != center->parent) return; + assert(center->get_degree() <= 5); + if (!center->has_neighbor_set()) [[likely]] { + for (auto neighborp : center->neighbors) { + Cluster* neighbor = UNTAG(neighborp); + if (neighbor && neighbor->parent == c->parent && neighbor != c) { + neighbor->parent = nullptr; // Set sibling parent pointer to null + root_clusters[level].push_back(neighbor); // Keep track of root clusters + } + } + } else [[unlikely]] { + for (int i = 0; i < UFO_ARRAY_MAX-1; ++i) { + Cluster* neighbor = center->neighbors[i]; + if (neighbor->parent == c->parent && neighbor != c) { + neighbor->parent = nullptr; // Set sibling parent pointer to null + root_clusters[level].push_back(neighbor); // Keep track of root clusters + } + } + for (auto neighbor_pair : *center->get_neighbor_set()) { + Cluster* neighbor = neighbor_pair.first; + if (neighbor && neighbor->parent == c->parent && neighbor != c) { + neighbor->parent = nullptr; // Set sibling parent pointer to null + root_clusters[level].push_back(neighbor); // Keep track of root clusters + } + } + } + center->parent = nullptr; + root_clusters[level].push_back(center); + } else { + assert(c->get_degree() <= 5); + if (!c->has_neighbor_set()) [[likely]] { + for (auto neighborp : c->neighbors) { + Cluster* neighbor = UNTAG(neighborp); + if (neighbor && neighbor->parent == c->parent) { + neighbor->parent = nullptr; // Set sibling parent pointer to null + root_clusters[level].push_back(neighbor); // Keep track of root clusters + } + } + } else [[unlikely]] { + for (int i = 0; i < UFO_ARRAY_MAX-1; ++i) { + Cluster* neighbor = c->neighbors[i]; + if (neighbor->parent == c->parent) { + neighbor->parent = nullptr; // Set sibling parent pointer to null + root_clusters[level].push_back(neighbor); // Keep track of root clusters + } + } + for (auto neighbor_pair : *c->get_neighbor_set()) { + Cluster* neighbor = neighbor_pair.first; + if (neighbor && neighbor->parent == c->parent) { + neighbor->parent = nullptr; // Set sibling parent pointer to null + root_clusters[level].push_back(neighbor); // Keep track of root clusters + } + } + } + } +} + +template +void 
UFOTree::insert_adjacency(Cluster* u, Cluster* v) { + auto curr_u = u; + auto curr_v = v; + while (curr_u && curr_v && curr_u != curr_v) { + curr_u->insert_neighbor(curr_v); + curr_v->insert_neighbor(curr_u); + curr_u = curr_u->parent; + curr_v = curr_v->parent; + } +} + +template +void UFOTree::insert_adjacency(Cluster* u, Cluster* v, e_t value) { + auto curr_u = u; + auto curr_v = v; + while (curr_u && curr_v && curr_u != curr_v) { + curr_u->insert_neighbor_with_value(curr_v, value); + curr_v->insert_neighbor_with_value(curr_u, value); + curr_u = curr_u->parent; + curr_v = curr_v->parent; + } +} + +template +void UFOTree::remove_adjacency(Cluster* u, Cluster* v) { + auto curr_u = u; + auto curr_v = v; + while (curr_u && curr_v && curr_u != curr_v) { + curr_u->remove_neighbor(curr_v); + curr_v->remove_neighbor(curr_u); + // curr_u->degree = 0; + // curr_v->degree = 0; + curr_u = curr_u->parent; + curr_v = curr_v->parent; + } +} + +/* Return true if and only if there is a path from vertex u to +vertex v in the tree. */ +template +bool UFOTree::connected(vertex_t u, vertex_t v) { + return leaves[u].get_root() == leaves[v].get_root(); +} + +template +e_t UFOTree::path_query(vertex_t u, vertex_t v) { + assert(u < leaves.size() && u >= 0 && v < leaves.size() && v >= 0 && u != v && connected(u, v)); + + e_t path_u1, path_u2, path_v1, path_v2; + path_u1 = path_u2 = path_v1 = path_v2 = identity_e; + Cluster *bdry_u1, *bdry_u2, *bdry_v1, *bdry_v2; + bdry_u1 = bdry_u2 = bdry_v1 = bdry_v2 = nullptr; + if (leaves[u].get_degree() == 2) { + bdry_u1 = leaves[u].neighbors[0]; + bdry_u2 = leaves[u].neighbors[1]; + } + if (leaves[v].get_degree() == 2) { + bdry_v1 = leaves[v].neighbors[0]; + bdry_v2 = leaves[v].neighbors[1]; + } + auto curr_u = &leaves[u]; + auto curr_v = &leaves[v]; + while (curr_u->parent != curr_v->parent) { + // NOTE(ATHARVA): Make this all into one function. + if (curr_u->get_degree() > 2) { + if (curr_u->parent->get_degree() == 2) { + // Superunary to Binary + bdry_u1 = curr_u->parent->neighbors[0]; + bdry_u2 = curr_u->parent->neighbors[1]; + path_u2 = path_u1; + } + } else { + for (int i = 0; i < 2; i++) { + auto neighbor = curr_u->neighbors[i]; + if (neighbor && neighbor->parent == curr_u->parent) { + if (curr_u->get_degree() == 2) { + if (curr_u->parent->get_degree() == 2) { + // Binary to Binary + if (neighbor == bdry_u1) { + path_u1 = f_e(path_u1, f_e(curr_u->get_edge_value(i), neighbor->value)); + bdry_u2 = bdry_u2->parent; + for (int i = 0; i < 2; i++) + if (curr_u->parent->neighbors[i] && curr_u->parent->neighbors[i] != bdry_u2) + bdry_u1 = curr_u->parent->neighbors[i]; + } else { + path_u2 = f_e(path_u2, f_e(curr_u->get_edge_value(i), neighbor->value)); + bdry_u1 = bdry_u1->parent; + for (int i = 0; i < 2; i++) + if (curr_u->parent->neighbors[i] && curr_u->parent->neighbors[i] != bdry_u1) + bdry_u2 = curr_u->parent->neighbors[i]; + } + } else { + // Binary to Unary + path_u1 = (neighbor == bdry_u1) ? 
path_u2 : path_u1; + } + } else { + if (curr_u->parent->get_degree() == 2) { + // Unary to Binary + path_u1 = path_u2 = f_e(path_u1, curr_u->get_edge_value(i)); + bdry_u1 = curr_u->parent->neighbors[0]; + bdry_u2 = curr_u->parent->neighbors[1]; + } else { + // Unary to Unary and Unary to Superunary + path_u1 = f_e(path_u1, f_e(curr_u->get_edge_value(i), neighbor->value)); + } + } + break; + } + } + if (!curr_u->contracts()) { + if (bdry_u1) bdry_u1 = bdry_u1->parent; + if (bdry_u2) bdry_u2 = bdry_u2->parent; + } + } + curr_u = curr_u->parent; + // Same thing for the side of curr_v + if (curr_v->get_degree() > 2) { + if (curr_v->parent->get_degree() == 2) { + // Superunary to Superunary/Binary + bdry_v1 = curr_v->parent->neighbors[0]; + bdry_v2 = curr_v->parent->neighbors[1]; + path_v2 = path_v1; + } + } else { + for (int i = 0; i < 2; i++) { + auto neighbor = curr_v->neighbors[i]; + if (neighbor && neighbor->parent == curr_v->parent) { + if (curr_v->get_degree() == 2) { + if (curr_v->parent->get_degree() == 2) { + // Binary to Binary + if (neighbor == bdry_v1) { + path_v1 = f_e(path_v1, f_e(curr_v->get_edge_value(i), neighbor->value)); + bdry_v2 = bdry_v2->parent; + for (int i = 0; i < 2; i++) + if (curr_v->parent->neighbors[i] && curr_v->parent->neighbors[i] != bdry_v2) + bdry_v1 = curr_v->parent->neighbors[i]; + } else { + path_v2 = f_e(path_v2, f_e(curr_v->get_edge_value(i), neighbor->value)); + bdry_v1 = bdry_v1->parent; + for (int i = 0; i < 2; i++) + if (curr_v->parent->neighbors[i] && curr_v->parent->neighbors[i] != bdry_v1) + bdry_v2 = curr_v->parent->neighbors[i]; + } + } else { + // Binary to Unary + path_v1 = (neighbor == bdry_v1) ? path_v2 : path_v1; + } + } else { + if (curr_v->parent->get_degree() == 2) { + // Unary to Binary + path_v1 = path_v2 = f_e(path_v1, curr_v->get_edge_value(i)); + bdry_v1 = curr_v->parent->neighbors[0]; + bdry_v2 = curr_v->parent->neighbors[1]; + } else { + // Unary to Unary and Unary to Superunary + path_v1 = f_e(path_v1, f_e(curr_v->get_edge_value(i), neighbor->value)); + } + } + break; + } + } + if (!curr_v->contracts()) { + if (bdry_v1) bdry_v1 = bdry_v1->parent; + if (bdry_v2) bdry_v2 = bdry_v2->parent; + } + } + curr_v = curr_v->parent; + } + // Get the correct path sides when the two vertices meet at the LCA + e_t total = identity_e; + if (curr_u->get_degree() == 2) + total = f_e(total, (curr_v == bdry_u1) ? path_u1 : path_u2); + else + total = f_e(total, path_u1); + if (curr_v->get_degree() == 2) + total = f_e(total, (curr_u == bdry_v1) ? 
path_v1 : path_v2); + else + total = f_e(total, path_v1); + // If the LCA contracts them in a star merge, take both edges to the center + if (curr_u->get_degree() == 1 && curr_v->get_degree() == 1 + && curr_u->neighbors[0] != curr_v) [[unlikely]] { + total = f_e(total, curr_u->get_edge_value(0)); + total = f_e(total, curr_v->get_edge_value(0)); + } + // Add the value of the last edge (since they contract one must be deg <= 2) + else [[likely]] { + for (int i = 0; i < 2; i++) { + if (curr_u->neighbors[i] == curr_v) { + total = f_e(total, curr_u->get_edge_value(i)); + break; + } + if (curr_v->neighbors[i] == curr_u) { + total = f_e(total, curr_v->get_edge_value(i)); + break; + } + } + } + return total; +} + +} diff --git a/include/ufo_tree/util.h b/include/ufo_tree/util.h new file mode 100644 index 0000000..d4820fa --- /dev/null +++ b/include/ufo_tree/util.h @@ -0,0 +1,61 @@ +#pragma once +#include +#include +#include +#include +#include "ufo_tree/types.h" + + +namespace ufo { + +template +inline bool CAS(ET *ptr, ET oldv, ET newv) { + if (sizeof(ET) == 1) { + return __sync_bool_compare_and_swap((bool*)ptr, *((bool*)&oldv), *((bool*)&newv)); + } else if (sizeof(ET) == 4) { + return __sync_bool_compare_and_swap((int*)ptr, *((int*)&oldv), *((int*)&newv)); + } else if (sizeof(ET) == 8) { + return __sync_bool_compare_and_swap((long*)ptr, *((long*)&oldv), *((long*)&newv)); + } else { + std::cout << "CAS bad length : " << sizeof(ET) << std::endl; + abort(); + } +} + +template +inline ET AtomicLoad(ET *ptr) { + return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); +} + +template +inline void AtomicStore(ET *ptr, ET val) { + __atomic_store_n(ptr, val, __ATOMIC_SEQ_CST); +} + +template +inline ET AtomicExchange(ET *ptr, ET val) { + return __sync_lock_test_and_set(ptr, val); +} + +#define MAX_VERTEX_T (std::numeric_limits::max()) + +#define VERTICES_TO_EDGE(U, V) (edge_t) U + (((edge_t) V) << 32) +#define EDGE_TYPE_TO_STRUCT(E) {(vertex_t) E, (vertex_t) (E >> 32)} + +static int max_tree_height(vertex_t n) { + return ceil(log2(n) / log2(1.2)); +} + +#define TAG(P,T) (Cluster*)((uintptr_t) P | (uintptr_t) T) +#define UNTAG(P) (Cluster*)((uintptr_t) P & (uintptr_t) ~0x7) +#define GET_TAG(P) (int)((uintptr_t) P & (uintptr_t) 0x7) + +// #define START_TIMER(X) auto X = std::chrono::high_resolution_clock::now() +// #define STOP_TIMER(X, T) T += std::chrono::duration_cast(std::chrono::high_resolution_clock::now()-X).count() +// #define PRINT_TIMER(S, T) std::cout << " " << S << " (ms): " << T/1000000 << std::endl + +#define START_TIMER(X) ; +#define STOP_TIMER(X, T) ; +#define PRINT_TIMER(S, T) ; + +} diff --git a/include/union_find_local.h b/include/union_find_local.h new file mode 100644 index 0000000..8186985 --- /dev/null +++ b/include/union_find_local.h @@ -0,0 +1,71 @@ +#pragma once + +// basically unmodified from parlay's union_find.h +// in its examples (as of commit e1b1f17) + +#include + +// The following supports both "link" (a directed union) and "find". +// They are safe to run concurrently as long as there is no cycle among +// concurrent links. This can be achieved, for example by only linking +// a vertex with lower id into one with higher degree. +// See: "Internally deterministic parallel algorithms can be fast" +// Blelloch, Fineman, Gibbons, and Shun +// for a discussion of link/find. +template +struct union_find_local { + // TODO - think about this more carefully. + // it's not like we're really using the atomics? 
+  // parlay::sequence<std::atomic<vertex>> parents;
+  parlay::sequence<vertex> parents;
+
+  size_t space_usage_bytes() const {
+    return sizeof(union_find_local) + (parents.capacity() * sizeof(vertex));
+  }
+
+  bool is_root(vertex u) {
+    return parents[u] < 0;
+  }
+
+  // initialize n elements all as roots
+  union_find_local(size_t n) : parents(parlay::tabulate(n, [](long) -> vertex { return -1; })) {}
+
+  vertex find(vertex i) {
+    if (is_root(i)) return i;
+    vertex p = parents[i];
+    if (is_root(p)) return p;
+
+    // find root, shortcutting along the way
+    do {
+      vertex gp = parents[p];
+      parents[i] = gp;
+      i = p;
+      p = gp;
+    } while (!is_root(p));
+    return p;
+  }
+
+  // Version of union that is safe for parallelism
+  // when no cycles are created (e.g. only link from a larger
+  // to a smaller vertex).
+  // Does not use ranks.
+  void link(vertex u, vertex v) {
+    // ONLY MODIFICATION
+    // we're going to enforce the minimum as the root
+    if (u < v) {
+      std::swap(u, v);
+    }
+    // u > v now holds;
+    // make v the parent of u
+    parents[u] = v;
+  }
+
+  void reset() {
+    // make everything a root again
+    parlay::parallel_for(0, parents.size(), [&](size_t i) {
+      parents[i] = -1;
+    });
+  }
+};
+
+template struct union_find_local;
\ No newline at end of file
diff --git a/include/util.h b/include/util.h
index 0fbc050..313ce1f 100644
--- a/include/util.h
+++ b/include/util.h
@@ -6,6 +6,7 @@
 extern std::string stream_file;
 extern int batch_size_arg;
 extern double height_factor_arg;
+extern int hybrid_threshold_arg;
 
 //#define START(X) auto X = std::chrono::high_resolution_clock::now()
 //#define STOP(C, X) C += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - X).count()
diff --git a/include/utils/epoch.h b/include/utils/epoch.h
new file mode 100644
index 0000000..917d01a
--- /dev/null
+++ b/include/utils/epoch.h
@@ -0,0 +1,597 @@
+// ***************************
+// Epoch-based memory reclamation
+// Supports:
+//   epoch::with_epoch(F f),
+// which runs f within an epoch, as well as:
+//   epoch::New(args...)
+//   epoch::Retire(T* a) -- delays destruction and free
+//   epoch::Delete(T* a) -- destructs and frees immediately
+// Retire delays destruction and free until no operation that was in a
+// with_epoch at the time it was run is still within the with_epoch.
+//
+// All operations take constant time overhead (beyond the cost of the
+// system malloc and free).
+//
+// Designed to work with C++ threads, or compatible threading
+// libraries. In particular it uses thread_local variables, and no two
+// concurrent processes can share the same instance of the variable.
+//
+// When NDEBUG is not set, the operations check for memory corruption
+// of the bytes immediately before and after the object, and check
+// for double retires/deletes. Also:
+//   epoch::check_ptr(T* a)
+// will check that an object allocated using epoch::New(..) has not
+// been corrupted.
+//
+// Supports undoing retires. This can be useful in a transactional
+// system in which an operation aborts, and any retires done during
+// the operation have to be undone. In particular, Retire returns a
+// pointer to a boolean. Running
+//   epoch::undo_retire(bool* x)
+// will undo the retire. It must be run in the same with_epoch as the
+// retire, otherwise it is too late to undo. If you don't want
+// to undo retires, you can ignore this feature.
+//
+// New, Retire and Delete use a shared pool for the retired lists,
+// which, although not very large, is not cleared until program
+// termination.
A private pool can be created with +// epoch::memory_pool a; +// which then supports a->New(args...), a->Retire(T*) and +// a->Delete(T*). On destruction of "a", all elements of the retired +// lists will be destructed and freed. +// +// Achieves constant times overhead by incrementally taking steps. +// In particular every Retire takes at most a constant number of +// incremental steps towards updating the epoch and clearing the +// retired lists. +// +// Developed as part of parlay project at CMU, initially for flock then +// used for verlib, and parlayhash. +// Current dependence on parlay is just for parlay::my_thread_id() and +// parlay::num_thread_ids() which are from "parlay/thread_specific.h". +// *************************** + +#include +#include +#include +#include +#include +#include +#include +#include +// Needed for parlay::my_thread_id of parlay::num_thread_ids +#include "threads/thread_specific.h" + +#ifndef PARLAY_EPOCH_H_ +#define PARLAY_EPOCH_H_ + +#ifndef NDEBUG +// Checks for corruption of bytes before and after allocated structures, as well as double frees. +// Requires some extra memory to pad the front and back of a structure. +#define EpochMemCheck 1 +#endif +//#define EpochMemCheck 1 + +#define USE_STEPPING 1 +//#define USE_UNDO 1 + +#ifdef USE_PARLAY_ALLOC +#include "parlay/alloc.h" +#endif + +// *************************** +// epoch structure +// *************************** + +namespace epoch { + + namespace internal { + + inline int worker_id() {return parlay::my_thread_id(); } + inline int num_workers() {return parlay::num_thread_ids();} + constexpr int max_num_workers = 1024; + +struct alignas(64) epoch_s { + + // functions to run when epoch is incremented + std::vector> before_epoch_hooks; + std::vector> after_epoch_hooks; + + struct alignas(64) announce_slot { + std::atomic last; + announce_slot() : last(-1l) {} + }; + + std::vector announcements; + std::atomic current_epoch; + epoch_s() : + announcements(std::vector(max_num_workers)), + current_epoch(0), + epoch_state(0) {} + + long get_current() { + return current_epoch.load(); + } + + long get_my_epoch() { + return announcements[worker_id()].last; + } + + void set_my_epoch(long e) { + announcements[worker_id()].last = e; + } + + int announce() { + size_t id = worker_id(); + assert(id < max_num_workers); + while (true) { + long current_e = get_current(); + long tmp = current_e; + // apparently an exchange is faster than a store (write and fence) + announcements[id].last.exchange(tmp, std::memory_order_seq_cst); + if (get_current() == current_e) return id; + } + } + + void unannounce(size_t id) { + announcements[id].last.store(-1l, std::memory_order_release); + } + + // top 16 bits are used for the process id, and the bottom 48 for + // the epoch number + using state = size_t; + std::atomic epoch_state; + + // Attempts to takes num_steps checking the announcement array to + // see that all slots are up-to-date with the current epoch. Once + // they are, the epoch is updated. Designed to deamortize the cost + // of sweeping the announcement array--every thread only does + // constant work. 
+  state update_epoch_steps(state prev_state, int num_steps) {
+    state current_state = epoch_state.load();
+    if (prev_state != current_state)
+      return current_state;
+    size_t i = current_state >> 48;
+    size_t current_e = ((1ul << 48) - 1) & current_state;
+    size_t workers = num_workers();
+    if (i == workers) {
+      for (const auto h : before_epoch_hooks) h();
+      long tmp = current_e;
+      if (current_epoch.load() == current_e &&
+          current_epoch.compare_exchange_strong(tmp, current_e+1)) {
+        for (const auto h : after_epoch_hooks) h();
+      }
+      state new_state = current_e + 1;
+      epoch_state.compare_exchange_strong(current_state, new_state);
+      return epoch_state.load();
+    }
+    size_t j;
+    for (j = i ; j < i + num_steps && j < workers; j++)
+      if ((announcements[j].last != -1l) && announcements[j].last < current_e)
+        return current_state;
+    state new_state = (j << 48 | current_e);
+    if (epoch_state.compare_exchange_strong(current_state, new_state))
+      return new_state;
+    return current_state;
+  }
+
+  // this version does the full sweep
+  void update_epoch() {
+    long current_e = get_current();
+
+    // check if everyone is done with earlier epochs
+    int workers;
+    do {
+      workers = num_workers();
+      if (workers > max_num_workers) {
+        std::cerr << "number of threads: " << workers
+                  << ", greater than max_num_workers: " << max_num_workers << std::endl;
+        abort();
+      }
+      for (int i=0; i < workers; i++)
+        if ((announcements[i].last != -1l) && announcements[i].last < current_e)
+          return;
+    } while (num_workers() != workers); // this is unlikely to loop
+
+    // if so then increment current epoch
+    for (const auto h : before_epoch_hooks) h();
+    if (current_epoch.compare_exchange_strong(current_e, current_e+1)) {
+      for (const auto h : after_epoch_hooks) h();
+    }
+  }
+};
+
+  // Just one epoch structure shared by all
+  extern inline epoch_s& get_epoch() {
+    static epoch_s epoch;
+    return epoch;
+  }
+
+// ***************************
+// type specific memory pools
+// ***************************
+
+template <typename T>
+struct alignas(64) memory_pool {
+private:
+
+  struct list_entry {
+    T* ptr;
+#ifdef USE_UNDO
+    bool keep_;
+    bool keep() {return keep_;}
+    list_entry() : keep_(false) {}
+    list_entry(T* ptr) : ptr(ptr), keep_(false) {}
+#else
+    bool keep() {return false;}
+#endif
+  };
+
+  // each thread keeps one of these
+  struct alignas(256) old_current {
+    std::list<list_entry> old;      // linked list of retired items from previous epoch
+    std::list<list_entry> current;  // linked list of retired items from current epoch
+    std::list<list_entry> reserve;  // linked list of items that could be destructed, but delayed so they can be reused
+    long epoch;         // epoch on last retire, updated on a retire
+    long retire_count;  // number of retires so far, reset on updating the epoch
+    long alloc_count;
+    epoch_s::state e_state;
+    old_current() : e_state(0), epoch(0), retire_count(0), alloc_count(0) {}
+  };
+
+  std::vector<old_current> pools;
+
+  // wrapper used so can pad for the memory checked version
+  struct wrapper {
+#ifdef EpochMemCheck
+    long pad;
+    std::atomic<long> head;
+    T value;
+    std::atomic<long> tail;
+#else
+    T value;
+#endif
+  };
+
+  // values used to check for corruption or double delete
+  static constexpr long default_val = 10;
+  static constexpr long deleted_val = 55;
+
+  // given a pointer to a value in a wrapper, return a pointer to the wrapper.
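+  // A sketch of the wrapper layout under EpochMemCheck (from the struct
+  // above):
+  //
+  //   | pad | head | value : T | tail |
+  //
+  // head and tail hold default_val (10) while the object is live;
+  // free_wrapper sets head to deleted_val (55), so a double free or a
+  // write just past value is caught in check_wrapper_on_destruct below.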
+ wrapper* wrapper_from_value(T* p) { + size_t offset = ((char*) &((wrapper*) p)->value) - ((char*) p); + return (wrapper*) (((char*) p) - offset); + } + + // destructs entries on a list + void clear_list(std::list& lst) { + for (list_entry& x : lst) + if (!x.keep()) { + x.ptr->~T(); + free_wrapper(wrapper_from_value(x.ptr)); + } + lst.clear(); + } + + void advance_epoch(int i, old_current& pid) { +#ifndef USE_UNDO + int delay = 1; +#else + int delay = 2; +#endif + if (pid.epoch + delay < get_epoch().get_current()) { + pid.reserve.splice(pid.reserve.end(), pid.old); + pid.old = std::move(pid.current); + pid.epoch = get_epoch().get_current(); + } + // a heuristic +#ifdef USE_STEPPING + long update_threshold = 10; +#else + long update_threshold = 10 * num_workers(); +#endif + if (++pid.retire_count == update_threshold) { + pid.retire_count = 0; +#ifdef USE_STEPPING + pid.e_state = get_epoch().update_epoch_steps(pid.e_state, 8); +#else + get_epoch().update_epoch(); +#endif + } + } + +#ifdef USE_PARLAY_ALLOC + using Allocator = parlay::type_allocator; +#endif + + void check_wrapper_on_destruct(wrapper* x) { +#ifdef EpochMemCheck + // check nothing is corrupted or double deleted + if (x->head != default_val || x->tail != default_val) { + if (x->head == deleted_val) std::cerr << "double free" << std::endl; + else if (x->head != default_val) std::cerr << "corrupted head" << x->head << std::endl; + if (x->tail != default_val) std::cerr << "corrupted tail: " << x->tail << std::endl; + abort(); + } + x->head = deleted_val; +#endif + } + + void set_wrapper_on_construct(wrapper* x) { +#ifdef EpochMemCheck + x->pad = x->head = x->tail = default_val; +#endif + } + + void free_wrapper(wrapper* x) { + check_wrapper_on_destruct(x); +#ifdef USE_PARLAY_ALLOC + return Allocator::free(x); +#else + return std::free(x); +#endif + } + + wrapper* allocate_wrapper() { + auto &pid = pools[worker_id()]; + if (!pid.reserve.empty()) { + list_entry x = pid.reserve.front(); + pid.reserve.pop_front(); + if (!x.keep()) { + x.ptr->~T(); + wrapper* w = wrapper_from_value(x.ptr); + check_wrapper_on_destruct(w); + set_wrapper_on_construct(w); + return w; + } + } +#ifdef USE_PARLAY_ALLOC + wrapper* w = Allocator::alloc(); +#else + wrapper* w = (wrapper*) std::malloc(sizeof(wrapper)); +#endif + set_wrapper_on_construct(w); + return w; + } + + public: + memory_pool() { + long workers = max_num_workers; + pools = std::vector(workers); + for (int i = 0; i < workers; i++) { + pools[i].retire_count = 0; + } + } + + memory_pool(const memory_pool&) = delete; + ~memory_pool() { clear(); } + + // for backwards compatibility + void acquire(T* p) { } + + template + T* New(Args... args) { + wrapper* x = allocate_wrapper(); + T* newv = &x->value; + new (newv) T(args...); + return newv; + } + + // f is a function that initializes a new object before it is shared + template + T* New_Init(F f, Args... 
args) { + T* x = New(args...); + f(x); + return x; + } + + // retire and return a pointer if want to undo the retire +#ifdef USE_UNDO + bool* Retire(T* p) { +#else + void Retire(T* p) { +#endif + auto i = worker_id(); + auto &pid = pools[i]; + if (pid.reserve.size() > 500) { + list_entry x = pid.reserve.front(); + if (!x.keep()) { + x.ptr->~T(); + free_wrapper(wrapper_from_value(x.ptr)); + } + pid.reserve.pop_front(); + } + advance_epoch(i, pid); + pid.current.push_back(list_entry{p}); +#ifdef USE_UNDO + return &pid.current.back().keep_; +#endif + } + + // destructs and frees the object immediately + void Delete(T* p) { + p->~T(); + free_wrapper(wrapper_from_value(p)); + } + + bool check_ptr(T* ptr, bool silent=false) { +#ifdef EpochMemCheck + if (ptr == nullptr) return true; + wrapper* x = wrapper_from_value(ptr); + if (!silent) { + if (x->pad != default_val) std::cerr << "memory_pool, check: pad word corrupted" << x->pad << std::endl; + if (x->head != default_val) std::cerr << "memory_pool, check: head word corrupted" << x->head << std::endl; + if (x->tail != default_val) std::cerr << "memory_pool, check: tail word corrupted: " << x->tail << std::endl; + } + return (x->pad == default_val && x->head == default_val && x->tail == default_val); +#endif + return true; + } + + // Clears all the lists, to be used on termination, or could be use + // at a quiescent point when noone is reading any retired items. + void clear() { + // for (int i=0; i < num_workers(); i++) + // std::cout << i << ": " << pools[1].old.size() << ", " + // << pools[i].current.size() << ", " + // << pools[i].reserve.size() << std::endl; + get_epoch().update_epoch(); + for (int i=0; i < num_workers(); i++) { + clear_list(pools[i].old); + clear_list(pools[i].current); + clear_list(pools[i].reserve); + } + //Allocator::print_stats(); + } + + void stats() {} +}; + +template +struct alignas(64) retire_pool { +private: + + struct list_entry { + char data[sizeof(T)]; + }; + + // each thread keeps one of these + struct alignas(256) old_current { + std::list old; // linked list of retired items from previous epoch + std::list current; // linked list of retired items from current epoch + long epoch; // epoch on last retire, updated on a retire + long retire_count; // number of retires so far, reset on updating the epoch + epoch_s::state e_state; + old_current() : e_state(0), epoch(0), retire_count(0) {} + }; + + std::vector pools; + + // destructs entries on a list + void clear_list(std::list& lst) { + for (list_entry& x : lst) + ((T*) (&(x.data)))->~T(); + lst.clear(); + } + + void advance_epoch(int i, old_current& pid) { + if (pid.epoch + 1 < get_epoch().get_current()) { + clear_list(pid.old); + pid.old = std::move(pid.current); + pid.epoch = get_epoch().get_current(); + } +#ifdef USE_STEPPING + long update_threshold = 10; +#else + long update_threshold = 10 * num_workers(); +#endif + if (++pid.retire_count == update_threshold) { + pid.retire_count = 0; +#ifdef USE_STEPPING + pid.e_state = get_epoch().update_epoch_steps(pid.e_state, 8); +#else + get_epoch().update_epoch(); +#endif + } + } + + public: + retire_pool() { + long workers = max_num_workers; + pools = std::vector(workers); + for (int i = 0; i < workers; i++) + pools[i].retire_count = 0; + } + + retire_pool(const retire_pool&) = delete; + ~retire_pool() { clear(); } + + void Retire(T* p) { + auto i = worker_id(); + auto &pid = pools[i]; + advance_epoch(i, pid); + list_entry x; + strncpy(x.data, (char*) p, sizeof(T)); + pid.current.push_back(x); + } + + // Clears all 
the lists; to be used on termination, or at a
+  // quiescent point when no one is reading any retired items.
+  void clear() {
+    get_epoch().update_epoch();
+    for (int i=0; i < num_workers(); i++) {
+      clear_list(pools[i].old);
+      clear_list(pools[i].current);
+    }
+  }
+
+  void stats() {}
+};
+
+} // namespace internal
+
+// ***************************
+// The public interface
+// ***************************
+
+  // x should point to the keep_ flag returned by Retire
+  inline void undo_retire(bool* x) { *x = true;}
+
+  template <typename T>
+  using memory_pool = internal::memory_pool<T>;
+
+  template <typename T>
+  extern inline memory_pool<T>& get_default_pool() {
+    static memory_pool<T> pool;
+    return pool;
+  }
+
+  template <typename T>
+  using retire_pool = internal::retire_pool<T>;
+
+  template <typename T>
+  extern inline retire_pool<T>& get_default_retire_pool() {
+    static retire_pool<T> pool;
+    return pool;
+  }
+
+  template <typename T, typename... Args>
+  static T* New(Args... args) {
+    return get_default_pool<T>().New(std::forward<Args>(args)...);}
+
+  template <typename T>
+  static void Delete(T* p) {get_default_pool<T>().Delete(p);}
+
+  template <typename T>
+#ifdef USE_UNDO
+  static bool* Retire(T* p) {return get_default_pool<T>().Retire(p);}
+#else
+  void Retire(T* p) {return get_default_pool<T>().Retire(p);}
+#endif
+
+  template <typename T>
+  static bool check_ptr(T* p, bool silent=false) {
+    return get_default_pool<T>().check_ptr(p, silent);}
+
+  template <typename T>
+  static void clear() {get_default_pool<T>().clear();}
+
+  //template <typename T>
+  //static void stats() {get_default_pool<T>().stats();}
+
+  template <typename Thunk>
+  auto with_epoch(Thunk f) {
+    int id = internal::get_epoch().announce();
+    if constexpr (std::is_void_v<std::invoke_result_t<Thunk>>) {
+      f();
+      internal::get_epoch().unannounce(id);
+    } else {
+      auto v = f();
+      internal::get_epoch().unannounce(id);
+      return v;
+    }
+  }
+
+} // end namespace epoch
+
+#endif //PARLAY_EPOCH_H_
diff --git a/include/utils/lock.h b/include/utils/lock.h
new file mode 100644
index 0000000..1ea36f8
--- /dev/null
+++ b/include/utils/lock.h
@@ -0,0 +1,67 @@
+#include <atomic>
+#include <vector>
+#include <iostream>
+
+#ifndef PARLAYLOCK_H_
+#define PARLAYLOCK_H_
+
+namespace parlay {
+
+// creates 2^16 lock slots.
+// locks.try_lock(i, f) will hash i to the h(i) % 2^16th lock.
+// If that lock is not taken, f is run while holding the lock, the lock
+// is then released, and try_lock returns the boolean result of f.
+// Otherwise it returns false.
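+//
+// A minimal usage sketch (key, table, and value are hypothetical):
+//
+//   bool ok = parlay::get_locks().try_lock((long) key, [&] {
+//     table[key] = value;  // runs only if the hashed slot was free
+//     return true;         // becomes try_lock's result
+//   });
+//   // ok == false means the slot was busy; callers typically retry,
+//   // e.g. by wrapping the attempt in try_loop with a std::optional.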
+struct lock_set {
+private:
+  using lck = std::atomic<bool>;
+  const int bucket_bits = 16;
+  const size_t mask = ((1ul) << bucket_bits) - 1;
+  std::vector<lck> locks;
+
+  static inline uint64_t hash64(uint64_t x) {
+    x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9);
+    x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb);
+    x = x ^ (x >> 31);
+    return x;
+  }
+public:
+  template <typename F>
+  bool try_lock(long i, F f) {
+    bool old = false;
+    bool result = false;
+    lck& x = locks[hash64(i) & mask];
+    if (x.compare_exchange_strong(old, true)) {
+      result = f();
+      x = false;
+    }
+    return result;
+  }
+  lock_set() : locks(std::vector<lck>(1ul << bucket_bits)) {
+    std::fill(locks.begin(), locks.end(), false);
+  }
+};
+
+  extern inline lock_set& get_locks() {
+    static lock_set locks;
+    return locks;
+  }
+
+  template <typename F>
+  auto try_loop(const F& f, int delay = 200, const int max_multiplier = 20) {
+    int multiplier = 1;
+    long cnt = 0;
+    while (true) {
+      if (cnt++ == 10000000000ul/(delay*max_multiplier)) {
+        std::cerr << "probably in an infinite retry loop" << std::endl;
+        abort();
+      }
+      auto r = f();
+      if (r.has_value()) return *r;
+      multiplier = std::min(2*multiplier, max_multiplier);
+      for (volatile int i=0; i < delay * multiplier; i++);
+    }
+  }
+
+}
+
+#endif // PARLAYLOCK_H_
diff --git a/include/utils/threads/portability.h b/include/utils/threads/portability.h
new file mode 100644
index 0000000..88a3718
--- /dev/null
+++ b/include/utils/threads/portability.h
@@ -0,0 +1,112 @@
+
+#ifndef PARLAY_PORTABILITY_H_
+#define PARLAY_PORTABILITY_H_
+
+#if defined(_WIN32)
+#ifndef NOMINMAX
+#define PARLAY_DEFINED_NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#ifdef PARLAY_DEFINED_NOMINMAX
+#undef NOMINMAX
+#endif
+#endif
+
+#include <exception>
+
+#include <iostream>
+
+namespace parlay {
+
+// PARLAY_INLINE: Ask the compiler politely to inline the given function.
+#if defined(__GNUC__)
+#define PARLAY_INLINE inline __attribute__((__always_inline__))
+#elif defined(_MSC_VER)
+#define PARLAY_INLINE __forceinline
+#else
+#define PARLAY_INLINE inline
+#endif
+
+// PARLAY_NOINLINE: Ask the compiler to *not* inline the given function
+#if defined(__GNUC__)
+#define PARLAY_NOINLINE __attribute__((__noinline__))
+#elif defined(_MSC_VER)
+#define PARLAY_NOINLINE __declspec(noinline)
+#else
+#define PARLAY_NOINLINE
+#endif
+
+// PARLAY_COLD: Ask the compiler to place the given function far away from other code
+#if defined(__GNUC__)
+#define PARLAY_COLD __attribute__((__cold__))
+#elif defined(_MSC_VER)
+#define PARLAY_COLD
+#else
+#define PARLAY_COLD
+#endif
+
+
+// PARLAY_PACKED: Ask the compiler to pack a struct into less memory by not padding
+#if defined(__GNUC__)
+#define PARLAY_PACKED __attribute__((packed))
+#else
+#define PARLAY_PACKED
+#endif
+
+// PARLAY_NO_UNIQUE_ADDR: Allow a member object to occupy no space
+#if defined(__has_cpp_attribute)
+#if __has_cpp_attribute(no_unique_address)
+#define PARLAY_NO_UNIQUE_ADDR [[no_unique_address]]
+#else
+#define PARLAY_NO_UNIQUE_ADDR
+#endif
+#else
+#define PARLAY_NO_UNIQUE_ADDR
+#endif
+
+// PARLAY_PREFETCH: Prefetch data into cache
+#if defined(__GNUC__)
+#define PARLAY_PREFETCH(addr, rw, locality) __builtin_prefetch ((addr), (rw), (locality))
+#elif defined(_MSC_VER)
+#define PARLAY_PREFETCH(addr, rw, locality)                                 \
+  PreFetchCacheLine(((locality) ?
PF_TEMPORAL_LEVEL_1 : PF_NON_TEMPORAL_LEVEL_ALL), (addr)) +#else +#define PARLAY_PREFETCH(addr, rw, locality) +#endif + + +#if defined(__cplusplus) && __cplusplus >= 202002L +#define PARLAY_LIKELY [[likely]] +#define PARLAY_UNLIKELY [[unlikely]] +#else +#define PARLAY_LIKELY +#define PARLAY_UNLIKELY +#endif + +// Check for exceptions. The standard suggests __cpp_exceptions. Clang/GCC defined __EXCEPTIONS. +// MSVC disables them with _HAS_EXCEPTIONS=0. Might not cover obscure compilers/STLs. +// +// Exceptions can be explicitly disabled in Parlay with PARLAY_NO_EXCEPTIONS. +#if !defined(PARLAY_NO_EXCEPTIONS) && \ + ((defined(__cpp_exceptions) && __cpp_exceptions != 0) || \ + (defined(__EXCEPTIONS)) || \ + (defined(_HAS_EXCEPTIONS) && _HAS_EXCEPTIONS == 1) || \ + (defined(_MSC_VER) && !defined(_HAS_EXCEPTIONS))) +#define PARLAY_EXCEPTIONS_ENABLED +#endif + +template +[[noreturn]] PARLAY_NOINLINE PARLAY_COLD void throw_exception_or_terminate(Args&&... args) { +#if defined(PARLAY_EXCEPTIONS_ENABLED) + throw Exception{std::forward(args)...}; +#else + std::cerr << Exception{std::forward(args)...}.what() << "\n"; + std::terminate(); +#endif +} + + +} // namespace parlay + +#endif // PARLAY_PORTABILITY_H_ diff --git a/include/utils/threads/thread_id_pool.h b/include/utils/threads/thread_id_pool.h new file mode 100644 index 0000000..71cc396 --- /dev/null +++ b/include/utils/threads/thread_id_pool.h @@ -0,0 +1,160 @@ + +#ifndef PARLAY_INTERNAL_THREAD_ID_POOL_H_ +#define PARLAY_INTERNAL_THREAD_ID_POOL_H_ + +#include +#include + +#include +#include +#include +#include + +namespace parlay { +namespace internal { + +using thread_id_type = unsigned int; + +// A ThreadIdPool hands out and maintains available unique dense IDs for active threads. +// Each thread that requests an ID will get one in the range from [0...get_num_thread_ids()). +// When the pool runs out of available IDs, it will allocate new ones, increasing the result +// of get_num_thread_ids(). Threads that die will return their ID to the pool for re-use by +// a subsequently spawned thread. +// +// There is a global singleton instance of ThreadIdPool given by ThreadIdPool::instance(), +// however this function is private and should not be called by the outside world. The public +// API through which the world can access thread IDs is limited to the free functions: +// +// - get_thread_id() -> size_t: Returns the thread ID of the current thread. Will assign +// one if this thread doesn't have one yet. +// - get_num_thread_ids() -> size_t: Returns the number of unique thread IDs that have +// been handed out. 
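+// For example (an illustrative sketch):
+//
+//   std::thread t([] {
+//     auto id = parlay::internal::get_thread_id();  // dense ID, e.g. 0
+//     assert(id < parlay::internal::get_num_thread_ids());
+//   });
+//   t.join();  // a thread spawned after this may reuse the same ID
+//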
+// +class ThreadIdPool : public std::enable_shared_from_this { + + // Prevent public construction since this class is meant as a global singleton + struct private_constructor { + explicit private_constructor() = default; + }; + + public: + + // Returns a unique thread ID for the current thread in the range [0...get_num_thread_ids()) + friend thread_id_type get_thread_id(); + + // Returns the number of assigned thread IDs in the range [0...get_num_thread_ids()) + friend thread_id_type get_num_thread_ids(); + + + ~ThreadIdPool() noexcept { + size_t num_destroyed = 0; + for (auto current = available_ids.load(std::memory_order_relaxed); current; num_destroyed++) { + auto old = std::exchange(current, current->next); + delete old; + } + assert(num_destroyed == num_thread_ids.load(std::memory_order_relaxed)); + } + + // The constructor must be public since we make_shared it, but we protect it with a private parameter type + explicit ThreadIdPool(private_constructor) noexcept : num_thread_ids(0), available_ids(nullptr) { } + + ThreadIdPool(const ThreadIdPool&) = delete; + ThreadIdPool& operator=(const ThreadIdPool&) = delete; + + private: + + // A ThreadId corresponds to a unique ID number in the range [0...num_thread_ids). When it is + // not in use (the thread that owned it dies), it is returned to the global pool which maintains + // a linked list of available ones. + class ThreadId { + friend class ThreadIdPool; + + explicit ThreadId(const thread_id_type id_) noexcept : id(id_), next(nullptr) { } + + public: + const thread_id_type id; + private: + ThreadId* next; + }; + + // A ThreadIdOwner indicates that a thread is currently in possession of the given ThreadID. + // Each thread has a static thread_local ThreadIdOwner containing the ID that it owns. + // On construction, it acquires an available ThreadID, and on destruction, it releases + // it back to the pool. The ThreadIdOwner stores a shared_ptr to the pool to guarantee + // that the pool does not get destroyed before a detached thread returns its ID. + class ThreadIdOwner { + friend class ThreadIdPool; + + explicit ThreadIdOwner(ThreadIdPool& pool_) + : pool(pool_.shared_from_this()), node(pool->acquire()), id(node->id) { } + + ~ThreadIdOwner() { pool->relinquish(node); } + + private: + const std::shared_ptr pool; + ThreadId* const node; + + public: + const thread_id_type id; + }; + + // Grab a free ID from the available list, or if there are none available, allocate a new one. + ThreadId* acquire() { + if (available_ids.load(std::memory_order_relaxed)) { + // We only take the lock if there are available IDs in the pool. In the common case + // where there are no relinquished IDs available for re-use we don't need the lock. 
+ static std::mutex m_; + std::lock_guard g_{m_}; + + ThreadId* current = available_ids.load(std::memory_order_relaxed); + while (current && !available_ids.compare_exchange_weak(current, current->next, + std::memory_order_acquire, std::memory_order_relaxed)) {} + if (current) { return current; } + } + return new ThreadId(num_thread_ids.fetch_add(1)); + } + + // Given the ID back to the global pool for reuse + void relinquish(ThreadId* p) { + p->next = available_ids.load(std::memory_order_relaxed); + while (!available_ids.compare_exchange_weak(p->next, p, + std::memory_order_release, std::memory_order_relaxed)) {} + } + + static inline const ThreadIdOwner& get_local_thread_id() { + static const thread_local ThreadIdPool::ThreadIdOwner my_id(instance()); + return my_id; + } + + static inline ThreadIdPool& instance() { + // We hold the global thread id pool inside a shared_ptr because it is possible + // for threads to be spawned *before* the ID pool has been initialized, which + // means that they may outlive this static variable. Each ThreadId holds onto + // a copy of the shared_ptr to ensure that the pool stays alive long enough + // for the IDs to relinquish themselves back to the pool. + // + // I think it is still possible to cause a segfault by spawning a new thread + // *after* the static destructors have run... so please do not spawn threads + // inside your static destructors :) + static const std::shared_ptr pool = std::make_shared(private_constructor{}); + return *pool; + } + + std::atomic num_thread_ids; + std::atomic available_ids; +}; + +inline thread_id_type get_thread_id() { + return ThreadIdPool::get_local_thread_id().id; +} + +inline thread_id_type get_num_thread_ids() { + return ThreadIdPool::instance().num_thread_ids.load(); +} + + +} // namespace internal +} // namespace parlay + + +#endif // PARLAY_INTERNAL_THREAD_ID_POOL_H_ diff --git a/include/utils/threads/thread_specific.h b/include/utils/threads/thread_specific.h new file mode 100644 index 0000000..ccea7fe --- /dev/null +++ b/include/utils/threads/thread_specific.h @@ -0,0 +1,463 @@ + +#ifndef PARLAY_THREAD_SPECIFIC_H_ +#define PARLAY_THREAD_SPECIFIC_H_ + +#include +#include + +#include +#include // IWYU pragma: keep +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "thread_id_pool.h" +#include "portability.h" +//#include "range.h" +#include "type_traits.h" + + +namespace parlay { + +using internal::thread_id_type; + +// Returns a unique thread ID for the current running thread +// in the range of [0...num_thread_ids()). Thread IDs are +// guaranteed to be unique for all *live* threads, but they +// are re-used after a thread dies and another is spawned. +inline thread_id_type my_thread_id() { + return internal::get_thread_id(); +} + +// Return the number of thread IDs that have been assigned to +// threads. All thread IDs are in the range [0...num_thread_ids()). +// +// Important note: Thread IDs are assigned lazily when a thread +// first requests one. Therefore num_thread_ids() is *not* +// guaranteed to be as large as the number of live threads if +// those threads have never called my_thread_id(). 
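+// For instance (a sketch): num_thread_ids() may be 0 before any thread has
+// requested an ID; after the main thread first calls my_thread_id() it
+// returns 1, and after k workers each call my_thread_id() it returns k + 1.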
+inline thread_id_type num_thread_ids() { + return internal::get_num_thread_ids(); +} + +namespace internal { + +class ThreadListChunkData { + + public: + + // This is just std::bit_ceil(std::thread::hardware_concurrency()) but we don't assume C++20 + const static inline std::size_t thread_list_chunk_size = []() { + std::size_t size = 4; + while (size < std::thread::hardware_concurrency()) + size *= 2; + return size; + }(); + + // Used by ThreadSpecific which stores a chunked sequence of items that is at least as large + // as the number of active threads. Given a thread ID, items are split into chunks of size: + // + // P, P, 2P, 4P, 8P, ... + // + // where P is the lowest power of two that is at least as large as the number of hardware threads. + static std::size_t compute_chunk_id(thread_id_type id) { + std::size_t k = thread_list_chunk_size; + std::size_t chunk = 0; + while (k <= id) { + chunk++; + k *= 2; + } + return chunk; + } + + static std::size_t compute_chunk_position(thread_id_type id, std::size_t chunk_id) { + if (chunk_id == 0) + return id; + else { + auto high_bit = thread_list_chunk_size << (chunk_id - 1); + assert(id & high_bit); + return id - high_bit; + } + } + + explicit ThreadListChunkData(thread_id_type thread_id_) noexcept : thread_id(thread_id_), + chunk_id(compute_chunk_id(thread_id)), chunk_position(compute_chunk_position(thread_id, chunk_id)) { } + + const thread_id_type thread_id; + const std::size_t chunk_id; + const std::size_t chunk_position; +}; + +extern inline const ThreadListChunkData& get_chunk_data() { + static thread_local const ThreadListChunkData data{get_thread_id()}; + return data; +} + +template +struct Uninitialized { + union { + alignas(64) std::monostate empty; + T value; + }; + + Uninitialized() noexcept { }; + + T& operator*() { return value; } + + T* get() { return std::addressof(value); } + + ~Uninitialized() { value.~T(); } +}; + +} // namespace internal + +// A ThreadSpecific stores a list of objects of type T such that there +// is a unique object for each active thread. The list automatically grows +// when additional threads are spawned and attempt to access it. Threads +// may also traverse the entire list if they need to. +// +// By default, list elements are all value initialized, roughly meaning +// that class types are default constructed, and builtin types are zero +// initialized. For custom initialization, you can pass a constructor +// function which returns the desired value. The constructor function +// can take zero or one arguments. If it takes one argument, it will be +// passed the thread ID that it is constructing for. Note that the +// elements are not guaranteed to be constructed by the thread that +// they belong to, and they may be constructed in advance of any thread +// actually taking ownership of that ID. +// +// A few things to note: +// +// - Thread IDs are always unique for the set of currently live threads, +// but not unique over the course of the entire program. A thread that +// dies will give up its ID to be claimed by a new thread later. +// +// - The list elements are *not* destroyed when the thread that "owns" +// them is destroyed. A new thread that reclaims a previously-used ID +// will find the item at that position in the same state that it was +// left by the previous thread. Elements are only destroyed when the +// entire ThreadSpecific is destroyed. 
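+// For example (a sketch using only this header):
+//
+//   parlay::ThreadSpecific<long> counters;  // slots value-initialized to 0
+//   // in each thread:  (*counters)++;
+//   long total = 0;
+//   counters.for_each([&](long& c) { total += c; });
+//
+// A thread that exits after incrementing leaves its count in place, and a
+// later thread assigned the same ID continues from that value, not from 0.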
+// +// Therefore, threads are responsible for manually cleaning up the +// contents of a ThreadSpecific and/or resetting it to a default value +// for the next thread that might claim the spot if they need to. +// +template +class ThreadSpecific { + + // 25 chunks guarantees enough slots for any machine + // with up to 2^48 bytes of addressable virtual memory, + // assuming that threads are 8MB large. + static constexpr std::size_t n_chunks = 25; + + public: + + using reference = T&; + using value_type = T; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using pointer = T*; + + ThreadSpecific() : constructor([](std::size_t) { return T{}; }) { + initialize(); + } + + template && !std::is_invocable_v, int> = 0> + explicit ThreadSpecific(F&& constructor_) + : constructor([f = std::forward(constructor_)](std::size_t) { return f(); }) { + initialize(); + } + + template, int> = 0> + explicit ThreadSpecific(F&& constructor_) : constructor(std::forward(constructor_)) { + initialize(); + } + + ThreadSpecific(const ThreadSpecific&) = delete; + ThreadSpecific& operator=(const ThreadSpecific&) = delete; + ThreadSpecific(ThreadSpecific&&) = delete; + + ~ThreadSpecific() { + for (internal::Uninitialized* chunk : chunks) { + delete[] chunk; + } + } + + T& operator*() { return get(); } + T* operator->() { return std::addressof(get()); } + + T& get() { + auto chunk_data = internal::get_chunk_data(); + return get_by_index(chunk_data.chunk_id, chunk_data.chunk_position); + } + + const T& operator*() const { return get(); } + T const* operator->() const { return std::addressof(get()); } + + const T& get() const { + auto chunk_data = internal::get_chunk_data(); + return get_by_index(chunk_data.chunk_id, chunk_data.chunk_position); + } + + template + void for_each(F&& f) { + static_assert(std::is_invocable_v); + + auto num_threads = num_thread_ids(); + thread_id_type tid = 0; + internal::Uninitialized* chunk = chunks[0].load(std::memory_order_relaxed); + + for (std::size_t chunk_id = 0; tid < num_threads; chunk = chunks[++chunk_id].load(std::memory_order_acquire)) { + auto chunk_size = get_chunk_size(chunk_id); + if (!chunk) PARLAY_UNLIKELY { + ensure_chunk_exists(chunk_id); + chunk = chunks[chunk_id].load(std::memory_order_relaxed); + } + for (std::size_t i = 0; tid < num_threads && i < chunk_size; i++, tid++) { + f(*chunk[i]); + } + } + } + + // Allow looping over all thread's data + template + class iterator_t { + friend class ThreadSpecific; + + using parent_type = maybe_const_t>; + + iterator_t(std::size_t chunk_id_, std::size_t position_, parent_type* parent_) : + chunk_id(chunk_id_), position(position_), parent(parent_) { } + + public: + using iterator_category = std::random_access_iterator_tag; + using reference = std::add_lvalue_reference_t>; + using value_type = T; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using pointer = std::add_pointer_t>; + + iterator_t() = default; + + /* implicit */ iterator_t(const iterator_t& other) // cppcheck-suppress noExplicitConstructor // NOLINT + : chunk_id(other.chunk_id), position(other.position), parent(other.parent) { } + + reference operator*() const { return parent->get_by_index_nocheck(chunk_id, position); } + + reference operator[](std::size_t p) const { + auto tmp = *this; + tmp += p; + return *tmp; + } + + iterator_t& operator++() { + position++; + if (position == get_chunk_size(chunk_id)) { + if (++chunk_id < n_chunks && parent->chunks[chunk_id].load(std::memory_order_acquire) == nullptr) 
PARLAY_UNLIKELY + parent->ensure_chunk_exists(chunk_id); + position = 0; + } + return *this; + } + + iterator_t operator++(int) { auto tmp = *this; ++(*this); return tmp; } //NOLINT + + iterator_t& operator--() { + if (position == 0) { + position = get_chunk_size(--chunk_id) - 1; + if (parent->chunks[chunk_id].load(std::memory_order_acquire) == nullptr) PARLAY_UNLIKELY + parent->ensure_chunk_exists(chunk_id); + } + else { + position--; + } + return *this; + } + + iterator_t operator--(int) { auto tmp = *this; --(*this); return tmp; } //NOLINT + + iterator_t& operator+=(difference_type diff) { + if (diff < 0) return *this -= (-diff); + assert(diff >= 0); + position += diff; + if (position >= get_chunk_size(chunk_id)) { + do { + position -= get_chunk_size(chunk_id++); + } while (position >= get_chunk_size(chunk_id)); + if (parent->chunks[chunk_id].load(std::memory_order_acquire) == nullptr) PARLAY_UNLIKELY + parent->ensure_chunk_exists(chunk_id); + } + return *this; + } + + iterator_t operator+(difference_type diff) const { + auto result = *this; + result += diff; + return result; + } + + iterator_t& operator-=(difference_type diff) { + if (diff < 0) return *this += (-diff); + assert(diff >= 0); + auto pos = static_cast(position); + pos -= diff; + if (pos < 0) { + do { + pos += static_cast(get_chunk_size(--chunk_id)); + } while (pos < 0); + if (parent->chunks[chunk_id].load(std::memory_order_acquire) == nullptr) PARLAY_UNLIKELY + parent->ensure_chunk_exists(chunk_id); + } + assert(pos >= 0); + position = static_cast(pos); + return *this; + } + + iterator_t operator-(difference_type diff) const { + auto result = *this; + result -= diff; + return result; + } + + difference_type operator-(const iterator_t& other) const { + if (other > *this) return -(other - *this); + assert(other <= *this); + auto result = static_cast(position) - static_cast(other.position); + auto chunk_id_ = other.chunk_id; + while (chunk_id_ < chunk_id) { + result += static_cast(get_chunk_size(chunk_id_++)); + } + return result; + } + + bool operator==(const iterator_t& other) const { + return chunk_id == other.chunk_id && position == other.position; + } + + bool operator!=(const iterator_t& other) const { + return chunk_id != other.chunk_id || position != other.position; + } + + bool operator<(const iterator_t& other) const { + return chunk_id < other.chunk_id || (chunk_id == other.chunk_id && position < other.position); + } + + bool operator<=(const iterator_t& other) const { + return chunk_id < other.chunk_id || (chunk_id == other.chunk_id && position <= other.position); + } + + bool operator>(const iterator_t& other) const { + return chunk_id > other.chunk_id || (chunk_id == other.chunk_id && position > other.position); + } + + bool operator>=(const iterator_t& other) const { + return chunk_id > other.chunk_id || (chunk_id == other.chunk_id && position >= other.position); + } + + friend void swap(iterator_t& left, iterator_t& right) { + std::swap(left.chunk_id, right.chunk_id); + std::swap(left.position, right.position); + std::swap(left.parent, right.parent); + } + + std::size_t chunk_id{n_chunks}; + std::size_t position{0}; + parent_type* parent{nullptr}; + }; + + using iterator = iterator_t; + using const_iterator = iterator_t; + + //static_assert(is_random_access_iterator_v); + //static_assert(is_random_access_iterator_v); + + [[nodiscard]] iterator begin() { + return iterator{0,0,this}; + } + + [[nodiscard]] const_iterator begin() const { + return const_iterator{0,0,this}; + } + + [[nodiscard]] iterator end() { + 
internal::ThreadListChunkData data{num_thread_ids()}; + return iterator{data.chunk_id, data.chunk_position, this}; + } + + [[nodiscard]] const_iterator end() const { + internal::ThreadListChunkData data{num_thread_ids()}; + return const_iterator{data.chunk_id, data.chunk_position, this}; + } + + private: + + void initialize() { + internal::get_chunk_data(); // Force static initialization before any ThreadLocals are constructed + chunks[0].store(new internal::Uninitialized[internal::ThreadListChunkData::thread_list_chunk_size], std::memory_order_relaxed); + std::fill(chunks.begin() + 1, chunks.end(), nullptr); + auto chunk = chunks[0].load(std::memory_order_relaxed); + for (std::size_t i = 0; i < internal::ThreadListChunkData::thread_list_chunk_size; i++) { + new (static_cast(chunk[i].get())) T(constructor(i)); + } + } + + static std::size_t get_chunk_size(std::size_t chunk_id) { + assert(chunk_id < n_chunks); + if (chunk_id == 0) return internal::ThreadListChunkData::thread_list_chunk_size; + else return internal::ThreadListChunkData::thread_list_chunk_size << (chunk_id - 1); + } + + T& get_by_index(std::size_t chunk_id, std::size_t chunk_position) { + if (chunk_id > 0 && chunks[chunk_id].load(std::memory_order_acquire) == nullptr) PARLAY_UNLIKELY + ensure_chunk_exists(chunk_id); + return get_by_index_nocheck(chunk_id, chunk_position); + } + + const T& get_by_index(std::size_t chunk_id, std::size_t chunk_position) const { + if (chunk_id > 0 && chunks[chunk_id].load(std::memory_order_acquire) == nullptr) PARLAY_UNLIKELY + ensure_chunk_exists(chunk_id); + return get_by_index_nocheck(chunk_id, chunk_position); + } + + T& get_by_index_nocheck(std::size_t chunk_id, std::size_t chunk_position) { + assert(chunks[chunk_id].load() != nullptr); + return *(chunks[chunk_id].load(std::memory_order_relaxed)[chunk_position]); + } + + const T& get_by_index_nocheck(std::size_t chunk_id, std::size_t chunk_position) const { + assert(chunks[chunk_id].load() != nullptr); + return *(chunks[chunk_id].load(std::memory_order_relaxed)[chunk_position]); + } + + void ensure_chunk_exists(std::size_t chunk_id) const { + std::lock_guard lock(growing_mutex); + if (chunks[chunk_id].load(std::memory_order_relaxed) == nullptr) { + auto chunk_size = get_chunk_size(chunk_id); + auto chunk = new internal::Uninitialized[chunk_size]; + for (std::size_t i = 0; i < chunk_size; i++) { + new (static_cast(chunk[i].get())) T(constructor(chunk_size + i)); + } + chunks[chunk_id].store(chunk, std::memory_order_release); + } + } + + mutable std::function constructor; + mutable std::mutex growing_mutex; + mutable std::array*>, n_chunks> chunks; +}; + + //static_assert(is_random_access_range_v>); + //static_assert(is_random_access_range_v>); + +} // namespace parlay + + +#endif // PARLAY_THREAD_SPECIFIC_H_ diff --git a/include/utils/threads/type_traits.h b/include/utils/threads/type_traits.h new file mode 100644 index 0000000..a8b68ea --- /dev/null +++ b/include/utils/threads/type_traits.h @@ -0,0 +1,286 @@ +// Useful type traits used mostly internally by Parlay +// +// Many inspired by this video, and the following standards +// proposals: +// - https://www.youtube.com/watch?v=MWBfmmg8-Yo +// - http://open-std.org/JTC1/SC22/WG21/docs/papers/2014/n4034.pdf +// - https://quuxplusone.github.io/blog/code/object-relocation-in-terms-of-move-plus-destroy-draft-7.html +// +// Includes: +// - priority_tag +// - is_trivial_allocator +// - is_trivially_relocatable / is_nothrow_relocatable +// + +#ifndef PARLAY_TYPE_TRAITS_H_ +#define 
PARLAY_TYPE_TRAITS_H_ + +#include + +#include +#include +#include +#include +#include // IWYU pragma: keep + +// IWYU pragma: no_include + +namespace parlay { + +// Provides the member type T +template +struct type_identity { + using type = T; +}; + +// Equal to the type T, i.e., the identity transformation +template +using type_identity_t = typename type_identity::type; + +// Given a pointer-to-member (object or function), returns +// the type of the class in which the member lives +template +struct member_pointer_class; + +template +struct member_pointer_class : public type_identity {}; + +template +using member_pointer_class_t = typename member_pointer_class::type; + +// Provides the member type std::add_const_t if Const is +// true, otherwise provides the member type T +template +using maybe_const = std::conditional, T>; + +// Adds const to the given type if Const is true +template +using maybe_const_t = typename maybe_const::type; + +// Provides the member type std::decay_t if Decay is +// true, otherwise provides the member type T +template +using maybe_decay = std::conditional, T>; + +// Decays the given type if Decay is true +template +using maybe_decay_t = typename maybe_decay::type; + +// Provides the member value true if the given type is an instance of std::optional +template +struct is_optional : std::false_type {}; + +template +struct is_optional> : std::true_type {}; + +// true if the given type is an instance of std::optional +template +inline constexpr bool is_optional_v = is_optional::value; + +template +using is_less_than_comparable = std::conjunction< + std::is_invocable_r, T, U>, + std::is_invocable_r, U, T> + >; + +template +inline constexpr bool is_less_than_comparable_v = is_less_than_comparable::value; + +template +using is_equality_comparable = std::conjunction< + std::is_invocable_r, T, U>, + std::is_invocable_r, U, T>, + std::is_invocable_r, T, U>, + std::is_invocable_r, U, T> + >; + +template +inline constexpr bool is_equality_comparable_v = is_equality_comparable::value; + +// Defines a member value true if the given type BinaryOperator_ can be invoked on types +// T1&& and T2 to yield a result of a type that is convertible to T1. +// +// This requirement corresponds to the needs of a left fold over the operator BinaryOperator_ +// with an identity and result type of T1, where the intermediate elements being reduced over +// are potentially of type T2. +template +struct is_binary_operator_for : public std::false_type {}; + +template +struct is_binary_operator_for >, + std::enable_if_t< std::is_invocable_r_v >, + std::enable_if_t< std::is_invocable_r_v >, + std::enable_if_t< std::is_invocable_r_v >, + std::enable_if_t< std::is_invocable_r_v > +>, std::enable_if_t>> : public std::true_type{}; + +// Handle the case where BinaryOperator_ is a member function pointer +template +struct is_binary_operator_for >, + std::enable_if_t< std::is_invocable_r_v&, T1&&, T1&&> >, + std::enable_if_t< std::is_invocable_r_v&, T1&&, T2> >, + std::enable_if_t< std::is_invocable_r_v&, T2, T2> >, + std::enable_if_t< std::is_invocable_r_v&, T2, T1&&> > +>, std::enable_if_t>> : public std::true_type{}; + +// True if the given type BinaryOperator_ can be invoked on types T1&& and T2 to yield a result +// of a type that is convertible to T1. T2 defaults to T1&& if not specified. 
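+// For instance, under these semantics (a sketch):
+//
+//   static_assert(parlay::is_binary_operator_for_v<std::plus<>, long, int>);
+//   // std::plus<> on any mix of long and int yields a value convertible
+//   // to long, so it can serve as the fold operator
+//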
+// +// This requirement corresponds to the needs of a left fold over the operator BinaryOperator_ +// with an identity and result type of T1, where the intermediate elements being reduced over +// are potentially of type T2. +template +inline constexpr bool is_binary_operator_for_v = is_binary_operator_for::value; + +// Defines the member value true if T is a pair or a tuple of length two +template +struct is_pair : public std::false_type {}; + +template +struct is_pair(std::declval()) ), + decltype( std::get<1>(std::declval()) ), + std::enable_if_t< 2 == std::tuple_size_v> > +>> : public std::true_type {}; + +// True if T is a pair or a tuple of length two +template +inline constexpr bool is_pair_v = is_pair::value; + +/* --------------------- Priority tags. ------------------------- + Priority tags are an easy way to force template resolution to + pick the "best" option in the presence of multiple valid + choices. It works because of the facts that priority_tag + is a subtype of priority_tag, and template resolution + will always pick the most specialised option when faced with + a choice, so it will prefer priority_tag over + priority_tag +*/ + +template +struct priority_tag : priority_tag {}; + +template<> +struct priority_tag<0> {}; + + +/* ----------------- Trivial allocators. --------------------- + Allocator-aware containers and algorithms need to know whether + they can construct/destruct objects directly inside memory given + to them by an allocator, or whether the allocator has custom + behaviour. Since some optimizations require us to circumvent + custom allocator behaviour, we need to detect when an allocator + does not do this. + + Specifically, an allocator-aware algorithm must construct objects + inside memory returned by an allocator by writing + + std::allocator_traits::construct(allocator, p, args); + + if the allocator type defines a method .construct, then this results + in forwarding the construction to that method. Otherwise, this just + results in a call to + + new (p) T(std::forward(args)...) + + If we wish to circumvent calling the constructor, for example, + for a trivially relocatable type in which we would prefer to + copy directly via memcpy, we must ensure that the allocator + does not have a custom .construct method. Otherwise, we can + not optimize, and must continue to use the allocator's own + construct method. + + The same discussion is true for destruction as well. + + See https://www.youtube.com/watch?v=MWBfmmg8-Yo for more info. +*/ + +namespace internal { + +// Detect the existence of the .destroy method of the type Alloc +template +auto trivial_allocator(Alloc& a, T* p, priority_tag<2>) + -> decltype(void(a.destroy(p)), std::false_type()); + +// Detect the existence of the .construct method of the type Alloc +template +auto trivial_allocator(Alloc& a, T* p, priority_tag<1>) + -> decltype(void(a.construct(p, std::declval())), std::false_type()); + +// By default, if no .construct or .destroy methods are found, assume +// that the allocator is trivial +template +auto trivial_allocator(Alloc& a, T* p, priority_tag<0>) + -> std::true_type; + +} // namespace internal + +template +struct is_trivial_allocator + : decltype(internal::trivial_allocator(std::declval(), nullptr, priority_tag<2>())) {}; + +template +inline constexpr bool is_trivial_allocator_v = is_trivial_allocator::value; + +// Manually specialize std::allocator since it is trivial, but +// some (maybe all?) implementations still provide a .construct +// and .destroy method anyway. 
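+// For example, with the std::allocator specialization described above
+// (a sketch):
+//
+//   static_assert(parlay::is_trivial_allocator_v<std::allocator<int>, int>);
+//   // an allocator that defines its own construct()/destroy() yields false
+//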
+template +struct is_trivial_allocator, T> : std::true_type {}; + +/* ----------------- Trivially relocatable. --------------------- + A type T is called trivially relocatable if, given a pointer + p to an object of type T, and a pointer q to unintialized + memory large enough for an object of type T, then + + new (q) T(std::move(*p)); + p->~T(); + + is equivalent to + + std::memcpy(p, q, sizeof(T)); + + Any type that is trivially move constructible and trivially + destructible is therefore trivially relocatable. User-defined + types that are not obviously trivially relocatable can be + annotated as such by specializing the is_trivially_relocatable + type. + + See proposal D1144R0 for copious details: + https://quuxplusone.github.io/blog/code/object-relocation-in-terms-of-move-plus-destroy-draft-7.html +*/ + +template +struct is_trivially_relocatable : + std::bool_constant::value && + std::is_trivially_destructible::value> { }; + +template struct is_nothrow_relocatable : + std::bool_constant::value || + (std::is_nothrow_move_constructible::value && + std::is_nothrow_destructible::value)> { }; + +template +inline constexpr bool is_trivially_relocatable_v = is_trivially_relocatable::value; + +template +inline constexpr bool is_nothrow_relocatable_v = is_nothrow_relocatable::value; + +// The standard allocator is stateless, so it is trivially relocatable, +// but unfortunately it is not detected as such, so we mark it manually. +// This is important because parlay::sequence is only trivially +// relocatable when its allocator is trivially relocatable. + +template +struct is_trivially_relocatable> : std::true_type {}; + +template +struct is_trivially_relocatable> : + std::bool_constant::value && + is_trivially_relocatable::value> {}; + +} // namespace parlay + +#endif //PARLAY_TYPE_TRAITS_H_ diff --git a/results.txt b/results.txt new file mode 100644 index 0000000..1600066 --- /dev/null +++ b/results.txt @@ -0,0 +1,61 @@ +15, 4176 +2137, 851286 +4256, 852158 +6351, 852495 +8683, 852486 +10790, 852650 +12957, 852699 +15099, 852751 +17261, 852858 +19396, 852810 +21520, 852866 +23661, 853033 +25826, 852895 +27956, 852934 +30124, 853005 +32326, 853110 +34454, 853050 +36614, 853059 +38752, 853102 +40908, 853100 +43100, 853189 +45254, 853196 +47371, 853228 +49498, 853214 +51643, 853251 +53837, 853268 +56016, 853197 +58171, 853271 +60327, 853206 +62541, 853285 +64634, 853273 +66760, 853268 +68961, 853244 +71141, 853120 +73343, 853280 +75566, 853282 +77826, 853284 +80161, 853217 +82301, 853225 +84427, 853253 +86560, 853215 +88680, 853286 +123, 856122 +3028, 1692291 +5883, 1692496 +8678, 1692857 +11721, 1693162 +14547, 1693551 +17479, 1693607 +20352, 1693699 +23215, 1693782 +26150, 1693739 +29132, 1693762 +32293, 1693782 +35297, 1693955 +38308, 1693979 +41169, 1693853 +44220, 1693999 +47164, 1694025 +50101, 1694071 +52974, 1694108 diff --git a/scripts/batch_size_experiment.sh b/scripts/batch_size_experiment.sh new file mode 100755 index 0000000..b690cff --- /dev/null +++ b/scripts/batch_size_experiment.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +declare base_dir="$(dirname $(dirname $(realpath $0)))" + +cd ${base_dir}/build +set -e +cmake -DSKETCH_BUFFER_SIZE=25 .. 
+make -j +set +e + +mkdir -p ./../results +mkdir -p ./../results/mpi_speed_results + +# Test run +# mpirun -np 23 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_13_query10_binary 100 0 --gtest_filter=*mpi_mixed_speed_test* + +# KRON-16 Batch Size Sweep +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_query10_binary 1 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_query10_binary 10 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_query10_binary 50 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_query10_binary 100 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_query10_binary 1000 0 --gtest_filter=*mpi_mixed_speed_test* +# KRON-16 fixed-forest +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_ff_query10_binary 1 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_ff_query10_binary 10 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_ff_query10_binary 50 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_ff_query10_binary 100 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_ff_query10_binary 1000 0 --gtest_filter=*mpi_mixed_speed_test* + +# Twitter Batch Size Sweep +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_query10_binary 1 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_query10_binary 10 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_query10_binary 50 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_query10_binary 100 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_query10_binary 1000 0 --gtest_filter=*mpi_mixed_speed_test* +# Twitter fixed-forest +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_ff_query10_binary 1 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_ff_query10_binary 10 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_ff_query10_binary 50 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_ff_query10_binary 100 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_ff_query10_binary 1000 0 --gtest_filter=*mpi_mixed_speed_test* diff --git a/scripts/density_sweep_experiment.sh b/scripts/density_sweep_experiment.sh new file mode 100644 index 0000000..3eb5be1 --- /dev/null +++ b/scripts/density_sweep_experiment.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +declare base_dir="$(dirname $(dirname $(realpath $0)))" + +cd ${base_dir}/build +set -e +make -j +set +e + +mkdir -p ./../results + +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests 
binary_streams/erdos_0001_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_001_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_01_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_10_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_20_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_30_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_40_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_50_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_60_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_70_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_80_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_90_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_100_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* diff --git a/scripts/export_results.sh b/scripts/export_results.sh new file mode 100755 index 0000000..213838e --- /dev/null +++ b/scripts/export_results.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +#declare base_dir="$(dirname $(dirname $(realpath $0)))" + +#cd ${base_dir}/results/mpi_speed_results + + +write_out() { + filename=$1.txt + if [ -f $filename ]; then + awk -F ' ' 'BEGIN {ORS=","}NR==1{print $2}' $filename >> $2 + awk -F ' ' 'BEGIN {ORS=","}NR==2{print $2}' $filename >> $3 + + else + echo -n "0," >> $2 + echo -n "0," >> $3 + fi +} + +declare -a streams=( +[0]="kron_13_query10_binary" +[1]="kron_15_query10_binary" +[2]="kron_16_query10_binary" +[3]="kron_17_query10_binary" +[4]="kron_18_query10_binary" +# +[5]="dnc_query10_binary" +[6]="tech_query10_binary" +[7]="enron_query10_binary" +# +[8]="twitter_query10_binary" +[9]="stanford_query10_binary" +[10]="random2N_query10_binary" +[11]="randomNLOGN_query10_binary" +[12]="randomNSQRTN_query10_binary" +[13]="randomDIV_query10_binary" +# Fixed Forest +[14]="kron_13_ff_query10_binary" +[15]="kron_15_ff_query10_binary" +[16]="kron_16_ff_query10_binary" +[17]="kron_17_ff_query10_binary" +[18]="kron_18_ff_query10_binary" +# +[19]="dnc_ff_query10_binary" +[20]="tech_ff_query10_binary" +[21]="enron_ff_query10_binary" +# +[22]="twitter_ff_query10_binary" +[23]="stanford_ff_query10_binary" +[24]="random2N_ff_query10_binary" +[25]="randomNLOGN_ff_query10_binary" +[26]="randomNSQRTN_ff_query10_binary" +[27]="randomDIV_ff_query10_binary" +) + +updates="UPDATES.txt" +queries="QUERIES.txt" +rm $updates +rm $queries + +for i in $(seq 0 13); +do + write_out ${streams[$i]} $updates $queries +done + +echo "" >> $updates +echo "" >> $queries + +updates="UPDATES_FF.txt" +queries="QUERIES_FF.txt" +rm $updates +rm $queries + +for i in $(seq 14 
27); +do + write_out ${streams[$i]} $updates $queries +done + +echo "" >> $updates +echo "" >> $queries + diff --git a/scripts/full_test_experiment.sh b/scripts/full_test_experiment.sh new file mode 100755 index 0000000..7370bfb --- /dev/null +++ b/scripts/full_test_experiment.sh @@ -0,0 +1,105 @@ +#!/bin/bash + +declare base_dir="$(dirname $(dirname $(realpath $0)))" + +cd ${base_dir}/build +set -e +#cmake -DSKETCH_BUFFER_SIZE=25 .. +#make -j +#set +e + +mkdir -p ./../results +mkdir -p ./../results/mpi_speed_results + +run_test() { + cat binary_streams/$1 > /dev/null + mpirun -np $2 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/$1 0 $3 --gtest_filter=*mpi_mixed_speed_test* +} + +declare -a streams=( +[0]="kron_13_query10_binary" +[1]="kron_15_query10_binary" +[2]="kron_16_query10_binary" +[3]="kron_17_query10_binary" +[4]="kron_18_query10_binary" +# +[5]="dnc_query10_binary" +[6]="tech_query10_binary" +[7]="enron_query10_binary" +# +[8]="twitter_query10_binary" +[9]="stanford_query10_binary" +[10]="random2N_query10_binary" +[11]="randomNLOGN_query10_binary" +[12]="randomNSQRTN_query10_binary" +[13]="randomDIV_query10_binary" +# Fixed Forest +[14]="kron_13_ff_query10_binary" +[15]="kron_15_ff_query10_binary" +[16]="kron_16_ff_query10_binary" +[17]="kron_17_ff_query10_binary" +[18]="kron_18_ff_query10_binary" +# +[19]="dnc_ff_query10_binary" +[20]="tech_ff_query10_binary" +[21]="enron_ff_query10_binary" +# +[22]="twitter_ff_query10_binary" +[23]="stanford_ff_query10_binary" +[24]="random2N_ff_query10_binary" +[25]="randomNLOGN_ff_query10_binary" +[26]="randomNSQRTN_ff_query10_binary" +[27]="randomDIV_ff_query10_binary" +) + +declare -a nps=( +[0]=23 +[1]=26 +[2]=28 +[3]=30 +[4]=31 +# +[5]=19 +[6]=26 +[7]=29 +# +[8]=28 +[9]=31 +[10]=32 +[11]=29 +[12]=25 +[13]=29 +# Fixed Forest +[14]=23 +[15]=26 +[16]=28 +[17]=30 +[18]=31 +# +[19]=19 +[20]=26 +[21]=29 +# +[22]=28 +[23]=31 +[24]=32 +[25]=29 +[26]=25 +[27]=29 +) + + +run_test ${streams[$1]} ${nps[$1]} $2 + +# Test run +# mpirun -np 23 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_13_query10_binary 0 0 --gtest_filter=*mpi_mixed_speed_test* + + +exit +# Tests including memory measurement +run_mem_test() { + mpirun -np $1 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/$2 0 0 --gtest_filter=*mpi_mixed_speed_test* & + ./../scripts/mem_record.sh mpi_dynamicCC_tests 2 ./../results/mpi_space_results/batch_size_sweep/$2_$3_mem.txt + wait +} + diff --git a/scripts/gibbs_experiments.sh b/scripts/gibbs_experiments.sh new file mode 100755 index 0000000..4f3c580 --- /dev/null +++ b/scripts/gibbs_experiments.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +declare base_dir="$(dirname $(dirname $(realpath $0)))" + +cd ${base_dir}/build +#set -e +#cmake -DSKETCH_BUFFER_SIZE=25 .. 
+#make -j +#set +e + +mkdir -p ./../results +mkdir -p ./../results/gibbs_speed_results + +run_test() { + cat binary_streams/$1 > /dev/null + ./dynamicCC_tests binary_streams/$1 --gtest_filter=*gibbs_mixed_speed_test* +} + +declare -a streams=( +[0]="kron_13_query10_binary" +[1]="kron_15_query10_binary" +[2]="kron_16_query10_binary" +[3]="kron_17_query10_binary" +[4]="kron_18_query10_binary" +# +[5]="dnc_query10_binary" +[6]="tech_query10_binary" +[7]="enron_query10_binary" +# +[8]="twitter_query10_binary" +[9]="stanford_query10_binary" +[10]="random2N_query10_binary" +[11]="randomNLOGN_query10_binary" +[12]="randomNSQRTN_query10_binary" +[13]="randomDIV_query10_binary" +# Fixed Forest +[14]="kron_13_ff_query10_binary" +[15]="kron_15_ff_query10_binary" +[16]="kron_16_ff_query10_binary" +[17]="kron_17_ff_query10_binary" +[18]="kron_18_ff_query10_binary" +# +[19]="dnc_ff_query10_binary" +[20]="tech_ff_query10_binary" +[21]="enron_ff_query10_binary" +# +[22]="twitter_ff_query10_binary" +[23]="stanford_ff_query10_binary" +[24]="random2N_ff_query10_binary" +[25]="randomNLOGN_ff_query10_binary" +[26]="randomNSQRTN_ff_query10_binary" +[27]="randomDIV_ff_query10_binary" +) + +run_test ${streams[$1]} diff --git a/scripts/mem_record.sh b/scripts/mem_record.sh index 7213534..42530ad 100755 --- a/scripts/mem_record.sh +++ b/scripts/mem_record.sh @@ -19,7 +19,7 @@ while true; do do if [ -e /proc/$pid/smaps_rollup ] then - impact=$((`cat /proc/$pid/smaps_rollup | grep Rss | awk '{print $2}'`)) + impact=$((`cat /proc/$pid/smaps_rollup | grep '^Pss:' | awk '{print $2}'`)) sum=$((impact + sum)) #echo $pid, $impact, $sum fi diff --git a/scripts/mpi_correct_test.sh b/scripts/mpi_correct_test.sh index 464fbd6..20e6328 100755 --- a/scripts/mpi_correct_test.sh +++ b/scripts/mpi_correct_test.sh @@ -9,12 +9,12 @@ set -e make -j set +e -mpirun -np 23 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* -mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* -mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* -mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* -mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* +# mpirun -np 23 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* +# mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* +# mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* +# mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* mpirun -np 19 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/dnc_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* -mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* -mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_stream_binary 
0 0 --gtest_filter=*mpi_correctness_test* +# mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* +# mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* diff --git a/scripts/mpi_query_test.sh b/scripts/mpi_query_test.sh index 99eb8c5..e802296 100755 --- a/scripts/mpi_query_test.sh +++ b/scripts/mpi_query_test.sh @@ -10,27 +10,52 @@ set +e mkdir -p ./../results # DEFAULT BATCH SIZE, DEFAULT SKIPLIST HEIGHT FACTOR (1 / log log n) -mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* -mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* -mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* -mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* -mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* + +# mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* + mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_streamified_binary 0 0 --gtest_filter=*mpi_query_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_streamified_binary 0 0 --gtest_filter=*mpi_query_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_streamified_binary 0 0 --gtest_filter=*mpi_query_speed_test* +mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_ff_binary 0 0 --gtest_filter=*mpi_query_speed_test* +mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_ff_binary 0 0 --gtest_filter=*mpi_query_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_ff_binary 0 0 --gtest_filter=*mpi_query_speed_test* + +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/stanford_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +mpirun -np 32 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/random2N_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNLOGN_stream_binary 0 0 
--gtest_filter=*mpi_query_speed_test* +mpirun -np 25 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNSQRTN_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomDIV_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* + # DEFAULT BATCH SIZE, SKIPLIST HEIGHT FACTOR = 1 -mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* -mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* -mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* -mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* -mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* + +# mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* + mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_streamified_binary 0 1 --gtest_filter=*mpi_query_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_streamified_binary 0 1 --gtest_filter=*mpi_query_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_streamified_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_ff_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_ff_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_ff_binary 0 1 --gtest_filter=*mpi_query_speed_test* + +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/stanford_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 32 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/random2N_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNLOGN_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 25 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNSQRTN_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 29 --bind-to hwthread 
./mpi_dynamicCC_tests binary_streams/randomDIV_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* diff --git a/scripts/mpi_space_test.sh b/scripts/mpi_space_test.sh index ab98c7e..71a4a0f 100755 --- a/scripts/mpi_space_test.sh +++ b/scripts/mpi_space_test.sh @@ -4,44 +4,96 @@ declare base_dir="$(dirname $(dirname $(realpath $0)))" cd ${base_dir}/build set -e -make -j -set +e +#cmake -DSKETCH_BUFFER_SIZE=25 .. +#make -j +#set +e mkdir -p ./../results +mkdir -p ./../results/mpi_speed_results mkdir -p ./../results/mpi_space_results -run_mem_test() { - mpirun -np $1 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/$2 0 0 --gtest_filter=*mpi_update_speed_test* & - ./../scripts/mem_record.sh mpi_dynamicCC_tests 2 ./../results/mpi_space_results/$2_mem.txt +# Tests including memory measurement +run_test() { + cat binary_streams/$1 > /dev/null + mpirun -np $2 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/$1 0 $3 --gtest_filter=*mpi_mixed_speed_test* & + ./../scripts/mem_record.sh mpi_dynamicCC_tests 2 ./../results/mpi_space_results/$1_$3_mem.txt wait } -run_mem_test_no_reduced_height() { - mpirun -np $1 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/$2 0 1 --gtest_filter=*mpi_update_speed_test* & - ./../scripts/mem_record.sh mpi_dynamicCC_tests 2 ./../results/mpi_space_results/$2_no_reduced_height_mem.txt - wait -} +declare -a streams=( +[0]="kron_13_query10_binary" +[1]="kron_15_query10_binary" +[2]="kron_16_query10_binary" +[3]="kron_17_query10_binary" +[4]="kron_18_query10_binary" +# +[5]="dnc_query10_binary" +[6]="tech_query10_binary" +[7]="enron_query10_binary" +# +[8]="twitter_query10_binary" +[9]="stanford_query10_binary" +[10]="random2N_query10_binary" +[11]="randomNLOGN_query10_binary" +[12]="randomNSQRTN_query10_binary" +[13]="randomDIV_query10_binary" +# Fixed Forest +[14]="kron_13_ff_query10_binary" +[15]="kron_15_ff_query10_binary" +[16]="kron_16_ff_query10_binary" +[17]="kron_17_ff_query10_binary" +[18]="kron_18_ff_query10_binary" +# +[19]="dnc_ff_query10_binary" +[20]="tech_ff_query10_binary" +[21]="enron_ff_query10_binary" +# +[22]="twitter_ff_query10_binary" +[23]="stanford_ff_query10_binary" +[24]="random2N_ff_query10_binary" +[25]="randomNLOGN_ff_query10_binary" +[26]="randomNSQRTN_ff_query10_binary" +[27]="randomDIV_ff_query10_binary" +) + +declare -a nps=( +[0]=23 +[1]=26 +[2]=28 +[3]=30 +[4]=31 +# +[5]=19 +[6]=26 +[7]=29 +# +[8]=28 +[9]=31 +[10]=32 +[11]=29 +[12]=25 +[13]=29 +# Fixed Forest +[14]=23 +[15]=26 +[16]=28 +[17]=30 +[18]=31 +# +[19]=19 +[20]=26 +[21]=29 +# +[22]=28 +[23]=31 +[24]=32 +[25]=29 +[26]=25 +[27]=29 +) + +for i in $(seq 0 27); +do + run_test ${streams[$i]} ${nps[$i]} 0 +done -run_mem_test "23" "kron_13_stream_binary" -run_mem_test "26" "kron_15_stream_binary" -run_mem_test "28" "kron_16_stream_binary" -run_mem_test "30" "kron_17_stream_binary" -run_mem_test "31" "kron_18_stream_binary" -run_mem_test "19" "dnc_stream_binary" -run_mem_test "26" "tech_stream_binary" -run_mem_test "29" "enron_stream_binary" -run_mem_test "19" "dnc_streamified_binary" -run_mem_test "26" "tech_streamified_binary" -run_mem_test "29" "enron_streamified_binary" - -run_mem_test_no_reduced_height "23" "kron_13_stream_binary" -run_mem_test_no_reduced_height "26" "kron_15_stream_binary" -run_mem_test_no_reduced_height "28" "kron_16_stream_binary" -run_mem_test_no_reduced_height "30" "kron_17_stream_binary" -run_mem_test_no_reduced_height "31" "kron_18_stream_binary" -run_mem_test_no_reduced_height "19" "dnc_stream_binary" -run_mem_test_no_reduced_height 
"26" "tech_stream_binary" -run_mem_test_no_reduced_height "29" "enron_stream_binary" -run_mem_test_no_reduced_height "19" "dnc_streamified_binary" -run_mem_test_no_reduced_height "26" "tech_streamified_binary" -run_mem_test_no_reduced_height "29" "enron_streamified_binary" diff --git a/scripts/mpi_update_test.sh b/scripts/mpi_update_test.sh index edbc2ac..ae70a59 100755 --- a/scripts/mpi_update_test.sh +++ b/scripts/mpi_update_test.sh @@ -9,31 +9,56 @@ set +e mkdir -p ./../results -# DEFAULT BATCH SIZE (100), DEFAULT SKIPLIST HEIGHT FACTOR (1 / log log n) -mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# DEFAULT BATCH SIZE (100), DEFAULT SKIPLIST HEIGHT FACTOR (1 / log log n) ============================================================== + +# mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* + mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_streamified_binary 0 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_streamified_binary 0 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_streamified_binary 0 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_ff_binary 0 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_ff_binary 0 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_ff_binary 0 0 --gtest_filter=*mpi_update_speed_test* + +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/stanford_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 32 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/random2N_stream_binary 0 0 
--gtest_filter=*mpi_update_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNLOGN_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 25 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNSQRTN_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomDIV_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* + + +# BATCH SIZE = 1, DEFAULT SKIPLIST HEIGHT FACTOR (1 / log log n) ======================================================================= + +# mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* -# BATCH SIZE = 1, DEFAULT SKIPLIST HEIGHT FACTOR (1 / log log n) -mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_streamified_binary 1 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_streamified_binary 1 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_streamified_binary 1 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_ff_binary 1 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_ff_binary 1 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_ff_binary 1 0 --gtest_filter=*mpi_update_speed_test* + +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/stanford_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 32 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/random2N_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* 
+mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNLOGN_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 25 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNSQRTN_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomDIV_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* diff --git a/scripts/sketch_buffer_experiment.sh b/scripts/sketch_buffer_experiment.sh new file mode 100755 index 0000000..86a98e9 --- /dev/null +++ b/scripts/sketch_buffer_experiment.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +declare base_dir="$(dirname $(dirname $(realpath $0)))" +echo "Testing with buffer size $1" + +cd ${base_dir}/build +#set -e +#cmake -DSKETCH_BUFFER_SIZE=$1 .. +#make -j +#set +e + +mkdir -p ./../results +mkdir -p ./../results/mpi_speed_results + +run_test() { + cat binary_streams/$1 > /dev/null + mpirun -np $2 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/$1 0 0 --gtest_filter=*mpi_mixed_speed_test* +} + +run_test kron_16_query10_binary 28 +run_test kron_16_ff_query10_binary 28 +run_test twitter_query10_binary 28 +run_test twitter_ff_query10_binary 28 + diff --git a/scripts/sketch_buffer_space.sh b/scripts/sketch_buffer_space.sh new file mode 100755 index 0000000..55e4978 --- /dev/null +++ b/scripts/sketch_buffer_space.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +declare base_dir="$(dirname $(dirname $(realpath $0)))" +echo "Testing with buffer size $1" + +cd ${base_dir}/build +set -e +cmake -DSKETCH_BUFFER_SIZE=$1 .. +make -j +set +e + +mkdir -p ./../results +mkdir -p ./../results/mpi_space_results + +run_mem_test() { + mpirun -np $1 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/$2 0 0 --gtest_filter=*mpi_mixed_speed_test* & + ./../scripts/mem_record.sh mpi_dynamicCC_tests 2 ./../results/mpi_space_results/batch_size_sweep/$2_$3_mem.txt + wait +} + +# Test run +# run_mem_test "23" "kron_13_query10_binary" $1 + +# KRON-16 Batch Size Sweep +run_mem_test "28" "kron_16_query10_binary" $1 +# KRON-16 fixed-forest +run_mem_test "28" "kron_16_ff_query10_binary" $1 + +# Twitter Batch Size Sweep +run_mem_test "28" "twitter_query10_binary" $1 +# Twitter fixed-forest +run_mem_test "28" "twitter_ff_query10_binary" $1 diff --git a/src/batch_tiers.cpp b/src/batch_tiers.cpp new file mode 100644 index 0000000..ea2dc7b --- /dev/null +++ b/src/batch_tiers.cpp @@ -0,0 +1,761 @@ +#include "../include/batch_tiers.h" +#include "util.h" +#include +#include +#include + +// // #define CANARY(X) do {if (update.edge.src == 1784 && update.edge.dst == 4420) { std::cout << __FILE__ << ":" << __LINE__ << " says " << X << std::endl;}} while (false) +// #define CANARY(X) ; +// // #define ENDPOINT_CANARY(X, src, dst) do {if ((src == 7781 || dst == 7781)) {std::cout << __FILE__ << ":" << __LINE__ << " says " << X << " " << src << " " << dst << std::endl;}} while (false) +// #define ENDPOINT_CANARY(X, src, dst) ; + +// long lct_time = 0; +// long ett_time = 0; +// long ett_find_root = 0; +// long ett_get_agg = 0; +// long sketch_query = 0; +// long sketch_time = 0; +// long refresh_time = 0; +// long parallel_isolated_check = 0; +// long tiers_grown = 0; +// long normal_refreshes = 0; + + +// template requires(SketchColumnConcept) +// bool Batch::is_connected(node_id_t a, node_id_t b) { +// return this->link_cut_tree.find_root(a) == this->link_cut_tree.find_root(b); +// } + +// template requires(SketchColumnConcept) +// thread_local parlay::sequence BatchTiers::_deltas_buffer = 
parlay::sequence(); + +template requires(SketchColumnConcept) +BatchTiers::BatchTiers(node_id_t num_nodes, uint64_t seed) : num_nodes(num_nodes), seed(seed), link_cut_tree(num_nodes), query_ett(num_nodes, 0, seed) , _already_checked_components(2048, true), _unique_update_ids(2048), _component_reps_dsu(0) { + // TODO - use the batch_size parameter? + _component_reps_dsu = union_find_local(maximum_batch_size * 2); + // Algorithm parameters + uint32_t num_tiers = log2(num_nodes)/(log2(3)-1); + // Initialize all the ETTs + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + // int seed = dist(rng); + std::cout << "SEED: " << seed << std::endl; + rng.seed(seed); + dist(rng); // To give 1:1 correspondence with MPI seeds + for (uint32_t i = 0; i < num_tiers; i++) { + int tier_seed = dist(rng); + ett.emplace_back(num_nodes, i, tier_seed); + } + + // Initialize the root nodes matrix + _root_nodes.resize(num_tiers); + for (auto& tier_roots : _root_nodes) { + tier_roots.resize(maximum_batch_size * 2); + } + // and _updated_components + _updated_components.resize(num_tiers); + // { + // auto tmp = parlay::parlay_unordered_map_direct(2 * maximum_batch_size, true); + // std::swap(this->_already_checked_components, tmp); + // } + // { + // auto tmp2 = parlay::parlay_unordered_map_direct(2 * maximum_batch_size, true); + // std::swap(this->_unique_update_ids, tmp2); + // } +} + +template + requires(SketchColumnConcept) +BatchTiers::BatchTiers( + node_id_t num_nodes, uint32_t num_tiers, int batch_size, size_t seed) : num_nodes(num_nodes), seed(seed), link_cut_tree(num_nodes), query_ett(num_nodes, 0, seed), _already_checked_components(num_nodes, true), _unique_update_ids(2048), _component_reps_dsu(0) { + // TODO - use the batch_size parameter? + _component_reps_dsu = union_find_local(maximum_batch_size * 2); + + // Initialize all the ETTs + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + // int seed = dist(rng); + std::cout << "SEED: " << seed << std::endl; + rng.seed(seed); + dist(rng); // To give 1:1 correspondence with MPI seeds + for (uint32_t i = 0; i < num_tiers; i++) { + int tier_seed = dist(rng); + ett.emplace_back(num_nodes, i, tier_seed); + } + + // Initialize the root nodes matrix + _root_nodes.resize(num_tiers); + for (auto& tier_roots : _root_nodes) { + tier_roots.resize(maximum_batch_size * 2); + } + // and _updated_components + _updated_components.resize(num_tiers); +} + +template requires(SketchColumnConcept) +BatchTiers::~BatchTiers() {} + + +// TODO - check correctness on doing links/cuts out of order. lowkey it should be fine +// from a correctness pov +template requires(SketchColumnConcept) +void BatchTiers::update_batch(const parlay::sequence &updates) { + + size_t num_updates = updates.size(); + size_t num_tiers = ett.size(); + assert(num_updates <= maximum_batch_size); + _already_checked_components.clear(); + // std::cout << "Processing batch of size " << num_updates << " on " << num_tiers << " tiers." << std::endl; + + // treat all update endpoints as coming from independent components + _component_reps_dsu.reset(); + + // 0) Step 0: Process any necessary tree cut operations on every tier. 
+    // we won't immediately do the sketch updates in this case, and will rely on the next parallel branch for that
+    tbb::parallel_for(
+        tbb::blocked_range<size_t>(0, ett.size()),
+        [&](const tbb::blocked_range<size_t>& r) {
+            for (size_t i = r.begin(); i != r.end(); ++i) {
+                for (const auto& update : updates) {
+                    if (update.type == DELETE && ett[i].has_edge(update.edge.src, update.edge.dst)) {
+                        ett[i].cut(update.edge.src, update.edge.dst);
+                    }
+                }
+            }
+        },
+        tbb::static_partitioner{}
+    );
+    // note: we could just put this in the above region or use par_do
+    // and process on the LCT:
+    for (const auto& update : updates) {
+        if (update.type == DELETE && is_tree_edge(update.edge.src, update.edge.dst)) {
+            link_cut_tree.cut(update.edge.src, update.edge.dst);
+            query_ett.cut(update.edge.src, update.edge.dst);
+            transaction_log.push_back(update);
+        }
+    }
+    // 1) Step 1: Process all sketch aggregates in true batch parallel.
+    // _process_sketch_aggs_only(updates);
+    // _process_sketch_aggs_tier_sequential(updates);
+    _process_sketch_aggs_with_cas(updates);
+
+    // 2) Step 2: Check for isolated components.
+    uint32_t first_isolated_tier = _search_for_isolated_components(updates);
+    // std::cout << "First isolated tier: " << first_isolated_tier << std::endl;
+    if (first_isolated_tier == UINT32_MAX) {
+        // no isolated components found, so we can return early
+        return;
+    }
+    // the first isolated tier has had no link/cut modifications to it, so its roots array is a valid check
+
+    _unique_update_ids.clear();
+    _unique_update_ids.resize(num_updates * 2);
+    std::atomic<size_t> num_unique_components = 0;
+    // construct _unique_update_ids such that it contains just ONE idx for every unique
+    // component at the first isolated tier
+    parlay::parlay_unordered_map_direct<SkipListNode<SketchClass>*, int32_t> component_to_unique_id(2048, true);
+    tbb::parallel_for(
+        tbb::blocked_range<size_t>(0, num_updates),
+        [&](const tbb::blocked_range<size_t>& r) {
+            for (size_t update_idx = r.begin(); update_idx != r.end(); ++update_idx) {
+                for (bool src_or_dst : {true, false}) {
+                    node_id_t vertex = src_or_dst ? updates[update_idx].edge.src : updates[update_idx].edge.dst;
+                    SkipListNode<SketchClass>* root = root_node(first_isolated_tier, update_idx, src_or_dst);
+                    // assign a unique id to this component if it doesn't have one already
+                    std::optional<int32_t> existing_id = component_to_unique_id.Insert(root, vertex);
+                    if (!existing_id.has_value()) {
+                        size_t idx = num_unique_components.fetch_add(1);
+                        _unique_update_ids[idx] = vertex;
+                    }
+                }
+            }
+        });
+    _unique_update_ids.resize(num_unique_components.load());
+
+    // 3) proceed tier-serially:
+    // * at the first isolated tier, collect all components that are isolated.
+    // * each isolated component will give a new edge (a,b).
+    // * if a path already exists between a and b in the final tier/LCT, then cut the maximum weight
+    //   edge on the path, starting from the tier where it first appears (call it tier M) and going until the final one.
+    //
+    //   (NOTE that tier M has to have a higher index than the first isolated tier, because we know that
+    //   the first isolated tier has a forest in which the endpoints a and b were not connected;
+    //   if there were a lower index tier, that would have violated the subset invariant.)
+    //
+    // * if a path does not exist, then we link the two endpoints in all tiers ABOVE the first isolated tier.
+    //   Note that this can cause NEW isolated components to appear in tiers above.
+    //
+    //
+    // * once we do this for every isolated component at the first isolated tier, check the next tier
+    //   to see if it has any isolated components. If it does, repeat (3) at the next tier.
+    //
+    //
+    // SHORTCUTS: we can also tell if a component is maximized by checking for an empty sketch. This
+    // means we can avoid doing further isolation checks.
+    // for (uint32_t)
+    // TODO - is_empty check optimization
+    // return;
+    for (uint32_t tier = first_isolated_tier; tier < ett.size()-1; tier++) {
+        _updated_components[tier].clear();
+    }
+    for (uint32_t tier = first_isolated_tier; tier < ett.size()-1; tier++) {
+        bool components_maximized = _fix_isolations_at_tier(updates, tier);
+        if (components_maximized) {
+            // if all components were maximized, we can skip the remaining tiers:
+            // we know that at this point there are no isolations at higher tiers,
+            // because all potential isolated components must be a union of the modified components
+            // found at this tier. so we can just return
+            // std::cout << "All components maximized at tier " << tier << ", skipping further checks" << std::endl;
+            return;
+        }
+    }
+};
+
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+std::vector<std::set<node_id_t>> BatchTiers<SketchClass>::get_cc() {
+    this->flush_buffer();
+    std::vector<std::set<node_id_t>> cc;
+    std::set<EulerTourNode<SketchClass>*> visited;
+    int top = ett.size()-1;
+    for (uint32_t i = 0; i < ett[top].ett_nodes.size(); i++) {
+        // TODO - this is simply incorrect with a hash map impl of ett_nodes
+        if (visited.find(&ett[top].ett_node(i)) == visited.end()) {
+            std::set<EulerTourNode<SketchClass>*> pointer_component = ett[top].ett_node(i).get_component();
+            std::set<node_id_t> component;
+            for (auto pointer : pointer_component) {
+                component.insert(pointer->vertex);
+                visited.insert(pointer);
+            }
+            cc.push_back(component);
+        }
+    }
+    return cc;
+}
+
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+bool BatchTiers<SketchClass>::is_connected(node_id_t a, node_id_t b) {
+    this->flush_buffer();
+    // TODO - use a sketchless ETT
+    // return this->link_cut_tree.find_root(a) == this->link_cut_tree.find_root(b);
+    return query_ett.is_connected(a, b);
+}
+
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+void BatchTiers<SketchClass>::_process_sketch_aggs_only(const parlay::sequence<GraphUpdate> &updates) {
+    size_t num_updates = updates.size();
+    size_t num_tiers = ett.size();
+    assert(num_updates <= maximum_batch_size);
+    // 1) STEP 1: Speculative non-tree edge update processing
+    // (plus cleaning up and doing the updates for the tree edge deletions)
+    // in parallel, across every tier and update,
+    // update the ETT aggregates,
+    // then reduce to find the maximum
+    // TODO - make sure tree edge deletions aren't being processed twice.
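+    // NOTE: root_node(tier, idx, is_src) is not defined in this diff; given that the
+    // constructors size _root_nodes as [num_tiers][2 * maximum_batch_size], a plausible
+    // layout (an assumption, not confirmed by this code) would be:
+    //
+    //     SkipListNode<SketchClass>*& root_node(size_t tier, size_t idx, bool is_src) {
+    //         return _root_nodes[tier][2 * idx + (is_src ? 0 : 1)];
+    //     }
+    //
+    // i.e. every update owns two cached root slots per tier, one for each endpoint.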
+ // parlay::parallel_for(0, num_tiers*num_updates, [&](size_t i) { + // size_t tier = i / num_updates; + // size_t update_idx = i % num_updates; + // GraphUpdate update = updates[update_idx]; + // vec_t edge_id = concat_pairing_fn(update.edge.src, update.edge.dst); + // SkipListNode<> *src_parent = ett[tier].update_sketch_atomic(update.edge.src, edge_id); + // SkipListNode<> *dst_parent = ett[tier].update_sketch_atomic(update.edge.dst, edge_id); + + // root_node(tier, update_idx, true) = src_parent; + // root_node(tier, update_idx, false) = dst_parent; + // }, granularity); + + // step 1 memory optimization: + // enforce greater locality by first doing edges in + // lower, higher sorted order (only do the srcs) + // then in higher, lower (invert, then do dsts) + auto src_sorted_update_idxs = parlay::tabulate(num_updates, [&](size_t i) { + return i; + }); + parlay::sort_inplace(src_sorted_update_idxs, [&](size_t i, size_t j) { + return updates[i].edge.src < updates[j].edge.src; + }); + auto dst_sorted_update_idxs = parlay::tabulate(num_updates, [&](size_t i) { + return i; + }); + parlay::sort_inplace(dst_sorted_update_idxs, [&](size_t i, size_t j) { + return updates[i].edge.dst < updates[j].edge.dst; + }); + + // bool conservative=true; + // do src updates: + // parlay::blocked_for(0, num_updates * num_tiers, granularity, [&](size_t block_idx, size_t start, size_t end) { + // for (size_t i = start; i < end; i++) { + tbb::parallel_for( + tbb::blocked_range(0, num_updates * num_tiers, granularity), + [&](const tbb::blocked_range &r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + size_t tier = i / num_updates; + size_t update_idx = src_sorted_update_idxs[i % num_updates]; + GraphUpdate update = updates[update_idx]; + vec_t edge_id = concat_pairing_fn(update.edge.src, update.edge.dst); + ColumnEntryDelta delta = ett[tier].generate_entry_delta(update.edge.src, edge_id); + SkipListNode *src_parent = ett[tier].update_sketch_atomic(update.edge.src, delta); + root_node(tier, update_idx, true) = src_parent; + } + }); + // }, tbb::static_partitioner{}); + // }, conservative); + // now dst updates: + // parlay::blocked_for(0, num_updates * num_tiers, granularity, [&](size_t block_idx, size_t start, size_t end) { + // for (size_t i = start; i < end; i++) { + tbb::parallel_for( + tbb::blocked_range(0, num_updates * num_tiers, granularity), + [&](const tbb::blocked_range &r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + size_t tier = i / num_updates; + size_t update_idx = dst_sorted_update_idxs[i % num_updates]; + GraphUpdate update = updates[update_idx]; + vec_t edge_id = concat_pairing_fn(update.edge.src, update.edge.dst); + ColumnEntryDelta delta = ett[tier].generate_entry_delta(update.edge.dst, edge_id); + SkipListNode *dst_parent = ett[tier].update_sketch_atomic(update.edge.dst, delta); + root_node(tier, update_idx, false) = dst_parent; + // }, conservative);} + } + }); + // tbb::static_partitioner{}); + // }, conservative); +} + +template requires(SketchColumnConcept) +void BatchTiers::_process_sketch_aggs_with_cas(const parlay::sequence &updates) { + size_t num_updates = updates.size(); + size_t num_tiers = ett.size(); + assert(num_updates <= maximum_batch_size); + auto src_sorted_update_idxs = parlay::tabulate(num_updates, [&](size_t i) { + return i; + }); + parlay::sort_inplace(src_sorted_update_idxs, [&](size_t i, size_t j) { + return updates[i].edge.src < updates[j].edge.src; + }); + auto dst_sorted_update_idxs = parlay::tabulate(num_updates, [&](size_t i) { + return i; + }); + 
parlay::sort_inplace(dst_sorted_update_idxs, [&](size_t i, size_t j) { + return updates[i].edge.dst < updates[j].edge.dst; + }); + parlay::sequence*> temp_roots; + // in src order: + tbb::parallel_for( + tbb::blocked_range(0, num_updates * num_tiers, granularity), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + size_t tier = i / num_updates; + size_t update_idx = src_sorted_update_idxs[i % num_updates]; + GraphUpdate update = updates[update_idx]; + const ColumnEntryDelta delta = ett[tier].generate_entry_delta(update.edge.src, concat_pairing_fn(update.edge.src, update.edge.dst)); + SkipListNode* src_parent = ett[tier].ett_node( + update.edge.src) + .update_sketch_atomic_to_level(delta, 1); // 3 levels up + SkipListNode* root = src_parent->find_root_with_cas(); + root_node(tier, update_idx, true) = root; + } + }); + // in dst order: + tbb::parallel_for( + tbb::blocked_range(0, num_updates * num_tiers, granularity), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + size_t tier = i / num_updates; + size_t update_idx = dst_sorted_update_idxs[i % num_updates]; + GraphUpdate update = updates[update_idx]; + + const ColumnEntryDelta delta = ett[tier].generate_entry_delta(update.edge.dst, concat_pairing_fn(update.edge.src, update.edge.dst)); + SkipListNode* dst_parent = ett[tier].ett_node( + update.edge.dst) + .update_sketch_atomic_to_level(delta, 1); // 3 levels up + SkipListNode* root = dst_parent->find_root_with_cas(); + root_node(tier, update_idx, false) = root; + } + }); + // TODO - this is gonna be unperformant, but I'd say worth it for simplicity in testing + // update root_node matrix + // in src order: + tbb::parallel_for( + tbb::blocked_range(0, num_updates * num_tiers, granularity), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + size_t tier = i / num_updates; + size_t update_idx = src_sorted_update_idxs[i % num_updates]; + GraphUpdate update = updates[update_idx]; + if (root_node(tier, update_idx, true) != nullptr) { + root_node(tier, update_idx, true)->recompute_aggs_topdown(2); + } + else { + SkipListNode* root = ett[tier].get_root(update.edge.src); + root_node(tier, update_idx, true) = root; + } + } + }); + // in dst order: + tbb::parallel_for( + tbb::blocked_range(0, num_updates * num_tiers, granularity), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + size_t tier = i / num_updates; + size_t update_idx = dst_sorted_update_idxs[i % num_updates]; + GraphUpdate update = updates[update_idx]; + SkipListNode* root = ett[tier].get_root(update.edge.dst); + if (root_node(tier, update_idx, false) != nullptr) { + root_node(tier, update_idx, false)->recompute_aggs_topdown(2); + } + else { + SkipListNode* root = ett[tier].get_root(update.edge.dst); + root_node(tier, update_idx, false) = root; + } + } + }); +} + +template requires(SketchColumnConcept) +void BatchTiers::_process_sketch_aggs_tier_sequential(const parlay::sequence &updates) { + size_t num_updates = updates.size(); + size_t num_tiers = ett.size(); + assert(num_updates <= maximum_batch_size); + auto src_sorted_update_idxs = parlay::tabulate(num_updates, [&](size_t i) { + return i; + }); + // parlay::sort_inplace(src_sorted_update_idxs, [&](size_t i, size_t j) { + // return updates[i].edge.src < updates[j].edge.src; + // }); + auto dst_sorted_update_idxs = parlay::tabulate(num_updates, [&](size_t i) { + return i; + }); + // parlay::sort_inplace(dst_sorted_update_idxs, 
[&](size_t i, size_t j) { + // return updates[i].edge.dst < updates[j].edge.dst; + // }); + + // bool conservative=false; + // bool conservative=true; + tbb::parallel_for( + tbb::blocked_range(0, num_tiers, 1), + [&](const tbb::blocked_range &r) { + for (size_t tier = r.begin(); tier != r.end(); ++tier) { + for (size_t i = 0; i < num_updates; i++) { + size_t update_idx = src_sorted_update_idxs[i]; + // size_t update_idx = i; + GraphUpdate update = updates[update_idx]; + vec_t edge_id = concat_pairing_fn(update.edge.src, update.edge.dst); + // SkipListNode *src_parent = ett[tier].update_sketch(update.edge.src, edge_id); + const ColumnEntryDelta delta = ett[tier].generate_entry_delta(update.edge.src, edge_id); + SkipListNode *src_parent = ett[tier].update_sketch(update.edge.src, delta); + // SkipListNode *src_parent = ett[tier].update_sketch_atomic(update.edge.src, delta); + + root_node(tier, update_idx, true) = src_parent; + } + for (size_t i = 0; i < num_updates; i++) { + root_node(tier, i, true)->process_updates(); + } + for (size_t i = 0; i < num_updates; i++) { + size_t update_idx = dst_sorted_update_idxs[i]; + // size_t update_idx = i; + GraphUpdate update = updates[update_idx]; + vec_t edge_id = concat_pairing_fn(update.edge.src, update.edge.dst); + // SkipListNode *dst_parent = ett[tier].update_sketch(update.edge.dst, edge_id); + const ColumnEntryDelta delta = ett[tier].generate_entry_delta(update.edge.dst, edge_id); + SkipListNode *dst_parent = ett[tier].update_sketch(update.edge.dst, delta); + root_node(tier, update_idx, false) = dst_parent; + } + for (size_t i = 0; i < num_updates; i++) { + root_node(tier, i, false)->process_updates(); + } + } + }, + tbb::static_partitioner{} + ); + // 0, conservative); + // tbb::parallel_for( + // tbb::blocked_range(0, num_tiers, 1), + // [&](const tbb::blocked_range &r) { + // for (size_t tier = r.begin(); tier != r.end(); ++tier) { + // // for (size_t tier = 0; tier < num_tiers; tier++) { + // // source loop: + // parlay::sequence _deltas_buffer; + // size_t i = 0; + // while (i < num_updates) { + // _deltas_buffer.clear(); + // size_t j = i; + // while (j < num_updates && updates[src_sorted_update_idxs[j]].edge.src == updates[src_sorted_update_idxs[i]].edge.src) { + // GraphUpdate update = updates[src_sorted_update_idxs[j]]; + // vec_t edge_id = concat_pairing_fn( + // update.edge.src, + // update.edge.dst); + // auto delta = ett[tier].generate_entry_delta( + // update.edge.src, + // edge_id); + // _deltas_buffer.push_back(delta); + + // j++; + // } + // SkipListNode *src_parent = this->ett[tier].update_sketch( + // updates[src_sorted_update_idxs[i]].edge.src, + // _deltas_buffer.head(_deltas_buffer.size())); + // for (size_t k = i; k < j; k++) { + // size_t update_idx = src_sorted_update_idxs[k]; + // root_node(tier, update_idx, true) = src_parent; + // } + // i = j; + // } + // // dest loop: + // i = 0; + // while (i < num_updates) { + // _deltas_buffer.clear(); + // size_t j = i; + // while (j < num_updates && updates[dst_sorted_update_idxs[j]].edge.dst == updates[dst_sorted_update_idxs[i]].edge.dst) { + // GraphUpdate update = updates[dst_sorted_update_idxs[j]]; + // vec_t edge_id = concat_pairing_fn( + // update.edge.src, + // update.edge.dst); + // auto delta = ett[tier].generate_entry_delta( + // update.edge.dst, + // edge_id); + // _deltas_buffer.push_back(delta); + // j++; + // } + // SkipListNode *dst_parent = this->ett[tier].update_sketch( + // updates[dst_sorted_update_idxs[i]].edge.dst, + // 
_deltas_buffer.head(_deltas_buffer.size())); + // for (size_t k = i; k < j; k++) { + // size_t update_idx = dst_sorted_update_idxs[k]; + // root_node(tier, update_idx, false) = dst_parent; + // } + // i = j; + // } + // parlay::parallel_for(0, num_updates, [&](size_t k) { + // root_node(tier, k, true)->process_updates(); + // root_node(tier, k, false)->process_updates(); + // }); + // // for (size_t k = 0; k < num_updates; k++) { + // // root_node(tier, k, true)->process_updates(); + // // root_node(tier, k, false)->process_updates(); + // // } + // } + // }, + // tbb::static_partitioner{} + // ); +} + +template requires(SketchColumnConcept) +uint32_t BatchTiers::_search_for_isolated_components(const parlay::sequence &updates) { + size_t num_updates = updates.size(); + size_t num_tiers = ett.size(); + assert(num_updates <= maximum_batch_size); + // we can use parlay::find, as long as we are using "tier-major" order + auto isolation_tabulate = parlay::delayed_tabulate( + (num_tiers - 1) * num_updates, + [&](size_t i) { + size_t tier = i / num_updates; + size_t update_idx = i % num_updates; + for (bool src_or_dst : {true, false}) { + SkipListNode *root = root_node(tier, update_idx, src_or_dst); + SkipListNode *next_root = root_node(tier + 1, update_idx, src_or_dst); + uint32_t tier_size = root->size; + uint32_t next_size = next_root->size; + if (tier_size == next_size) { + // This means that the component is isolated + if (root->sketch_agg.sample().result == GOOD) { + // this means that the component is isolated + // std::cout << "isolation found at tier " << tier << " for update idx " << update_idx << std::endl; + return true; + } + } + } + return false; + }); + auto first_isolated_iter = parlay::find(isolation_tabulate, true); + if (first_isolated_iter == isolation_tabulate.end()) { + // no isolated components! + return UINT32_MAX; + } + uint32_t first_isolated_idx = first_isolated_iter - isolation_tabulate.begin(); + // note - i dont think we care about the isolation idx + uint32_t first_isolated_tier = first_isolated_idx / num_updates; + return first_isolated_tier; +} + +template requires(SketchColumnConcept) +bool BatchTiers::_fix_isolations_at_tier(const parlay::sequence &updates, uint32_t tier) { + size_t num_updates = updates.size(); + // size_t num_tiers = ett.size(); + + // needs to be atomically updated. + bool components_maximized = true; + for (size_t i= 0 ; i < _unique_update_ids.size(); i++) { + node_id_t vertex = _unique_update_ids[i]; + _updated_components[tier].push_back(vertex); + } + // for each update, we only need to grab ROOTS + // for (size_t i=0; i < num_updates * 2; i++) { + // for (size_t i = 0; i < num_updates * 2; i++) { + // // only if you are STILL a root. + // // AND your sketch is non-empty + // likely_if (!_component_reps_dsu.is_root(i)) { + // // return; + // continue; + // } + // bool src_or_dst = static_cast(i % 2); + // size_t update_idx = i / 2; + // _updated_components[tier].push_back( + // src_or_dst ? updates[update_idx].edge.src : updates[update_idx].edge.dst); + // }; + // now, _updated_components contains all components that need to be + // including ones that may have been inherited from doing links/cuts below. 
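+    // Recap of the isolation test this loop relies on (see _search_for_isolated_components):
+    // a component is isolated at this tier when its root has the same size at tier and tier+1
+    // (no tree edge above merges it into anything larger) while its sketch aggregate still
+    // samples a GOOD edge, i.e. some non-tree edge leaves the component. As a hypothetical
+    // example, if component {1,2} has size 2 at both tiers and its sketch samples edge (2,5),
+    // then (2,5) must be promoted into all higher tiers (or displace a heavier tree edge there).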
+    // for (size_t i = 0; i < _updated_components[tier].size(); i++) {
+    parlay::sequence<SkipListNode<SketchClass>*> temp_roots;
+    std::atomic<size_t> num_temp_roots = 0;
+    temp_roots.resize(_updated_components[tier].size());
+
+    tbb::parallel_for(
+        tbb::blocked_range<size_t>(0, _updated_components[tier].size()),
+        [&](const tbb::blocked_range<size_t>& r) {
+            for (size_t i = r.begin(); i != r.end(); ++i) {
+                node_id_t vertex_in_component = _updated_components[tier][i];
+                // TODO - we can do some work to avoid checking the same component (maybe?)
+                // in case a component was previously merged already
+                // SkipListNode<SketchClass>* component_root = ett[tier].get_root(vertex_in_component);
+                SkipListNode<SketchClass>* component_root = ett[tier].ett_node(vertex_in_component).get_allowed_caller()->find_root_with_cas();
+                if (component_root == nullptr) {
+                    continue;
+                }
+                size_t idx = num_temp_roots.fetch_add(1);
+                temp_roots[idx] = component_root;
+                // component_root->clear_cas_flags();
+                SkipListNode<SketchClass>* next_tier_root = ett[tier + 1].get_root(vertex_in_component);
+
+                // TODO - this is no longer necessary, because we are using the DSU to keep the smallest
+                // possible set of _updated_components entries.
+                // actually, we'll keep it for now anyway:
+                // the current DSU filter is just being used as a simple filter,
+                // since we aren't doing any changes to it past the first isolated tier.
+                if (_already_checked_components.find((size_t)(component_root)) != _already_checked_components.end()) {
+                    // std::cout << "yerr" << std::endl;
+                    // return;
+                    continue;
+                }
+                // _already_checked_components.insert_or_assign((size_t)component_root, tier);
+                // _already_checked_components[(size_t)component_root] = tier;
+                _already_checked_components.Insert((size_t)component_root, tier);
+                SketchClass& ett_agg = component_root->sketch_agg;
+                // TODO - do we want to sample before? I don't think so. but we can at least
+                // do the empty check with a special new primitive
+                SketchSample query_result = ett_agg.sample();
+                if (query_result.result != ZERO) {
+                    if (components_maximized) {
+                        // bool f = false;
+                        // bool t = true;
+                        __sync_bool_compare_and_swap((bool*)&components_maximized, true, false);
+                    }
+                }
+                {
+                    if (component_root->size == next_tier_root->size) {
+                        if (query_result.result == GOOD) {
+                            std::lock_guard guard(this->lct_and_query_ett_lock);
+                            // ... and see if a path exists between the endpoints in the LCT
+                            edge_id_t edge = query_result.idx;
+                            node_id_t a = (node_id_t)edge;
+                            node_id_t b = (node_id_t)(edge >> 32);
+
+                            // check if a path exists between the endpoints
+                            // auto a_root = link_cut_tree.find_root(a);
+                            // auto b_root = link_cut_tree.find_root(b);
+                            // TODO - ETT
+
+                            // if it does, then we either need to cut it, or ignore this update
+                            // if (a_root == b_root) {
+                            if (link_cut_tree.connected(a, b)) {
+                                // a path exists, so we need to cut the maximum weight edge on the path
+                                // THIS REALLY CAN'T BE PARALLELIZED at the moment
+                                std::pair<Edge, uint32_t> max_edge = link_cut_tree.path_query(a, b);
+                                node_id_t c = max_edge.first.src;
+                                node_id_t d = max_edge.first.dst;
+                                // node_id_t c = (node_id_t)max_edge.first;
+                                // node_id_t d = (node_id_t)(max_edge.first >> 32);
+                                uint32_t first_appeared_tier = max_edge.second;
+                                // if the first appeared tier is equal to tier+1, then we should check if this
+                                // was a link we had just discovered. If so, we neither cut it, nor include this link.
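+                                // A hypothetical walk-through of the branch below: the sampled
+                                // edge is (a,b), the LCT already has an a-b path, and path_query
+                                // returned that path's maximum-weight edge (c,d), which first
+                                // appeared at tier M = first_appeared_tier. If M == tier + 1,
+                                // (c,d) was linked by this very pass, so swapping it for (a,b)
+                                // gains nothing and we do neither. Otherwise (c,d) is evicted
+                                // starting from the tier where it first appeared, and (a,b) is
+                                // linked into every tier above this one, preserving the subset
+                                // invariant between tiers.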
+                                if (first_appeared_tier == tier + 1) {
+                                    // YOU KNOW that these couldn't have been connected in the tier above,
+                                    // because otherwise the components could not have been the same size
+                                    // (which is necessary for the isolation condition)
+                                    //
+                                    // so: DO NOTHING
+                                } else {
+                                    // likewise, if it's a higher tier, definitely perform the cut
+                                    _pending_cuts.push_back({{c, d}, first_appeared_tier});
+                                    link_cut_tree.cut(c, d);
+                                    query_ett.cut(c, d);
+                                    transaction_log.push_back({{c, d}, DELETE});
+
+                                    // and push the link we just found
+                                    _pending_links.push_back({a, b});
+                                    link_cut_tree.link(a, b, tier + 1);
+                                    query_ett.link(a, b);
+                                    transaction_log.push_back({{a, b}, INSERT});
+                                    // and update the dsu
+                                }
+                            } else {
+                                // if there was no competing link between the endpoints in the LCT,
+                                // then we just link them.
+                                _pending_links.push_back({a, b});
+                                link_cut_tree.link(a, b, tier + 1);
+                                query_ett.link(a, b);
+                                transaction_log.push_back({{a, b}, INSERT});
+                            }
+                        }
+                    }
+                }
+            }
+        });
+    // clear cas flags:
+    tbb::parallel_for(
+        tbb::blocked_range<size_t>(0, num_temp_roots),
+        [&](const tbb::blocked_range<size_t>& r) {
+            for (size_t i = r.begin(); i != r.end(); ++i) {
+                temp_roots[i]->clear_cas_flags();
+            }
+        });
+
+    // at this point, we know exactly what cuts and links we need to do at higher tiers.
+    // for each tier, we'll perform the cuts and links, and then add any entries to
+    // _updated_components[tier] that we need to.
+    // parlay::parallel_for(tier + 1, ett.size(), [&](size_t t) {
+    tbb::parallel_for(
+        tbb::blocked_range<size_t>(tier + 1, ett.size(), 1),
+        [&](const tbb::blocked_range<size_t> &r) {
+            for (size_t t = r.begin(); t != r.end(); ++t) {
+                // for (size_t t = tier + 1; t < ett.size(); t
+                for (auto &cut : _pending_cuts) {
+                    // do not perform the cut if the edge has not yet appeared
+                    if (cut.second < t)
+                        continue;
+                    // cut the edge in the current tier
+                    ett[t].cut(cut.first.src, cut.first.dst);
+                }
+                for (const Edge &link : _pending_links) {
+                    ett[t].link(link.src, link.dst);
+                }
+            }
+        },
+        tbb::static_partitioner{});
+    // });
+
+    // at this point, all induced links and cuts have been performed, and we have a log
+    // of components that need to be checked for isolation in the next tier.
+ _pending_links.clear(); + _pending_cuts.clear(); + _already_checked_components.clear(); + + return components_maximized; + +} + +template class BatchTiers; \ No newline at end of file diff --git a/src/euler_tour_tree.cpp b/src/euler_tour_tree.cpp index a0ef06d..a79cbed 100644 --- a/src/euler_tour_tree.cpp +++ b/src/euler_tour_tree.cpp @@ -2,49 +2,101 @@ #include -EulerTourTree::EulerTourTree(node_id_t num_nodes, uint32_t tier_num, int seed) { - // Initialize all the ETT node - ett_nodes.reserve(num_nodes); - for (node_id_t i = 0; i < num_nodes; ++i) { - ett_nodes.emplace_back(seed, i, tier_num); +template + requires(SketchColumnConcept) +EulerTourTree::EulerTourTree(node_id_t num_nodes, uint32_t tier_num, int seed) : temp_sketch(0, seed), seed(seed), max_num_nodes(num_nodes), tier_num(tier_num) { + if constexpr (std::is_same_v>>) { + ett_nodes.reserve(num_nodes); + for (node_id_t i = 0; i < num_nodes; ++i) { + ett_nodes.emplace_back(seed, i, tier_num); + } } - // Initialize the temp_sketch - this->temp_sketch = new Sketch(sketch_len, seed, 1, sketch_err); + this->temp_sketch = SketchClass( + SketchClass::suggest_capacity(sketch_len), seed); } -void EulerTourTree::link(node_id_t u, node_id_t v) { - ett_nodes[u].link(ett_nodes[v], temp_sketch); +template requires(SketchColumnConcept) +void EulerTourTree::link(node_id_t u, node_id_t v) { + ett_node(u).link(ett_node(v), temp_sketch); } -void EulerTourTree::cut(node_id_t u, node_id_t v) { - ett_nodes[u].cut(ett_nodes[v], temp_sketch); +template requires(SketchColumnConcept) +void EulerTourTree::cut(node_id_t u, node_id_t v) { + ett_node(u).cut(ett_node(v), temp_sketch); } -bool EulerTourTree::has_edge(node_id_t u, node_id_t v) { - return ett_nodes[u].has_edge_to(&ett_nodes[v]); +template requires(SketchColumnConcept) +bool EulerTourTree::has_edge(node_id_t u, node_id_t v) { + return ett_node(u).has_edge_to(&ett_node(v)); } -SkipListNode* EulerTourTree::update_sketch(node_id_t u, vec_t update_idx) { - return ett_nodes[u].update_sketch(update_idx); +template requires(SketchColumnConcept) +SkipListNode* EulerTourTree::update_sketch(node_id_t u, vec_t update_idx) { + return ett_node(u).update_sketch(update_idx); } -std::pair EulerTourTree::update_sketches(node_id_t u, node_id_t v, vec_t update_idx) { +template requires(SketchColumnConcept) +SkipListNode* EulerTourTree::update_sketch(node_id_t u, const ColumnEntryDelta &delta) { + return ett_node(u).update_sketch(delta); +} + +template requires(SketchColumnConcept) +SkipListNode* EulerTourTree::update_sketch(node_id_t u, const ColumnEntryDeltas &deltas) { + if (deltas.size() >= 8) { + // std::cout << "Using temp sketch for batch of size " << deltas.size() << std::endl; + this->temp_sketch.zero_contents(); + for (const auto& delta : deltas) { + this->temp_sketch.apply_entry_delta(delta); + } + return ett_node(u).update_sketch((const SketchClass&)temp_sketch); + } + else { + return ett_node(u).update_sketch(deltas); + } +} + +template requires(SketchColumnConcept) +SkipListNode* EulerTourTree::update_sketch(node_id_t u, const SketchClass &sketch) { + return ett_node(u).update_sketch(sketch); +} + + +template requires(SketchColumnConcept) +SkipListNode* EulerTourTree::update_sketch_atomic(node_id_t u, vec_t update_idx) { + return ett_node(u).update_sketch_atomic(update_idx); +} + +template requires(SketchColumnConcept) +SkipListNode* EulerTourTree::update_sketch_atomic(node_id_t u, const ColumnEntryDelta &delta) { + return ett_node(u).update_sketch_atomic(delta); +} + +template 
requires(SketchColumnConcept) +SkipListNode* EulerTourTree::update_sketch_atomic(node_id_t u, const ColumnEntryDeltas &deltas) { + return ett_node(u).update_sketch_atomic(deltas); +} + +template requires(SketchColumnConcept) +std::pair *, SkipListNode *> +EulerTourTree::update_sketches(node_id_t u, node_id_t v, + vec_t update_idx) { // Update the paths in lockstep, stopping at the first common node - SkipListNode* curr1 = ett_nodes[u].allowed_caller; - SkipListNode* curr2 = ett_nodes[v].allowed_caller; - SkipListNode *prev1, *prev2; + SkipListNode* curr1 = ett_node(u).allowed_caller; + SkipListNode* curr2 = ett_node(v).allowed_caller; + SkipListNode *prev1, *prev2; + ColumnEntryDelta delta = generate_entry_delta(u, update_idx); while (curr1 || curr2) { if (curr1 == curr2) { - SkipListNode* root = curr1->get_root(); + SkipListNode<>* root = curr1->get_root(); return {root, root}; } if (curr1) { - curr1->update_agg(update_idx); + curr1->update_agg_entry_delta(delta); prev1 = curr1; curr1 = prev1->get_parent(); } if (curr2) { - curr2->update_agg(update_idx); + curr2->update_agg_entry_delta(delta); prev2 = curr2; curr2 = prev2->get_parent(); } @@ -52,106 +104,211 @@ std::pair EulerTourTree::update_sketches(node_id_t return {prev1, prev2}; } -SkipListNode* EulerTourTree::get_root(node_id_t u) { - return ett_nodes[u].get_root(); +template requires(SketchColumnConcept) +SkipListNode* EulerTourTree::get_root(node_id_t u) { + return ett_node(u).get_root(); } -Sketch* EulerTourTree::get_aggregate(node_id_t u) { - return ett_nodes[u].get_aggregate(); +template requires(SketchColumnConcept) +const SketchClass& EulerTourTree::get_aggregate(node_id_t u) { + return ett_node(u).get_aggregate(); } -uint32_t EulerTourTree::get_size(node_id_t u) { - return ett_nodes[u].get_size(); +template requires(SketchColumnConcept) +uint32_t EulerTourTree::get_size(node_id_t u) { + return ett_node(u).get_size(); } -EulerTourNode::EulerTourNode(long seed, node_id_t vertex, uint32_t tier) : seed(seed), vertex(vertex), tier(tier) { +template requires(SketchColumnConcept) +EulerTourNode::EulerTourNode(long seed, node_id_t vertex, uint32_t tier) : seed(seed), vertex(vertex), tier(tier) { // Initialize sentinel - this->make_edge(nullptr, nullptr); + this->make_edge(nullptr); } -EulerTourNode::EulerTourNode(long seed) : seed(seed) { +template requires(SketchColumnConcept) +EulerTourNode::EulerTourNode(long seed) : seed(seed) { // Initialize sentinel - this->make_edge(nullptr, nullptr); + this->make_edge(nullptr); } -EulerTourNode::~EulerTourNode() { +template requires(SketchColumnConcept) +EulerTourNode::~EulerTourNode() { // Final boundary nodes are a memory leak // Need to somehow delete all the skiplist nodes at the end // for (auto edge : edges) // edge.second->uninit_element(false); } -SkipListNode* EulerTourNode::make_edge(EulerTourNode* other, Sketch* temp_sketch) { +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::make_edge(EulerTourNode* other, SketchClass &temp_sketch) { assert(!other || this->tier == other->tier); //Constructing a new SkipListNode with pointer to this ETT object - SkipListNode* node; + SkipListNode* node; if (allowed_caller == nullptr) { - node = SkipListNode::init_element(this, true); + node = SkipListNode::init_element(this, true); allowed_caller = node; - if (temp_sketch != nullptr) { + if (temp_sketch.is_initialized()) { node->update_path_agg(temp_sketch); - temp_sketch->zero_contents(); + // note: this is really poorly written, + // but we KNOW that a move was not performed 
here. + // because in this branch, node is instantiated with a sketch + temp_sketch.zero_contents(); } } else { - node = SkipListNode::init_element(this, false); + node = SkipListNode::init_element(this, false); } //Add the new SkipListNode to the edge list return this->edges.emplace(std::make_pair(other, node)).first->second; //Returns the new node pointer or the one that already existed if it did } -void EulerTourNode::delete_edge(EulerTourNode* other, Sketch* temp_sketch) { +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::make_edge(EulerTourNode* other) { assert(!other || this->tier == other->tier); - SkipListNode* node_to_delete = this->edges[other]; + //Constructing a new SkipListNode with pointer to this ETT object + SkipListNode* node; + if (allowed_caller == nullptr) { + node = SkipListNode::init_element(this, true); + allowed_caller = node; + } else { + node = SkipListNode::init_element(this, false); + } + //Add the new SkipListNode to the edge list + return this->edges.emplace(std::make_pair(other, node)).first->second; + //Returns the new node pointer or the one that already existed if it did +} + +template requires(SketchColumnConcept) +void EulerTourNode::delete_edge(EulerTourNode* other, SketchClass& temp_sketch) { + assert(!other || this->tier == other->tier); + SkipListNode* node_to_delete = this->edges[other]; this->edges.erase(other); if (node_to_delete == allowed_caller) { if (this->edges.empty()) { allowed_caller = nullptr; node_to_delete->process_updates(); // std::cout << node_to_delete << std::endl; - temp_sketch->merge(*node_to_delete->sketch_agg); - node_to_delete->sketch_agg = nullptr; + // temp_sketch = std::move(node_to_delete->sketch_agg); + temp_sketch.merge(std::move(node_to_delete->sketch_agg)); + // node_to_delete->sketch_agg = nullptr; + node_to_delete->sketch_agg = SketchClass(0, seed); // We just gave the sketch to new allowed caller } else { allowed_caller = this->edges.begin()->second; node_to_delete->process_updates(); allowed_caller->update_path_agg(node_to_delete->sketch_agg); - node_to_delete->sketch_agg = nullptr; // We just gave the sketch to new allowed caller + node_to_delete->sketch_agg = SketchClass(0, seed); // We just gave the sketch to new allowed caller } } node_to_delete->uninit_element(true); } -SkipListNode* EulerTourNode::update_sketch(vec_t update_idx) { +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch(vec_t update_idx) { assert(allowed_caller); return this->allowed_caller->update_path_agg(update_idx); } -SkipListNode* EulerTourNode::get_root() { +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch(const ColumnEntryDelta &delta) { + assert(allowed_caller); + return this->allowed_caller->update_path_agg(delta); +} + +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch(const ColumnEntryDeltas &deltas) { + assert(allowed_caller); + return this->allowed_caller->update_path_agg(deltas); +} + +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch(const SketchClass &sketch) { + assert(allowed_caller); + return this->allowed_caller->update_path_agg(sketch); +} + +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch_atomic(vec_t update_idx) { + assert(allowed_caller); + return this->allowed_caller->update_path_agg_atomic(update_idx); +} +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch_atomic(const ColumnEntryDelta &delta) { + 
assert(allowed_caller); + return this->allowed_caller->update_path_agg_atomic(delta); +} +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch_atomic(const ColumnEntryDeltas &deltas) { + assert(allowed_caller); + return this->allowed_caller->update_path_agg_atomic(deltas); +} + +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch_noagg_atomic(const ColumnEntryDelta &delta) { + assert(allowed_caller); + this->allowed_caller->update_agg_entry_delta(delta); + return this->allowed_caller; +} +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch_atomic_to_level(const ColumnEntryDelta &delta, uint32_t level) { + assert(allowed_caller); + // return this->allowed_caller->update_agg_atomic_to_level(level); + + SkipListNode* curr = this->allowed_caller; + SkipListNode* prev = nullptr; + while (curr != nullptr && level > 0) { + curr->update_agg_atomic_entry_delta(delta); + prev = curr; + curr = curr->get_parent(); + level--; + } + if (prev) { + std::atomic_ref atomic_needs_update(prev->needs_update); + atomic_needs_update.store( + AggUpdateState::PARENT_IS_STALE, + std::memory_order_relaxed + ); + } + return prev; +} + +template requires(SketchColumnConcept) +void EulerTourNode::recompute_aggregates_parallel() { + assert(allowed_caller); +} + + +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::get_root() const { return this->allowed_caller->get_root(); } //Get the aggregate sketch at the root of the ETT for this node -Sketch* EulerTourNode::get_aggregate() { +template requires(SketchColumnConcept) +const SketchClass& EulerTourNode::get_aggregate() { assert(allowed_caller); return this->allowed_caller->get_list_aggregate(); } -uint32_t EulerTourNode::get_size() { +template requires(SketchColumnConcept) +uint32_t EulerTourNode::get_size() { return this->allowed_caller->get_list_size(); } -bool EulerTourNode::has_edge_to(EulerTourNode* other) { +template requires(SketchColumnConcept) +bool EulerTourNode::has_edge_to(EulerTourNode* other) { return !(this->edges.find(other) == this->edges.end()); } -std::set EulerTourNode::get_component() { +template requires(SketchColumnConcept) +std::set*> EulerTourNode::get_component() { return this->allowed_caller->get_component(); } -bool EulerTourNode::link(EulerTourNode& other, Sketch* temp_sketch) { +template requires(SketchColumnConcept) +bool EulerTourNode::link(EulerTourNode& other, SketchClass& temp_sketch) { assert(this->tier == other.tier); - SkipListNode* this_sentinel = this->edges.begin()->second->get_last(); - SkipListNode* other_sentinel = other.edges.begin()->second->get_last(); + SkipListNode* this_sentinel = this->edges.begin()->second->get_last(); + SkipListNode* other_sentinel = other.edges.begin()->second->get_last(); // There should always be a sentinel assert(this_sentinel == this_sentinel->node->edges.at(nullptr)); @@ -171,19 +328,19 @@ bool EulerTourNode::link(EulerTourNode& other, Sketch* temp_sketch) { // ^ ^ // '--------------------'--- might be null - SkipListNode* aux_this_right = this->edges.begin()->second; - SkipListNode* aux_this_left = SkipListNode::split_left(aux_this_right); + SkipListNode* aux_this_right = this->edges.begin()->second; + SkipListNode* aux_this_left = SkipListNode::split_left(aux_this_right); // Unlink and destroy other_sentinel - SkipListNode* aux_other = SkipListNode::split_left(other_sentinel); + SkipListNode* aux_other = SkipListNode::split_left(other_sentinel); 
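  // A nullptr here appears to mean other's tour consisted of its sentinel
  // alone (i.e. `other` was an isolated vertex), which is why both of its
  // fragments are left empty in the branch below.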
other_sentinel->node->delete_edge(nullptr, temp_sketch); - SkipListNode* aux_other_left, *aux_other_right; + SkipListNode* aux_other_left, *aux_other_right; if (aux_other == nullptr) { aux_other_right = aux_other_left = nullptr; } else { aux_other_right = other.edges.begin()->second; - aux_other_left = SkipListNode::split_left(aux_other_right); + aux_other_left = SkipListNode::split_left(aux_other_right); } // reroot other tree @@ -191,45 +348,60 @@ bool EulerTourNode::link(EulerTourNode& other, Sketch* temp_sketch) { // R LR L R LR L // N N - SkipListNode* aux_edge_left = this->make_edge(&other, temp_sketch); - SkipListNode* aux_edge_right = other.make_edge(this, temp_sketch); + SkipListNode* aux_edge_left = this->make_edge(&other, temp_sketch); + SkipListNode* aux_edge_right = other.make_edge(this, temp_sketch); - SkipListNode::join(aux_this_left, aux_edge_left, aux_other_right, + SkipListNode::join(aux_this_left, aux_edge_left, aux_other_right, aux_other_left, aux_edge_right, aux_this_right); return true; } -bool EulerTourNode::cut(EulerTourNode& other, Sketch* temp_sketch) { +template requires(SketchColumnConcept) +bool EulerTourNode::cut(EulerTourNode& other, SketchClass& temp_sketch) { assert(this->tier == other.tier); if (this->edges.find(&other) == this->edges.end()) { assert(other.edges.find(this) == other.edges.end()); return false; } - SkipListNode* e1 = this->edges[&other]; - SkipListNode* e2 = other.edges[this]; + SkipListNode* e1 = this->edges[&other]; + SkipListNode* e2 = other.edges[this]; - SkipListNode* frag1r = SkipListNode::split_right(e1); + SkipListNode* frag1r = SkipListNode::split_right(e1); bool order_is_e1e2 = e2->get_last() != e1; - SkipListNode* frag1l = SkipListNode::split_left(e1); + SkipListNode* frag1l = SkipListNode::split_left(e1); this->delete_edge(&other, temp_sketch); - SkipListNode* frag2r = SkipListNode::split_right(e2); - SkipListNode* frag2l = SkipListNode::split_left(e2); + SkipListNode* frag2r = SkipListNode::split_right(e2); + SkipListNode* frag2l = SkipListNode::split_left(e2); other.delete_edge(this, temp_sketch); if (order_is_e1e2) { // e1 is to the left of e2 // e2 should be made into a sentinel - SkipListNode* sentinel = other.make_edge(nullptr, temp_sketch); - SkipListNode::join(frag2l, sentinel); - SkipListNode::join(frag1l, frag2r); + SkipListNode* sentinel = other.make_edge(nullptr, temp_sketch); + SkipListNode::join(frag2l, sentinel); + SkipListNode::join(frag1l, frag2r); } else { // e2 is to the left of e1 // e1 should be made into a sentinel - SkipListNode* sentinel = this->make_edge(nullptr, temp_sketch); - SkipListNode::join(frag2r, sentinel); - SkipListNode::join(frag2l, frag1r); + SkipListNode* sentinel = this->make_edge(nullptr, temp_sketch); + SkipListNode::join(frag2r, sentinel); + SkipListNode::join(frag2l, frag1r); } return true; } + + +template class EulerTourNode; + +// using VectorContainer = std::vector>; +// using HashmapContainer = absl::flat_hash_map*>; +template class EulerTourTree; +template class EulerTourTree; + + +using ETTWithHashmap = EulerTourTree*>>; +using ETTWithVector = EulerTourTree>>; + +// template std::ostream& operator<<(std::ostream&, const EulerTourNode&); \ No newline at end of file diff --git a/src/graph_tiers.cpp b/src/graph_tiers.cpp index 4272bd9..09790ad 100644 --- a/src/graph_tiers.cpp +++ b/src/graph_tiers.cpp @@ -21,7 +21,8 @@ long tiers_grown = 0; long normal_refreshes = 0; -GraphTiers::GraphTiers(node_id_t num_nodes) : link_cut_tree(num_nodes) { +template requires(SketchColumnConcept) 
+GraphTiers<SketchClass>::GraphTiers(node_id_t num_nodes, uint64_t seed) : link_cut_tree(num_nodes) {
 	// Algorithm parameters
 	uint32_t num_tiers = log2(num_nodes)/(log2(3)-1);
@@ -29,7 +30,7 @@
 	std::random_device dev;
 	std::mt19937 rng(dev());
 	std::uniform_int_distribution dist(0,MAX_INT);
-	int seed = dist(rng);
+	// int seed = dist(rng);
 	std::cout << "SEED: " << seed << std::endl;
 	rng.seed(seed);
 	dist(rng); // To give 1:1 correspondence with MPI seeds
@@ -38,49 +39,60 @@
 		ett.emplace_back(num_nodes, i, tier_seed);
 	}
-	root_nodes.reserve(num_tiers*2);
+	root_nodes.resize(num_tiers*2);
 }

-GraphTiers::~GraphTiers() {}
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+GraphTiers<SketchClass>::~GraphTiers() {}

-void GraphTiers::update(GraphUpdate update) {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+void GraphTiers<SketchClass>::update(GraphUpdate update) {
 	edge_id_t edge = VERTICES_TO_EDGE(update.edge.src, update.edge.dst);
 	// Update the sketches of both endpoints of the edge in all tiers
 	if (update.type == DELETE && link_cut_tree.has_edge(update.edge.src, update.edge.dst)) {
 		link_cut_tree.cut(update.edge.src, update.edge.dst);
 	}
 	START(su);
-	#pragma omp parallel for
+	std::atomic<bool> did_cut(false);
+	// #pragma omp parallel for
 	for (uint32_t i = 0; i < ett.size(); i++) {
 		if (update.type == DELETE && ett[i].has_edge(update.edge.src, update.edge.dst)) {
+			did_cut = true;
 			ett[i].cut(update.edge.src, update.edge.dst);
 			ENDPOINT_CANARY("Cutting Tier " << i << " ETT With", update.edge.src, update.edge.dst);
 		}
+		// maintain roots of u,v endpoints
 		root_nodes[2*i] = ett[i].update_sketch(update.edge.src, (vec_t)edge);
 		root_nodes[2*i+1] = ett[i].update_sketch(update.edge.dst, (vec_t)edge);
 		ENDPOINT_CANARY("Updating Sketch With", update.edge.src, update.edge.dst);
+	}
 	STOP(sketch_time, su);

 	// Refresh the data structure
 	START(ref);
-	refresh(update);
+	this->refresh(update, did_cut);
 	STOP(refresh_time, ref);
 }

-void GraphTiers::refresh(GraphUpdate update) {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+void GraphTiers<SketchClass>::refresh(GraphUpdate update, bool did_cut) {
 	// In parallel check if all tiers are not isolated
 	START(iso);
 	std::atomic<bool> isolated(false);
-	//#pragma omp parallel for
+	// #pragma omp parallel for
 	for (uint32_t tier = 0; tier < ett.size()-1; tier++) {
 		// Check if the tree containing first endpoint is isolated
 		uint32_t tier_size1 = root_nodes[2*tier]->size;
 		uint32_t next_size1 = root_nodes[2*(tier+1)]->size;
+		// NOTE - We know that we are a subset of the next tier's component
+		// by maintenance of invariants.
+		// Thus, if the sizes are equal, we are not a proper subset
+		// but are a subset. This means we are violating the isolation condition.
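		// A compact restatement of that check (hypothetical shorthand, not
		// part of the patch): by the subset invariant, equal sizes imply
		// identical components, i.e.
		//   possibly_isolated(tier, v)  :=  size(tier, v) == size(tier + 1, v)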
 		if (tier_size1 == next_size1) {
 			root_nodes[2*tier]->process_updates();
-			Sketch* ett_agg1 = root_nodes[2*tier]->sketch_agg;
-			ett_agg1->reset_sample_state();
-			SketchSample query_result1 = ett_agg1->sample();
+			SketchClass &ett_agg1 = root_nodes[2*tier]->sketch_agg;
+			ett_agg1.reset_sample_state();
+			SketchSample<> query_result1 = ett_agg1.sample();
 			if (query_result1.result == GOOD) {
 				isolated = true;
 				continue;
@@ -91,9 +103,9 @@
 		uint32_t next_size2 = root_nodes[2*(tier+1)+1]->size;
 		if (tier_size2 == next_size2) {
 			root_nodes[2*tier+1]->process_updates();
-			Sketch* ett_agg2 = root_nodes[2*tier+1]->sketch_agg;
-			ett_agg2->reset_sample_state();
-			SketchSample query_result2 = ett_agg2->sample();
+			SketchClass &ett_agg2 = root_nodes[2*tier+1]->sketch_agg;
+			ett_agg2.reset_sample_state();
+			SketchSample<> query_result2 = ett_agg2.sample();
 			if (query_result2.result == GOOD) {
 				isolated = true;
 				continue;
@@ -101,11 +113,12 @@
 		}
 	}
 	STOP(parallel_isolated_check, iso);
+	if (isolated || did_cut) normal_refreshes++;
 	if (!isolated) return;
-	normal_refreshes++;

 	// For each tier for each endpoint of the edge
 	for (uint32_t tier = 0; tier < ett.size()-1; tier++) {
+		bool both_components_maximized = true;
 		for (node_id_t v : {update.edge.src, update.edge.dst}) {
 			// Check if the tree containing this endpoint is isolated
 			START(size);
@@ -117,14 +130,18 @@
 				continue;
 			START(agg);
-			SkipListNode* root = ett[tier].get_root(v);
+			SkipListNode<SketchClass>* root = ett[tier].get_root(v);
 			root->process_updates();
-			Sketch* ett_agg = root->sketch_agg;
+			SketchClass &ett_agg = root->sketch_agg;
 			STOP(ett_get_agg, agg);
 			START(sq);
-			ett_agg->reset_sample_state();
-			SketchSample query_result = ett_agg->sample();
+			ett_agg.reset_sample_state();
+			SketchSample<> query_result = ett_agg.sample();
 			STOP(sketch_query, sq);
+
+			if (query_result.result != ZERO) {
+				both_components_maximized = false;
+			}

 			// Check for new edge to eliminate isolation
 			if (query_result.result != GOOD)
@@ -150,7 +167,7 @@
 			// Remove the maximum tier edge on all paths where it exists
 			START(ett1);
-			#pragma omp parallel for
+			// #pragma omp parallel for
 			for (uint32_t i = max.second; i < ett.size(); i++) {
 				ett[i].cut(c,d);
 				ENDPOINT_CANARY("Cutting Tier " << i << " ETT With", c, d);
@@ -163,7 +180,7 @@
 			// Join the ETTs for the endpoints of the edge on all tiers above the current
 			START(ett2);
-			#pragma omp parallel for
+			// #pragma omp parallel for
 			for (uint32_t i = tier+1; i < ett.size(); i++) {
 				ett[i].link(a,b);
 				ENDPOINT_CANARY("Linking Tier " << i << " ETT With", a, b);
@@ -173,16 +190,20 @@
 			link_cut_tree.link(a,b, tier+1);
 			STOP(lct_time, lct4);
 		}
+		// if (both_components_maximized) {
+		// 	break;
+		// }
 	}
 }

-std::vector<std::set<node_id_t>> GraphTiers::get_cc() {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+std::vector<std::set<node_id_t>> GraphTiers<SketchClass>::get_cc() {
 	std::vector<std::set<node_id_t>> cc;
-	std::set<EulerTourNode*> visited;
+	std::set<EulerTourNode<SketchClass>*> visited;
 	int top = ett.size()-1;
 	for (uint32_t i = 0; i < ett[top].ett_nodes.size(); i++) {
-		if (visited.find(&ett[top].ett_nodes[i]) == visited.end()) {
-			std::set<EulerTourNode*> pointer_component = ett[top].ett_nodes[i].get_component();
+		if (visited.find(&ett[top].ett_node(i)) == visited.end()) {
+			std::set<EulerTourNode<SketchClass>*> pointer_component = ett[top].ett_node(i).get_component();
 			std::set<node_id_t> component;
 			for (auto pointer :
pointer_component) { component.insert(pointer->vertex); @@ -194,6 +215,9 @@ std::vector> GraphTiers::get_cc() { return cc; } -bool GraphTiers::is_connected(node_id_t a, node_id_t b) { +template requires(SketchColumnConcept) +bool GraphTiers::is_connected(node_id_t a, node_id_t b) { return this->link_cut_tree.find_root(a) == this->link_cut_tree.find_root(b); } + +template class GraphTiers; \ No newline at end of file diff --git a/src/input_node.cpp b/src/input_node.cpp index 0b3430a..291232c 100644 --- a/src/input_node.cpp +++ b/src/input_node.cpp @@ -3,6 +3,7 @@ long normal_refreshes = 0; long dt_operation_time = 0; +long num_updates = 0; InputNode::InputNode(node_id_t num_nodes, uint32_t num_tiers, int batch_size, int seed) : num_nodes(num_nodes), num_tiers(num_tiers), link_cut_tree(num_nodes), query_ett(num_nodes, 0, seed) { @@ -24,6 +25,7 @@ InputNode::~InputNode() { } void InputNode::update(GraphUpdate update) { + num_updates++; UpdateMessage update_message; update_message.update = update; update_buffer[buffer_size++] = update_message; @@ -34,10 +36,16 @@ void InputNode::update(GraphUpdate update) { void InputNode::process_updates() { if (buffer_size == 1) return; + // BUFFER PRE-PROCESSING ! + // for every update; if we know it's isolated (adds new connectivity) info, + // swap it to the front of the buffer + + uint32_t num_updates = buffer_size-1; // If less than 1/10 of the last updates are isolated use sliding window bool prev_strat = using_sliding_window; - using_sliding_window = false;//(isolation_count max_edge = link_cut_tree.path_query(update.edge.src, update.edge.dst); + split_revert_buffer[i] = max_edge.second; + // probably where most structural (spanning forest) deletes happen? + // potentially - revisit link_cut_tree.cut(update.edge.src, update.edge.dst); query_ett.cut(update.edge.src, update.edge.dst); + // transaction_log.add(update.edge, DELETE); + transaction_log.push_back(update); } } // Attempt to do the entire batch parallel with greedy refresh @@ -70,6 +83,9 @@ void InputNode::process_updates() { unlikely_if (split_revert_buffer[update_idx-1] != MAX_INT) { link_cut_tree.link(update.edge.src, update.edge.dst, split_revert_buffer[update_idx-1]); query_ett.link(update.edge.src, update.edge.dst); + // transaction_log.add(update.edge, generate_entry_dINSERT); + // // TODO - not actually sure if update is an insert type + transaction_log.push_back(GraphUpdate{update.edge, INSERT}); } } // Update the isolation history @@ -85,9 +101,11 @@ void InputNode::process_updates() { for (int update_idx = minimum_isolated_update; update_idx < end_update_idx; update_idx++) { GraphUpdate update = update_buffer[update_idx].update; START(dt_operation_timer1); - unlikely_if (update.type == DELETE && link_cut_tree.has_edge(update.edge.src, update.edge.dst)) { + unlikely_if (update.type == DELETE && query_ett.has_edge(update.edge.src, update.edge.dst)) { link_cut_tree.cut(update.edge.src, update.edge.dst); query_ett.cut(update.edge.src, update.edge.dst); + // transaction_log.add(update.edge, DELETE); + transaction_log.push_back(update); } STOP(dt_operation_time, dt_operation_timer1); uint32_t start_tier = 0; @@ -102,21 +120,27 @@ void InputNode::process_updates() { MPI_Send(&refresh_message, sizeof(RefreshMessage), MPI_BYTE, start_tier+1, 0, MPI_COMM_WORLD); for (uint32_t tier = start_tier; tier < num_tiers; tier++) { int rank = tier + 1; + // bool break_early = true; if (tier != 0) for (auto endpoint : {0,1}) { std::ignore = endpoint; // Receive a broadcast to see if the current 
tier/endpoint is isolated or not EttUpdateMessage update_message; bcast(&update_message, sizeof(UpdateMessage), rank); - if (update_message.type == NOT_ISOLATED) + if (update_message.type == NOT_ISOLATED) { continue; + } + // else { + // break_early = false; + // } this_update_isolated = true; // Process a LCT query message first LctResponseMessage response_message; - response_message.connected = link_cut_tree.find_root(update_message.endpoint1) == link_cut_tree.find_root(update_message.endpoint2); + // response_message.connected = link_cut_tree.find_root(update_message.endpoint1) == link_cut_tree.find_root(update_message.endpoint2); + response_message.connected = query_ett.is_connected(update_message.endpoint1, update_message.endpoint2); if (response_message.connected) { - std::pair max = link_cut_tree.path_aggregate(update_message.endpoint1, update_message.endpoint2); - response_message.cycle_edge = max.first; + std::pair max = link_cut_tree.path_query(update_message.endpoint1, update_message.endpoint2); + response_message.cycle_edge = VERTICES_TO_EDGE(max.first.src, max.first.dst); response_message.weight = max.second; } MPI_Send(&response_message, sizeof(LctResponseMessage), MPI_BYTE, rank, 0, MPI_COMM_WORLD); @@ -130,14 +154,21 @@ void InputNode::process_updates() { if (update_message.type == LINK) { link_cut_tree.link(update_message.endpoint1, update_message.endpoint2, update_message.start_tier); query_ett.link(update_message.endpoint1, update_message.endpoint2); + // transaction_log.add(update_message, INSERT); + transaction_log.push_back( + GraphUpdate{Edge{update_message.endpoint1, update_message.endpoint2}, INSERT}); break; } else if (update_message.type == CUT) { link_cut_tree.cut(update_message.endpoint1, update_message.endpoint2); query_ett.cut(update_message.endpoint1, update_message.endpoint2); + // transaction_log.add(update_message, DELETE); + transaction_log.push_back( + GraphUpdate{Edge{update_message.endpoint1, update_message.endpoint2}, DELETE}); } STOP(dt_operation_time, dt_operation_timer2); } } + // if (break_early) break; } isolation_count -= (int)isolation_history_queue.front(); isolation_history_queue.pop(); @@ -180,4 +211,5 @@ void InputNode::end() { std::cout << "======================= INPUT NODE ======================" << std::endl; std::cout << "Dynamic tree operations time (ms): " << dt_operation_time/1000 << std::endl; std::cout << "Normal refreshes: " << normal_refreshes << std::endl; + std::cout << "Number of updates: " << num_updates << std::endl; } diff --git a/src/link_cut_tree.cpp b/src/link_cut_tree.cpp index a7e3dde..19def89 100644 --- a/src/link_cut_tree.cpp +++ b/src/link_cut_tree.cpp @@ -228,14 +228,25 @@ LinkCutNode* LinkCutNode::splay() { return this; } -LinkCutTree::LinkCutTree(node_id_t num_nodes) : nodes(num_nodes) {} -LinkCutNode* LinkCutTree::join(LinkCutNode* v, LinkCutNode* w) { +template +LinkCutTree::LinkCutTree(node_id_t num_nodes) : max_nodes(num_nodes) { + if constexpr (std::is_same_v>) { + nodes = Container(num_nodes); + nodes.reserve(num_nodes); + for (node_id_t i = 0; i < num_nodes; ++i) + nodes.emplace_back(); + } + initialize_all_nodes(); +} + +template +LinkCutNode* LinkCutTree::join(LinkCutNode* v, LinkCutNode* w) { assert(v != nullptr && w != nullptr && v->get_parent() == nullptr && w->get_parent() == nullptr); LinkCutNode* tail = v->get_tail(); LinkCutNode* head = w->get_head(); - node_id_t tail_id = tail-&(this->nodes[0]); - node_id_t head_id = head-&(this->nodes[0]); + node_id_t tail_id = tail - this->get_node_ptr(0); + 
node_id_t head_id = head - this->get_node_ptr(0); edge_id_t edge = (tail_id < head_id) ? (((edge_id_t)tail_id << 32) + head_id) : (((edge_id_t)head_id << 32) + tail_id); tail->make_preferred_edge(edge); head->make_preferred_edge(edge); @@ -248,15 +259,16 @@ LinkCutNode* LinkCutTree::join(LinkCutNode* v, LinkCutNode* w) { return tail; } -std::pair LinkCutTree::split(LinkCutNode* v) { +template +std::pair LinkCutTree::split(LinkCutNode* v) { assert(v != nullptr); v->splay(); LinkCutNode* r = v->get_right(); LinkCutNode* w = nullptr; if (r != nullptr) { w = r->recompute_head(); - node_id_t v_id = v-&(this->nodes[0]); - node_id_t w_id = w-&(this->nodes[0]); + node_id_t v_id = v - this->get_node_ptr(0); + node_id_t w_id = w - this->get_node_ptr(0); edge_id_t edge = (v_id < w_id) ? (((edge_id_t)v_id << 32) + w_id) : (((edge_id_t)w_id << 32) + v_id); v->unmake_preferred_edge(edge); w->unmake_preferred_edge(edge); @@ -273,14 +285,16 @@ std::pair LinkCutTree::split(LinkCutNode* v) { return paths; } -LinkCutNode* LinkCutTree::splice(LinkCutNode* p) { +template +LinkCutNode* LinkCutTree::splice(LinkCutNode* p) { LinkCutNode* v = p->get_head()->get_dparent(); std::pair paths = this->split(v); p->get_head()->set_dparent(nullptr); return this->join(paths.first, p); } -LinkCutNode* LinkCutTree::expose(LinkCutNode* v) { +template +LinkCutNode* LinkCutTree::expose(LinkCutNode* v) { std::pair paths = this->split(v); LinkCutNode* p = paths.first; while(p->get_head()->get_dparent() != nullptr) { @@ -289,7 +303,8 @@ LinkCutNode* LinkCutTree::expose(LinkCutNode* v) { return p; } -LinkCutNode* LinkCutTree::evert(LinkCutNode* v) { +template +LinkCutNode* LinkCutTree::evert(LinkCutNode* v) { LinkCutNode* p = this->expose(v); p->reverse(); p->recompute_head(); @@ -297,10 +312,11 @@ LinkCutNode* LinkCutTree::evert(LinkCutNode* v) { return p; } -void LinkCutTree::link(node_id_t v, node_id_t w, uint32_t weight) { +template +void LinkCutTree::link(node_id_t v, node_id_t w, uint32_t weight) { assert(find_root(v) != find_root(w)); - LinkCutNode* v_node = &this->nodes[v]; - LinkCutNode* w_node = &this->nodes[w]; + LinkCutNode* v_node = this->get_node_ptr(v); + LinkCutNode* w_node = this->get_node_ptr(w); edge_id_t edge = (v < w) ? (((edge_id_t)v << 32) + w) : (((edge_id_t)w << 32) + v); v_node->insert_edge(edge, weight); w_node->insert_edge(edge, weight); @@ -311,10 +327,11 @@ void LinkCutTree::link(node_id_t v, node_id_t w, uint32_t weight) { this->join(p_v, p_w); } -void LinkCutTree::cut(node_id_t v, node_id_t w) { +template +void LinkCutTree::cut(node_id_t v, node_id_t w) { assert(find_root(v) == find_root(w)); - LinkCutNode* v_node = &this->nodes[v]; - LinkCutNode* w_node = &this->nodes[w]; + LinkCutNode* v_node = this->get_node_ptr(v); + LinkCutNode* w_node = this->get_node_ptr(w); edge_id_t edge = (v < w) ? 
(((edge_id_t)v << 32) + w) : (((edge_id_t)w << 32) + v); v_node->remove_edge(edge); w_node->remove_edge(edge); @@ -323,36 +340,42 @@ void LinkCutTree::cut(node_id_t v, node_id_t w) { w_node->set_dparent(nullptr); } -void* LinkCutTree::find_root(node_id_t v) { - return this->expose(&this->nodes[v])->get_head(); +template +void* LinkCutTree::find_root(node_id_t v) { + return this->expose(this->get_node_ptr(v))->get_head(); } -std::pair LinkCutTree::path_aggregate(node_id_t v, node_id_t w) { +template +std::pair LinkCutTree::path_aggregate(node_id_t v, node_id_t w) { assert(find_root(v) == find_root(w)); - LinkCutNode* v_node = &this->nodes[v]; - LinkCutNode* w_node = &this->nodes[w]; + LinkCutNode* v_node = this->get_node_ptr(v); + LinkCutNode* w_node = this->get_node_ptr(w); this->evert(v_node); LinkCutNode* p = this->expose(w_node); return p->get_max_edge(); } -bool LinkCutTree::has_edge(node_id_t v1, node_id_t v2) { +template +bool LinkCutTree::has_edge(node_id_t v1, node_id_t v2) { edge_id_t e = VERTICES_TO_EDGE(v1, v2); - return nodes[v1].has_edge(e); + return this->node(v1).has_edge(e); } -uint32_t LinkCutTree::get_edge_weight(node_id_t v1, node_id_t v2) { +template +uint32_t LinkCutTree::get_edge_weight(node_id_t v1, node_id_t v2) { edge_id_t e = VERTICES_TO_EDGE(v1, v2); - return nodes[v1].get_edge_weight(e); + return this->node(v1).get_edge_weight(e); } -std::vector> LinkCutTree::get_cc() { +template +std::vector> LinkCutTree::get_cc() { std::map> cc_map; std::map visited; - for (uint32_t i = 0; i < nodes.size(); i++) { - if (visited.find(&nodes[i]) == visited.end()) { + for (uint32_t i = 0; i < max_nodes; i++) { + if (!is_initialized(i)) continue; + if (visited.find(this->get_node_ptr(i)) == visited.end()) { std::set node_component; - LinkCutNode* curr = &nodes[i]; + LinkCutNode* curr = this->get_node_ptr(i); while ((curr->get_parent() && visited.find(curr->get_parent()) == visited.end()) || (curr->get_head()->get_dparent() && visited.find(curr->get_head()->get_dparent()) == visited.end())) { node_component.insert(curr); @@ -367,9 +390,9 @@ std::vector> LinkCutTree::get_cc() { std::set component; cc_map.insert({root, component}); } - for (auto node : node_component) { - cc_map[root].insert(node-&nodes[0]); - visited.insert({node, root}); + for (auto n : node_component) { + cc_map[root].insert(n - this->get_node_ptr(0)); + visited.insert({n, root}); } } } @@ -379,3 +402,6 @@ std::vector> LinkCutTree::get_cc() { } return cc; } + + +template class LinkCutTree<>; \ No newline at end of file diff --git a/src/sketchless_euler_tour_tree.cpp b/src/sketchless_euler_tour_tree.cpp index f6efd78..5d291e2 100644 --- a/src/sketchless_euler_tour_tree.cpp +++ b/src/sketchless_euler_tour_tree.cpp @@ -2,32 +2,37 @@ #include - -SketchlessEulerTourTree::SketchlessEulerTourTree(node_id_t num_nodes, uint32_t tier_num, int seed) { - // Initialize all the ETT node - ett_nodes.reserve(num_nodes); - for (node_id_t i = 0; i < num_nodes; ++i) { - ett_nodes.emplace_back(seed, i, tier_num); - } +template +SketchlessEulerTourTree::SketchlessEulerTourTree(node_id_t num_nodes, uint32_t tier_num, size_t seed) : seed(seed), tier_num(tier_num), max_num_nodes(num_nodes) { + if constexpr (std::is_same_v>) { + ett_nodes.reserve(num_nodes); + for (node_id_t i = 0; i < num_nodes; ++i) { + ett_nodes.emplace_back(seed, i, tier_num); + } + } } -void SketchlessEulerTourTree::link(node_id_t u, node_id_t v) { - ett_nodes[u].link(ett_nodes[v]); +template +void SketchlessEulerTourTree::link(node_id_t u, node_id_t v) { + 
ett_node(u).link(ett_node(v)); } - -void SketchlessEulerTourTree::cut(node_id_t u, node_id_t v) { - ett_nodes[u].cut(ett_nodes[v]); +template +void SketchlessEulerTourTree::cut(node_id_t u, node_id_t v) { + ett_node(u).cut(ett_node(v)); } -bool SketchlessEulerTourTree::has_edge(node_id_t u, node_id_t v) { - return ett_nodes[u].has_edge_to(&ett_nodes[v]); +template +bool SketchlessEulerTourTree::has_edge(node_id_t u, node_id_t v) { + return ett_node(u).has_edge_to(&ett_node(v)); } -SketchlessSkipListNode* SketchlessEulerTourTree::get_root(node_id_t u) { - return ett_nodes[u].get_root(); +template +SketchlessSkipListNode* SketchlessEulerTourTree::get_root(node_id_t u) { + return ett_node(u).get_root(); } -bool SketchlessEulerTourTree::is_connected(node_id_t u, node_id_t v) { +template +bool SketchlessEulerTourTree::is_connected(node_id_t u, node_id_t v) { return get_root(u) == get_root(v); } @@ -168,12 +173,14 @@ bool SketchlessEulerTourNode::cut(SketchlessEulerTourNode& other) { return true; } -std::vector> SketchlessEulerTourTree::cc_query() { +template +std::vector> SketchlessEulerTourTree::cc_query() { std::vector> cc; std::set visited; - for (uint32_t i = 0; i < ett_nodes.size(); i++) { - if (visited.find(&ett_nodes[i]) == visited.end()) { - std::set pointer_component = ett_nodes[i].get_component(); + // TODO - reimplement this. + for (uint32_t i = 0; i < max_num_nodes; i++) { + if (visited.find(&ett_node(i)) == visited.end()) { + std::set pointer_component = ett_node(i).get_component(); std::set component; for (auto pointer : pointer_component) { component.insert(pointer->vertex); @@ -184,3 +191,8 @@ std::vector> SketchlessEulerTourTree::cc_query() { } return cc; } + + +template class SketchlessEulerTourTree<>; +// template class SketchlessEulerTourTree>; +// template class SketchlessEulerTourTree>; diff --git a/src/skiplist.cpp b/src/skiplist.cpp index 53b4ec0..e5a5b0f 100644 --- a/src/skiplist.cpp +++ b/src/skiplist.cpp @@ -10,16 +10,26 @@ long skiplist_seed = time(NULL); vec_t sketch_len; vec_t sketch_err; -SkipListNode::SkipListNode(EulerTourNode* node, long seed, bool has_sketch) : node(node) { - if (has_sketch) sketch_agg = new Sketch(sketch_len, seed, 1, sketch_err); +template requires(SketchColumnConcept) +SkipListNode::SkipListNode(EulerTourNode* node, long seed, bool has_sketch) : node(node), sketch_agg(0, seed) { + // if (has_sketch) sketch_agg = new Sketch(sketch_len, seed, 1, sketch_err); + // TODO - FIGURE OUT HOW TO DO SEEDING PROPERLY + // if (has_sketch) + // sketch_agg = new SketchClass( + // SketchClass::suggest_capacity(sketch_len), seed); + if (has_sketch) + this->sketch_agg = SketchClass(SketchClass::suggest_capacity(sketch_len), seed); + return; } -SkipListNode::~SkipListNode() { - if (sketch_agg) delete sketch_agg; +template requires(SketchColumnConcept) +SkipListNode::~SkipListNode() { + // if (sketch_agg) delete sketch_agg; } -void SkipListNode::uninit_element(bool delete_bdry) { - SkipListNode* list_curr = this; +template requires(SketchColumnConcept) +void SkipListNode::uninit_element(bool delete_bdry) { + SkipListNode* list_curr = this; SkipListNode* list_prev; SkipListNode* bdry_curr = this->left; SkipListNode* bdry_prev; @@ -37,7 +47,8 @@ void SkipListNode::uninit_element(bool delete_bdry) { } } -SkipListNode* SkipListNode::init_element(EulerTourNode* node, bool is_allowed_caller) { +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::init_element(EulerTourNode* node, bool is_allowed_caller) { long seed = node->get_seed(); // NOTE: WE 
SHOULD MAKE IT SO DIFFERENT SKIPLIST NODES FOR THE SAME ELEMENT CAN BE DIFFERENT HEIGHTS
 	uint64_t element_height = height_factor*__builtin_ctzll(XXH3_64bits_withSeed(&node->vertex, sizeof(node_id_t), skiplist_seed))+1;
@@ -77,7 +88,8 @@
 	return root->get_last();
 }

-SkipListNode* SkipListNode::get_parent() {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+SkipListNode<SketchClass>* SkipListNode<SketchClass>::get_parent() const {
 	// SkipListNode* curr = this;
 	// while (curr && !curr->up) {
 	// 	curr = curr->left;
@@ -86,17 +98,19 @@
 	return parent;
 }

-SkipListNode* SkipListNode::get_root() {
-	SkipListNode* prev = nullptr;
-	SkipListNode* curr = this;
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+SkipListNode<SketchClass>* SkipListNode<SketchClass>::get_root() const {
+	const SkipListNode<SketchClass>* prev = nullptr;
+	const SkipListNode<SketchClass>* curr = this;
 	while (curr) {
 		prev = curr;
 		curr = prev->get_parent();
 	}
-	return prev;
+	return (SkipListNode<SketchClass>*) prev;
 }

-SkipListNode* SkipListNode::get_first() {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+SkipListNode<SketchClass>* SkipListNode<SketchClass>::get_first() const {
 	// Go to the root first and then down to the first element, because if we start at some lower level
 	// we may have to travel right a lot more on that level, takes log time instead of linear time
 	SkipListNode* prev = nullptr;
@@ -108,7 +122,8 @@
 	return prev;
 }

-SkipListNode* SkipListNode::get_last() {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+SkipListNode<SketchClass>* SkipListNode<SketchClass>::get_last() const {
 	// Go to the root first and then down to the last element, because if we start at some lower level
 	// we may have to travel left a lot more on that level, takes log time instead of linear time
 	SkipListNode* prev = nullptr;
@@ -120,32 +135,45 @@
 	return prev;
 }

-uint32_t SkipListNode::get_list_size() {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+uint32_t SkipListNode<SketchClass>::get_list_size() {
 	return this->get_root()->size;
 }

-Sketch* SkipListNode::get_list_aggregate() {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+const SketchClass& SkipListNode<SketchClass>::get_list_aggregate() {
 	return this->get_root()->sketch_agg;
 }

-void SkipListNode::update_agg(vec_t update_idx) {
-	if (!this->sketch_agg) // Only do something if this node has a sketch
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+void SkipListNode<SketchClass>::update_agg(vec_t update_idx) {
+	if (!this->sketch_agg.is_initialized()) // Only do something if this node has a sketch
 		return;
 	this->update_buffer[this->buffer_size] = update_idx;
 	this->buffer_size++;
-	if (this->buffer_size == skiplist_buffer_cap)
+	if (this->buffer_size == SKETCH_BUFFER_SIZE)
 		this->process_updates();
 }

-void SkipListNode::process_updates() {
-	if (!this->sketch_agg) // Only do something if this node has a sketch
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+void SkipListNode<SketchClass>::update_agg_atomic(vec_t update_idx) {
+	if (!this->sketch_agg.is_initialized()) // Only do something if this node has a sketch
+		return;
+	// TODO - do we need to do batching here too?
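	// Unlike update_agg above, this path applies the index to the sketch
	// immediately instead of staging it in update_buffer; batching here would
	// need a thread-safe cursor, since buffer_size++ is not safe under
	// concurrent callers (presumably what the TODO above is weighing).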
+ this->sketch_agg.atomic_update(update_idx); +} + +template requires(SketchColumnConcept) +void SkipListNode::process_updates() { + if (!this->sketch_agg.is_initialized()) // Only do something if this node has a sketch return; for (int i = 0; i < buffer_size; ++i) - this->sketch_agg->update(update_buffer[i]); + this->sketch_agg.update(update_buffer[i]); this->buffer_size = 0; } -SkipListNode* SkipListNode::update_path_agg(vec_t update_idx) { +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg(vec_t update_idx) { SkipListNode* curr = this; SkipListNode* prev; while (curr) { @@ -156,22 +184,115 @@ SkipListNode* SkipListNode::update_path_agg(vec_t update_idx) { return prev; } -SkipListNode* SkipListNode::update_path_agg(Sketch* sketch) { +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg_atomic(vec_t update_idx) { + SkipListNode* curr = this; + SkipListNode* prev; + while (curr) { + curr->update_agg_atomic(update_idx); + prev = curr; + curr = prev->get_parent(); + } + return prev; +} +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg(const ColumnEntryDelta &delta) { + SkipListNode* curr = this; + SkipListNode* prev; + while (curr) { + // __builtin_prefetch(curr->get_parent()); + curr->update_agg_entry_delta(delta); + prev = curr; + curr = prev->get_parent(); + } + return prev; +} +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg_atomic(const ColumnEntryDelta &delta) { + SkipListNode* curr = this; + SkipListNode* prev; + while (curr) { + // __builtin_prefetch(curr->get_parent()); + curr->update_agg_atomic_entry_delta(delta); + prev = curr; + curr = prev->get_parent(); + } + return prev; +} + +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg(const ColumnEntryDeltas &deltas) { SkipListNode* curr = this; SkipListNode* prev; while (curr) { - if (!curr->sketch_agg) - curr->sketch_agg = sketch; - else - curr->sketch_agg->merge(*sketch); + curr->update_agg_entry_deltas(deltas); prev = curr; curr = prev->get_parent(); } return prev; } -std::set SkipListNode::get_component() { - std::set nodes; +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg_atomic(const ColumnEntryDeltas &deltas) { + SkipListNode* curr = this; + SkipListNode* prev; + while (curr) { + curr->update_agg_atomic_entry_deltas(deltas); + prev = curr; + curr = prev->get_parent(); + } + return prev; +} + + +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg(SketchClass &sketch) { + // returns the last node that was updated + SkipListNode* curr = this; + SkipListNode* prev; + if (!this->sketch_agg.is_initialized()) { + this->sketch_agg = std::move(sketch); + prev = curr; + curr = prev->get_parent(); + while (curr) + { + curr->sketch_agg.merge(this->sketch_agg); + prev = curr; + curr = prev->get_parent(); + } + // this->sketch_agg.zero_contents(); + } else { + while (curr) { + curr->sketch_agg.merge(sketch); + prev = curr; + curr = prev->get_parent(); + } + } + return prev; +} + +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg(const SketchClass &sketch) { + // returns the last node that was updated + SkipListNode* curr = this; + SkipListNode* prev; + if (!this->sketch_agg.is_initialized()) { + assert(false); + // NOTE - SHOULD NOT USE IN THIS CASE + // TODO - make this code less confusing if possible. 
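	// For context (inferred from the two overloads): the non-const overload
	// above can adopt an uninitialized aggregate via std::move(sketch), while
	// this const overload cannot, so reaching here uninitialized is treated
	// as a logic error rather than silently copying.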
+ } else { + while (curr) { + curr->sketch_agg.merge(sketch); + prev = curr; + curr = prev->get_parent(); + } + } + return prev; +} + +template requires(SketchColumnConcept) +std::set*> SkipListNode::get_component() { + std::set*> nodes; SkipListNode* curr = this->get_first()->right; //Skip over the boundary node while (curr) { nodes.insert(curr->node); @@ -180,7 +301,8 @@ std::set SkipListNode::get_component() { return nodes; } -void SkipListNode::uninit_list() { +template requires(SketchColumnConcept) +void SkipListNode::uninit_list() { SkipListNode* curr = this->get_first(); SkipListNode* prev; while (curr) { @@ -191,13 +313,14 @@ void SkipListNode::uninit_list() { prev->uninit_element(false); } -SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { assert(left || right); if (!left) return right->get_root(); if (!right) return left->get_root(); - long seed = left->sketch_agg ? left->sketch_agg->get_seed() - : left->get_parent()->sketch_agg->get_seed(); + long seed = left->sketch_agg.is_initialized() ? left->sketch_agg.get_seed() + : left->get_parent()->sketch_agg.get_seed(); SkipListNode* l_curr = left->get_last(); SkipListNode* r_curr = right->get_first(); // this is the bottom boundary node @@ -211,8 +334,8 @@ SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { l_curr->right = r_curr->right; // skip over boundary node if (r_curr->right) r_curr->right->left = l_curr; // skip over boundary node, but to the left r_curr->process_updates(); - if (l_curr->sketch_agg && r_curr->sketch_agg) // Only if that skiplist node has a sketch - l_curr->sketch_agg->merge(*r_curr->sketch_agg); + if (l_curr->sketch_agg.is_initialized() && r_curr->sketch_agg.is_initialized()) // Only if that skiplist node has a sketch + l_curr->sketch_agg.merge(r_curr->sketch_agg); l_curr->size += r_curr->size-1; if (r_prev) delete r_prev; // Delete old boundary nodes @@ -224,7 +347,7 @@ SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { // If left list was taller add the root agg in right to the rest in left while (l_curr) { - l_curr->sketch_agg->merge(*r_prev->sketch_agg); + l_curr->sketch_agg.merge(r_prev->sketch_agg); l_curr->size += r_prev->size-1; l_prev = l_curr; l_curr = l_prev->get_parent(); @@ -233,10 +356,12 @@ SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { // If right list was taller add new boundary nodes to left list if (r_curr) { // Cache the left root to initialize the new boundary nodes - Sketch* l_root_agg = new Sketch(sketch_len, seed, 1, sketch_err); + // Sketch* l_root_agg = new Sketch(sketch_len, seed, 1, sketch_err); + SketchClass l_root_agg = SketchClass( + SketchClass::suggest_capacity(sketch_len), seed); l_prev->process_updates(); - l_root_agg->merge(*l_prev->sketch_agg); - l_root_agg->merge(*r_prev->sketch_agg); + l_root_agg.merge(l_prev->sketch_agg); + l_root_agg.merge(r_prev->sketch_agg); uint32_t l_root_size = l_prev->size - (r_prev->size-1); while (r_curr) { l_curr = new SkipListNode(nullptr, seed, true); @@ -246,10 +371,10 @@ SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { l_curr->right = r_curr->right; if (r_curr->right) r_curr->right->left = l_curr; - l_curr->sketch_agg->merge(*l_root_agg); + l_curr->sketch_agg.merge(l_root_agg); l_curr->size = l_root_size; r_curr->process_updates(); - l_curr->sketch_agg->merge(*r_curr->sketch_agg); + 
l_curr->sketch_agg.merge(r_curr->sketch_agg); l_curr->size += r_curr->size-1; if (r_prev) delete r_prev; // Delete old boundary nodes @@ -257,7 +382,7 @@ SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { r_prev = r_curr; r_curr = r_prev->up; } - delete l_root_agg; + // delete l_root_agg; } delete r_prev; // Update parent pointers in right list @@ -273,7 +398,8 @@ SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { return l_prev; } -SkipListNode* SkipListNode::split_left(SkipListNode* node) { +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::split_left(SkipListNode* node) { assert(node && node->left && !node->down); // If just splitting off the boundary nodes do nothing instead if (!node->left->left) { @@ -292,19 +418,19 @@ SkipListNode* SkipListNode::split_left(SkipListNode* node) { r_curr->left = bdry; bdry->right = r_curr; l_curr->right = nullptr; - if (l_curr->sketch_agg && bdry->sketch_agg) // Only if its not the bottom sketchless node - l_curr->sketch_agg->merge(*bdry->sketch_agg); // XOR addition same as subtraction + if (l_curr->sketch_agg.is_initialized() && bdry->sketch_agg.is_initialized()) // Only if its not the bottom sketchless node + l_curr->sketch_agg.merge(bdry->sketch_agg); // XOR addition same as subtraction l_curr->size -= bdry->size-1; // Get next l_curr, r_curr, and bdry l_curr = l_curr->get_parent(); new_bdry = new SkipListNode(nullptr, seed, true); - if (bdry->sketch_agg) // Only if its not the bottom sketchless node - new_bdry->sketch_agg->merge(*bdry->sketch_agg); + if (bdry->sketch_agg.is_initialized()) // Only if its not the bottom sketchless node + new_bdry->sketch_agg.merge(bdry->sketch_agg); new_bdry->size = bdry->size; while (r_curr && !r_curr->up) { r_curr->process_updates(); - if (r_curr->sketch_agg) // Only if that skiplist node has a sketch - new_bdry->sketch_agg->merge(*r_curr->sketch_agg); + if (r_curr->sketch_agg.is_initialized()) // Only if that skiplist node has a sketch + new_bdry->sketch_agg.merge(r_curr->sketch_agg); new_bdry->size += r_curr->size; r_curr->parent = new_bdry; r_curr = r_curr->right; @@ -318,7 +444,7 @@ SkipListNode* SkipListNode::split_left(SkipListNode* node) { // Subtract the final right agg from the rest of the aggs on left path SkipListNode* l_prev = nullptr; while (l_curr) { - l_curr->sketch_agg->merge(*bdry->sketch_agg); // XOR addition same as subtraction + l_curr->sketch_agg.merge(bdry->sketch_agg); // XOR addition same as subtraction l_curr->size -= bdry->size-1; l_prev = l_curr; l_curr = l_curr->get_parent(); @@ -336,7 +462,8 @@ SkipListNode* SkipListNode::split_left(SkipListNode* node) { return l_prev; } -SkipListNode* SkipListNode::split_right(SkipListNode* node) { +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::split_right(SkipListNode* node) { assert(node); SkipListNode* right = node->right; if (!right) return nullptr; @@ -344,6 +471,10 @@ SkipListNode* SkipListNode::split_right(SkipListNode* node) { return right->get_root(); } -SkipListNode* SkipListNode::next() { +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::next() { return this->right; } + + +template class SkipListNode; \ No newline at end of file diff --git a/src/tier_node.cpp b/src/tier_node.cpp index 805d25d..0830e28 100644 --- a/src/tier_node.cpp +++ b/src/tier_node.cpp @@ -31,7 +31,8 @@ void TierNode::main() { // Receive a batch of updates and check if it is the end of stream bcast(update_buffer, sizeof(UpdateMessage)*(batch_size+1), 0); if 
(update_buffer[0].end) { - // std::cout << "============= TIER " << tier_num << " NODE =============" << std::endl; + std::cout << "============= TIER " << tier_num << " NODE =============" << std::endl + << "Number of components: " << ett.num_components() << std::endl; // std::cout << "Greedy batch time (ms): " << greedy_batch_time/1000 << std::endl; // std::cout << "\tSketch update time (ms): " << sketch_update_time/1000 << std::endl; // std::cout << "\tSketch query time (ms): " << sketch_query_time/1000 << std::endl; @@ -48,6 +49,9 @@ void TierNode::main() { for (uint32_t i = 0; i < num_updates; i++) { // Perform the sketch updating or root finding GraphUpdate update = update_buffer[i+1].update; + // TODO - do this in a different way? + initialize_node(update.edge.src); + initialize_node(update.edge.dst); edge_id_t edge = VERTICES_TO_EDGE(update.edge.src, update.edge.dst); split_revert_buffer[i] = false; unlikely_if (update.type == DELETE && ett.has_edge(update.edge.src, update.edge.dst)) { @@ -58,11 +62,11 @@ void TierNode::main() { auto roots = ett.update_sketches(update.edge.src, update.edge.dst, (vec_t)edge); ENDPOINT_CANARY("Updating Sketch With", update.edge.src, update.edge.dst); roots.first->process_updates(); - roots.first->sketch_agg->reset_sample_state(); - query_result_buffer[2*i] = roots.first->sketch_agg->sample().result; + roots.first->sketch_agg.reset_sample_state(); + query_result_buffer[2*i] = roots.first->sketch_agg.sample().result; roots.second->process_updates(); - roots.second->sketch_agg->reset_sample_state(); - query_result_buffer[2*i+1] = roots.second->sketch_agg->sample().result; + roots.second->sketch_agg.reset_sample_state(); + query_result_buffer[2*i+1] = roots.second->sketch_agg.sample().result; // Prepare greedy batch size messages GreedyRefreshMessage this_sizes; @@ -152,11 +156,11 @@ void TierNode::main() { e2.v = refresh_message.endpoints.second.v; for (RefreshEndpoint* e : {&e1, &e2}) { e->prev_tier_size = ett.get_size(e->v); - SkipListNode* root = ett.get_root(e->v); + SkipListNode* root = ett.get_root(e->v); root->process_updates(); - Sketch* ett_agg = root->sketch_agg; - ett_agg->reset_sample_state(); - e->sketch_query_result = ett_agg->sample(); + DefaultSketchColumn &ett_agg = root->sketch_agg; + ett_agg.reset_sample_state(); + e->sketch_query_result = ett_agg.sample(); } RefreshMessage next_refresh_message; next_refresh_message.endpoints = {e1, e2}; @@ -169,6 +173,8 @@ void TierNode::main() { for (int endpoint : {0,1}) { std::ignore = endpoint; // Receive a broadcast to see if the endpoint at the current tier is isolated or not + // OR to see if the component is maximized. 
+					// if the component is maximized, further broadcasts are not needed
 					EttUpdateMessage update_message;
 					bcast(&update_message, sizeof(EttUpdateMessage), rank);
 					if (update_message.type == NOT_ISOLATED) continue;
diff --git a/test/euler_tour_tree_test.cpp b/test/euler_tour_tree_test.cpp
index 28f6273..7ec8e1b 100644
--- a/test/euler_tour_tree_test.cpp
+++ b/test/euler_tour_tree_test.cpp
@@ -6,7 +6,10 @@

 #include
-bool EulerTourNode::isvalid() const {
+#include "sketch_interfacing.h"
+
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+bool EulerTourNode<SketchClass>::isvalid() const {
 	bool invalid = false;
 	// validate allowed_caller is null iff edges is empty
 	EXPECT_EQ(allowed_caller == nullptr, this->edges.empty()) << (invalid = true, "");
@@ -45,7 +48,8 @@
 	return true;
 }

-std::ostream& operator<<(std::ostream& os, const EulerTourNode& ett) {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+std::ostream& operator<<(std::ostream& os, const EulerTourNode<SketchClass>& ett) {
 	os << "EulerTourNode " << &ett << std::endl;
 	for (const auto& [k, v] : ett.edges) {
 		os << "to EulerTourNode " << k << " is " << &v << std::endl;
@@ -55,8 +59,9 @@
 	return os;
 }

+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
 std::ostream& operator<<(std::ostream& os,
-	const std::vector<EulerTourNode>& nodes) {
+	const std::vector<EulerTourNode<SketchClass>>& nodes) {
 	for (const auto& node : nodes) {
 		os << node;
 	}
@@ -74,6 +79,8 @@
 	srand(seed);
 	std::cout << "Seeding stress test with " << seed << std::endl;
 	EulerTourTree ett(nodecount, 0, seed);
+	// ensure that all nodes are initialized:
+	ett.initialize_all_nodes();

 	for (int i = 0; i < n; i++) {
 		int a = rand() % nodecount, b = rand() % nodecount;
@@ -84,11 +91,18 @@
 		}
 		if (i % n/100 == 0) {
-			ASSERT_TRUE(std::all_of(ett.ett_nodes.begin(), ett.ett_nodes.end(),
-				[](auto& node){return node.isvalid();}))
-				<< "Stress test validation failed, final state:"
-				<< std::endl
-				<< ett.ett_nodes;
+			// TODO - bring back these test cases
+			for (int j=0; j < nodecount; j++) {
+				ASSERT_TRUE(ett.ett_node(j).isvalid());
+				// << "Stress test validation failed at iteration "
+				// << i << ", node " << j << ", final state:"
+				// << std::endl << ett.ett_nodes;
+			}
+			// ASSERT_TRUE(std::all_of(ett.ett_nodes.begin(), ett.ett_nodes.end(),
+			// 	[](auto& node){return node.isvalid();}))
+			// 	<< "Stress test validation failed, final state:"
+			// 	<< std::endl
+			// 	<< ett.ett_nodes;
 		}
 	}
 }
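A note on the design choice above: the old std::all_of assertion iterated ett_nodes directly, which presumably no longer works once the node container is templated (vector vs. hashmap); indexing through the ett_node(j) accessor keeps the validation container-agnostic, at the cost of the richer failure message that is now commented out.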
failed, final state:" + // << std::endl + // << ett.ett_nodes; } - std::unordered_set sentinels; + std::unordered_set*> sentinels; for (int i = 0; i < nodecount; i++) { - SkipListNode *sentinel = ett.ett_nodes[i].edges.begin()->second->get_last(); + SkipListNode *sentinel = ett.ett_node(i).edges.begin()->second->get_last(); sentinels.insert(sentinel); } // Walk up from an occurrence of each node to the root of its auxiliary tre - std::unordered_map aggs; - std::unordered_map sizes; + std::unordered_map*, DefaultSketchColumn*> aggs; + std::unordered_map*, uint32_t> sizes; for (int i = 0; i < nodecount; i++) { - SkipListNode* sentinel = ett.ett_nodes[i].edges.begin()->second->get_last(); + SkipListNode* sentinel = ett.ett_node(i).edges.begin()->second->get_last(); if (aggs.find(sentinel) == aggs.end()) { - Sketch* agg = new Sketch(sketch_len, seed, 1, sketch_err); + // DefaultSketchColumn* agg = new Sketch(sketch_len, seed, 1, sketch_err); + DefaultSketchColumn *agg = new DefaultSketchColumn( + DefaultSketchColumn::suggest_capacity(sketch_len), seed); aggs.insert({sentinel, agg}); - SkipListNode* sentinel_root = sentinel->get_root(); + SkipListNode* sentinel_root = sentinel->get_root(); + sentinel_root->process_updates(); - aggs[sentinel]->merge(*sentinel->get_list_aggregate()); + aggs[sentinel]->merge(sentinel->get_list_aggregate()); sizes[sentinel] = sentinel->get_list_size(); } } - std::unordered_map naive_aggs; - std::unordered_map naive_sizes; + std::unordered_map*, DefaultSketchColumn*> naive_aggs; + std::unordered_map*, uint32_t> naive_sizes; // Naively compute aggregates for each connected component for (int i = 0; i < nodecount; i++) { - SkipListNode* sentinel = ett.ett_nodes[i].edges.begin()->second->get_last(); + SkipListNode* sentinel = ett.ett_node(i).edges.begin()->second->get_last(); sentinel->process_updates(); if (naive_aggs.find(sentinel) != naive_aggs.end()) { - naive_aggs[sentinel]->merge(*ett.ett_nodes[i].allowed_caller->sketch_agg); + naive_aggs[sentinel]->merge(ett.ett_node(i).allowed_caller->sketch_agg); naive_sizes[sentinel] += 1; } else { - Sketch* agg = new Sketch(sketch_len, seed, 1, sketch_err); + // Sketch* agg = new Sketch(sketch_len, seed, 1, sketch_err); + // DefaultSketchColumn *agg = new DefaultSketchColumn(4, 0); + DefaultSketchColumn *agg = new DefaultSketchColumn( + DefaultSketchColumn::suggest_capacity(sketch_len), seed); naive_aggs.insert({sentinel, agg}); - naive_aggs[sentinel]->merge(*ett.ett_nodes[i].allowed_caller->sketch_agg); + naive_aggs[sentinel]->merge(ett.ett_node(i).allowed_caller->sketch_agg); naive_sizes[sentinel] = 1; } } @@ -187,10 +214,14 @@ TEST(EulerTourTreeSuite, get_aggregate) { std::cout << "Seeding get aggregate test with " << seed << std::endl; // Keep a manual aggregate of all the sketches - Sketch true_aggregate(sketch_len, seed, 1, sketch_err); + // DefaultSketchColumn true_aggregate(sketch_len, seed, 1, sketch_err); + // DefaultSketchColumn true_aggregate(4, 0); + DefaultSketchColumn true_aggregate( + DefaultSketchColumn::suggest_capacity(sketch_len), seed); int nodecount = 1000; - EulerTourTree ett(nodecount, 0, seed); + EulerTourTree ett(nodecount, 0, seed); + ett.initialize_all_nodes(); // Add value to each sketch, update the manual aggregate for (int i = 0; i < nodecount; i++) @@ -205,6 +236,6 @@ TEST(EulerTourTreeSuite, get_aggregate) { } // Check that the ETT aggregate is properly maintained and gotten - Sketch* aggregate = ett.get_aggregate(0); - ASSERT_TRUE(*aggregate == true_aggregate); + const DefaultSketchColumn 
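+ // NOTE: get_aggregate() now hands back a const reference rather than an
+ // owning Sketch pointer, so the assertion compares column contents directly.
+ const DefaultSketchColumn 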
&aggregate = ett.get_aggregate(0); + ASSERT_TRUE(aggregate == true_aggregate); } diff --git a/test/graph_tiers_test.cpp b/test/graph_tiers_test.cpp index 53ac9a8..a08f82b 100644 --- a/test/graph_tiers_test.cpp +++ b/test/graph_tiers_test.cpp @@ -5,12 +5,17 @@ #include #include #include "graph_tiers.h" +#include "batch_tiers.h" #include "binary_graph_stream.h" -#include "mat_graph_verifier.h" +// #include "mat_graph_verifier.h" +#include "graph_verifier.h" #include "util.h" const vec_t DEFAULT_SKETCH_ERR = 1; +// using GraphTierSystem = GraphTiers; +using GraphTierSystem = BatchTiers; + auto start = std::chrono::high_resolution_clock::now(); auto stop = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(stop - start); @@ -32,54 +37,132 @@ static void print_metrics() { std::cout << "Total number of normal refreshes: " << normal_refreshes << std::endl; } +TEST(GraphTiersSuite, gibbs_mixed_speed_test) { + BinaryGraphStream stream(stream_file, 100000); + long edgecount = stream.edges(); + // height_factor = 1;//1./log2(log2(stream.nodes())); + height_factor = 1/log2(log2(stream.nodes())); + sketch_len = Sketch::calc_vector_length(stream.nodes()); + sketch_err = DEFAULT_SKETCH_ERR; + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + GraphTierSystem gt(stream.nodes(), seed); + gt.initialize_all_nodes(); + + long total_update_time = 0; + long total_query_time = 0; + auto update_timer = std::chrono::high_resolution_clock::now(); + auto query_timer = update_timer; + bool doing_updates = true; + for (long i = 0; i < edgecount; i++) { + // Read an update from the stream and have the input node process it + GraphUpdate operation = stream.get_edge(); + if (operation.type == 2) { // 2 is the symbol for queries + unlikely_if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + doing_updates = false; + query_timer = std::chrono::high_resolution_clock::now(); + } + gt.is_connected(operation.edge.src, operation.edge.dst); + } else { + unlikely_if (!doing_updates) { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + doing_updates = true; + update_timer = std::chrono::high_resolution_clock::now(); + } + gt.update(operation); + } + unlikely_if(i%1000000 == 0 || i == edgecount-1) { + std::cout << "FINISHED OPERATION " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; + } + } + if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + } else { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + } + + std::cout << "Total update time(ms): " << (total_update_time/1000) << std::endl; + std::cout << "Total query time(ms): " << (total_query_time/1000) << std::endl; + + std::ofstream file; + std::string out_file = "./../results/gibbs_speed_results/" + stream_file.substr(stream_file.find("/") + 1) + ".txt"; + std::cout << "WRITING RESULTS TO " << out_file << std::endl; + file.open (out_file, std::ios_base::app); + file << " UPDATES/SECOND: " << ((long)(0.9*edgecount))/(1 + total_update_time/1000)*1000 << std::endl; + file << " QUERIES/SECOND: " << ((long)(0.1*edgecount))/(1 + total_query_time/1000)*1000 << std::endl; + file.close(); +} + TEST(GraphTiersSuite, mini_correctness_test) { + node_id_t 
numnodes = 10;
- GraphTiers gt(numnodes); - MatGraphVerifier gv(numnodes);
+ height_factor = 1 / log2(log2(numnodes)); + sketch_len = Sketch::calc_vector_length(numnodes); + sketch_err = DEFAULT_SKETCH_ERR; + + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + GraphTierSystem gt(numnodes, seed); + gt.initialize_all_nodes(); + GraphVerifier gv(numnodes);
// Link all of the nodes into 1 connected component for (node_id_t i = 0; i < numnodes-1; i++) { gt.update({{i, i+1}, INSERT});
- gv.edge_update(i,i+1); - std::vector<std::set<node_id_t>> cc = gt.get_cc(); - try { - gv.reset_cc_state(); - gv.verify_soln(cc); - } catch (IncorrectCCException& e) { - std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl; - std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << numnodes-i-1 << " components" << std::endl; - FAIL();
+ gv.edge_update({i, i + 1}); + if (i % 3 == 0) { + std::vector<std::set<node_id_t>> cc = gt.get_cc(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after linking nodes " << i << " and " << i + 1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << numnodes - i - 1 << " components" << std::endl; + FAIL(); + }
} } // One by one cut all of the nodes into singletons for (node_id_t i = 0; i < numnodes-1; i++) { gt.update({{i, i+1}, DELETE});
- gv.edge_update(i,i+1); - std::vector<std::set<node_id_t>> cc = gt.get_cc(); - try { - gv.reset_cc_state(); - gv.verify_soln(cc); - } catch (IncorrectCCException& e) { - std::cout << "Incorrect cc found after cutting nodes " << i << " and " << i+1 << std::endl; - std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << i+2 << " components" << std::endl; - FAIL();
+ gv.edge_update({i,i+1}); + if (i % 3 == 0) { + std::vector<std::set<node_id_t>> cc = gt.get_cc(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after cutting nodes " << i << " and " << i + 1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << i + 2 << " components" << std::endl; + FAIL(); + }
} } } TEST(GraphTiersSuite, deletion_replace_correctness_test) { node_id_t numnodes = 50;
- GraphTiers gt(numnodes); - MatGraphVerifier gv(numnodes);
+ std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + GraphTierSystem gt(numnodes, seed); + gt.initialize_all_nodes(); + GraphVerifier gv(numnodes);
// Link all of the nodes into 1 connected component for (node_id_t i = 0; i < numnodes-1; i++) { gt.update({{i, i+1}, INSERT});
- gv.edge_update(i,i+1); + gv.edge_update({i,i+1});
std::vector<std::set<node_id_t>> cc = gt.get_cc(); try {
- gv.reset_cc_state(); - gv.verify_soln(cc); + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc);
} catch (IncorrectCCException& e) { std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl; std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << numnodes-i-1 << " components" << std::endl; @@ -93,19 +176,19 @@ TEST(GraphTiersSuite, deletion_replace_correctness_test) { second = rand() % numnodes; gt.update({{first, second}, INSERT});
- gv.edge_update(first, second); + gv.edge_update({first, second});
node_id_t distance = std::max(first, second) - std::min(first, second); // Cut a random edge first = std::min(first, second) + rand() % (distance-1);
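// NOTE: the cut edge (first, first+1) lies inside the span of the bridge
// inserted above, so the deletion must be repaired by recovering the bridge
// as a replacement edge; e.g. with bridge {10, 30} and cut {17, 18} the
// component stays connected through 10-30, hence the single expected component below.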
gt.update({{first, first+1}, DELETE});
- gv.edge_update(first, first+1); + gv.edge_update({first, first+1});
std::vector<std::set<node_id_t>> cc = gt.get_cc(); try {
- gv.reset_cc_state(); - gv.verify_soln(cc); + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc);
} catch (IncorrectCCException& e) { std::cout << "Incorrect cc found after cutting nodes " << first << " and " << first+1 << std::endl; std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl; @@ -115,7 +198,7 @@ } TEST(GraphTiersSuite, omp_correctness_test) {
- omp_set_dynamic(1); + // omp_set_dynamic(1);
try { BinaryGraphStream stream(stream_file, 100000); @@ -123,25 +206,31 @@ sketch_len = Sketch::calc_vector_length(stream.nodes()); sketch_err = DEFAULT_SKETCH_ERR;
- GraphTiers gt(stream.nodes());
+ std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + GraphTierSystem gt(stream.nodes(), seed); + gt.initialize_all_nodes();
int edgecount = stream.edges(); edgecount = 1000000;
- MatGraphVerifier gv(stream.nodes()); + GraphVerifier gv(stream.nodes());
start = std::chrono::high_resolution_clock::now(); for (int i = 0; i < edgecount; i++) { GraphUpdate update = stream.get_edge(); gt.update(update);
- gv.edge_update(update.edge.src, update.edge.dst); + gv.edge_update(update.edge);
unlikely_if(i%1000 == 0 || i == edgecount-1) { std::vector<std::set<node_id_t>> cc = gt.get_cc(); try {
- gv.reset_cc_state(); - gv.verify_soln(cc); + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc);
std::cout << "Update " << i << ", CCs correct." << std::endl; } catch (IncorrectCCException& e) { std::cout << "Incorrect connected components found at update " << i << std::endl; std::cout << "GOT: " << cc.size() << std::endl;
+ std::cout << "EXPECTED: " << gv.get_num_kruskal_ccs() << std::endl;
FAIL(); } } @@ -157,16 +246,22 @@ } TEST(GraphTiersSuite, omp_speed_test) {
- omp_set_dynamic(1); + // omp_set_dynamic(1);
try { long time = 0; BinaryGraphStream stream(stream_file, 100000);
- height_factor = 1./log2(log2(stream.nodes())); + // height_factor = 1;//1./log2(log2(stream.nodes())); + height_factor = 1/log2(log2(stream.nodes()));
sketch_len = Sketch::calc_vector_length(stream.nodes()); sketch_err = DEFAULT_SKETCH_ERR;
- GraphTiers gt(stream.nodes());
+ std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + GraphTierSystem gt(stream.nodes(), seed); + gt.initialize_all_nodes();
int edgecount = stream.edges(); start = std::chrono::high_resolution_clock::now(); @@ -174,7 +269,7 @@ for (int i = 0; i < edgecount; i++) { GraphUpdate update = stream.get_edge(); gt.update(update);
- unlikely_if (i % 100000 == 0) { + unlikely_if (i % 1000000000 == 0) {
auto stop = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(stop - start); std::cout << "FINISHED UPDATE " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; @@ -193,7 +288,7 @@ } TEST(GraphTiersSuite, query_speed_test) {
- omp_set_dynamic(1); + // omp_set_dynamic(1);
try { BinaryGraphStream stream(stream_file, 100000); @@ -203,7 +298,13 @@ sketch_err = DEFAULT_SKETCH_ERR; int nodecount = stream.nodes();
- GraphTiers 
gt(nodecount); + + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t sketch_seed = dist(rng); + GraphTierSystem gt(nodecount, sketch_seed); + gt.initialize_all_nodes(); int edgecount = 150000; std::cout << "Building up graph..." << std::endl; diff --git a/test/hybrid_shmem_test_runner.cpp b/test/hybrid_shmem_test_runner.cpp new file mode 100644 index 0000000..d3a36ea --- /dev/null +++ b/test/hybrid_shmem_test_runner.cpp @@ -0,0 +1,14 @@ +#include +#include +#include "util.h" + + +std::string stream_file; + +int main(int argc, char** argv) { + if (argc > 1) + stream_file = argv[1]; + testing::InitGoogleTest(&argc, argv); + int ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/test/hybrid_shmem_tests.cpp b/test/hybrid_shmem_tests.cpp new file mode 100644 index 0000000..fe4614f --- /dev/null +++ b/test/hybrid_shmem_tests.cpp @@ -0,0 +1,519 @@ +#include +#include +#include +#include +#include +#include +#include +#include "graph_tiers.h" +#include "batch_tiers.h" +#include "binary_graph_stream.h" +// #include "mat_graph_verifier.h" +#include "graph_verifier.h" +#include "mpi_hybrid_conn.h" +#include "util.h" + +const vec_t DEFAULT_SKETCH_ERR = 1; + + +size_t update_batch_size = 200; + +static uint32_t compute_num_tiers(node_id_t node_count) { + if (node_count <= 100) { + return 5; + } + const double numerator = log2(static_cast(node_count)); + //const double denominator = log2(3.0) - 1.0; + // const double denominator=0.6; + const double denominator = 1.4; + auto tiers = static_cast(numerator / denominator); + return std::max(5, tiers); +} + +// using GraphTierSystem = GraphTiers; +using GraphTierSystem = BatchTiers; + +auto start = std::chrono::high_resolution_clock::now(); +auto stop = std::chrono::high_resolution_clock::now(); +auto duration = std::chrono::duration_cast(stop - start); + +static void print_metrics() { + stop = std::chrono::high_resolution_clock::now(); + duration = std::chrono::duration_cast(stop - start); + std::cout << "\nTotal time for all updates performed (ms): " << duration.count() << std::endl; + std::cout << "\tTotal time in Sketch update (ms): " << sketch_time/1000 << std::endl; + std::cout << "\tTotal time in Refresh function (ms): " << refresh_time/1000 << std::endl; + std::cout << "\t\tTime in Parallel isolated checking (ms): " << parallel_isolated_check/1000 << std::endl; + std::cout << "\t\tTime in Sketch queries (ms): " << sketch_query/1000 << std::endl; + std::cout << "\t\tTime in LCT operations (ms): " << lct_time/1000 << std::endl; + std::cout << "\t\tTime in ETT operations (ms): " << (ett_time+ett_find_root+ett_get_agg)/1000 << std::endl; + std::cout << "\t\t\tETT Split and Join (ms): " << ett_time/1000 << std::endl; + std::cout << "\t\t\tETT Find Tree Root (ms): " << ett_find_root/1000 << std::endl; + std::cout << "\t\t\tETT Get Aggregate (ms): " << ett_get_agg/1000 << std::endl; + std::cout << "Total number of tiers grown: " << tiers_grown << std::endl; + std::cout << "Total number of normal refreshes: " << normal_refreshes << std::endl; +} + +TEST(HybridGraphTiersSuite, gibbs_mixed_speed_test) { + BinaryGraphStream stream(stream_file, 100000); + long edgecount = stream.edges(); + // height_factor = 1;//1./log2(log2(stream.nodes())); + height_factor = 1/log2(log2(stream.nodes())); + sketch_len = Sketch::calc_vector_length(stream.nodes()); + sketch_err = DEFAULT_SKETCH_ERR; + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = 
dist(rng); + // GraphTierSystem gt(stream.nodes(), seed); + // HybridConnectivityManager + uint32_t num_tiers = log2(stream.nodes())/(log2(3)-1); + HybridConnectivityManager hybrid_driver( + stream.nodes(), num_tiers, update_batch_size, seed + ); + + long total_update_time = 0; + long total_query_time = 0; + auto update_timer = std::chrono::high_resolution_clock::now(); + auto query_timer = update_timer; + bool doing_updates = true; + for (long i = 0; i < edgecount; i++) { + // Read an update from the stream and have the input node process it + GraphUpdate operation = stream.get_edge(); + if (operation.type == 2) { // 2 is the symbol for queries + unlikely_if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + doing_updates = false; + query_timer = std::chrono::high_resolution_clock::now(); + } + hybrid_driver.connectivity_query(operation.edge.src, operation.edge.dst); + } else { + unlikely_if (!doing_updates) { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + doing_updates = true; + update_timer = std::chrono::high_resolution_clock::now(); + } + hybrid_driver.update(operation); + } + unlikely_if(i%1000000 == 0 || i == edgecount-1) { + std::cout << "FINISHED OPERATION " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; + std::cout << "Sketched nodes: " << hybrid_driver.sketched_node_count() << " out of " << stream.nodes() << std::endl; + // std::cout << "- Space usage of CF: " << hybrid_driver.get_space_usage_cf()/(1024*1024) << " MB" << std::endl; + // std::cout << "- Space usage of Driver: " << hybrid_driver.get_space_usage_driver()/(1024*1024) << " MB" << std::endl; + // std::cout << "- Space usage of Sketches: " << hybrid_driver.space_usage_conn_sketch()/(1024*1024) << " MB" << std::endl; + // std::cout << "- Space usage of Recovery Sketches: " << hybrid_driver.space_usage_recovery_sketch()/(1024*1024) << " MB" << std::endl; + std::cout << "- Total edges: " << hybrid_driver.total_edges() << std::endl; + std::cout << "- Sketched edges: " << hybrid_driver.num_sketched_edges() << std::endl; + double percent_sketched = 100.0 * ((double)hybrid_driver.num_sketched_edges()) / ((double)hybrid_driver.total_edges()); + std::cout << "- Percent sketched edges: " << percent_sketched << "%" << std::endl; + } + } + if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + } else { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + } + + std::cout << "Total update time(ms): " << (total_update_time/1000) << std::endl; + std::cout << "Total query time(ms): " << (total_query_time/1000) << std::endl; + + std::ofstream file; + std::string out_file = "./../results/gibbs_speed_results/" + stream_file.substr(stream_file.find("/") + 1) + ".txt"; + std::cout << "WRITING RESULTS TO " << out_file << std::endl; + file.open (out_file, std::ios_base::app); + file << " UPDATES/SECOND: " << ((long)(0.9*edgecount))/(1 + total_update_time/1000)*1000 << std::endl; + file << " QUERIES/SECOND: " << ((long)(0.1*edgecount))/(1 + total_query_time/1000)*1000 << std::endl; + file.close(); +} +TEST(HybridGraphTiersSuite, sparse_only_speed_test) { + BinaryGraphStream stream(stream_file, 100000); + long edgecount = stream.edges(); + // height_factor = 1;//1./log2(log2(stream.nodes())); + height_factor = 
1/log2(log2(stream.nodes())); + sketch_len = Sketch::calc_vector_length(stream.nodes()); + sketch_err = DEFAULT_SKETCH_ERR; + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + // GraphTierSystem gt(stream.nodes(), seed); + // HybridConnectivityManager + uint32_t num_tiers = log2(stream.nodes())/(log2(3)-1); + SCCWN<> cf_algo(stream.nodes()); + + long total_update_time = 0; + long total_query_time = 0; + auto update_timer = std::chrono::high_resolution_clock::now(); + auto query_timer = update_timer; + bool doing_updates = true; + for (long i = 0; i < edgecount; i++) { + // Read an update from the stream and have the input node process it + GraphUpdate operation = stream.get_edge(); + if (operation.type == 2) { // 2 is the symbol for queries + unlikely_if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + doing_updates = false; + query_timer = std::chrono::high_resolution_clock::now(); + } + cf_algo.is_connected(operation.edge.src, operation.edge.dst); + } else { + unlikely_if (!doing_updates) { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + doing_updates = true; + update_timer = std::chrono::high_resolution_clock::now(); + } + if (operation.type == INSERT) { + cf_algo.insert(operation.edge.src, operation.edge.dst); + } else { + cf_algo.remove(operation.edge.src, operation.edge.dst); + } + } + unlikely_if(i%1000000 == 0 || i == edgecount-1) { + std::cout << "FINISHED OPERATION " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; + if (i%20000000 == 0 || i == edgecount-1) { + std::cout << "- Space usage of CF: " << cf_algo.getMemUsage()/(1024*1024) << " MB" << std::endl; + } + } + } + if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + } else { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + } + std::cout << "Total update time(ms): " << (total_update_time/1000) << std::endl; + std::cout << "Total query time(ms): " << (total_query_time/1000) << std::endl; + + std::ofstream file; + std::string out_file = "./../results/gibbs_speed_results/" + stream_file.substr(stream_file.find("/") + 1) + ".txt"; + std::cout << "WRITING RESULTS TO " << out_file << std::endl; + file.open (out_file, std::ios_base::app); + file << " UPDATES/SECOND: " << ((long)(0.9*edgecount))/(1 + total_update_time/1000)*1000 << std::endl; + file << " QUERIES/SECOND: " << ((long)(0.1*edgecount))/(1 + total_query_time/1000)*1000 << std::endl; + file.close(); +} + +TEST(HybridGraphTiersSuite, hybrid_memory_test) { + BinaryGraphStream stream(stream_file, 100000); + long edgecount = stream.edges(); + // height_factor = 1;//1./log2(log2(stream.nodes())); + height_factor = 1/log2(log2(stream.nodes())); + sketch_len = Sketch::calc_vector_length(stream.nodes()); + sketch_err = DEFAULT_SKETCH_ERR; + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + // GraphTierSystem gt(stream.nodes(), seed); + // HybridConnectivityManager + uint32_t num_tiers = log2(stream.nodes())/(log2(3)-1); + HybridConnectivityManager hybrid_driver( + stream.nodes(), num_tiers, update_batch_size, seed + ); + + long total_update_time = 0; + long total_query_time = 0; + auto 
update_timer = std::chrono::high_resolution_clock::now(); + auto query_timer = update_timer; + bool doing_updates = true; + for (long i = 0; i < edgecount; i++) { + // Read an update from the stream and have the input node process it + GraphUpdate operation = stream.get_edge(); + if (operation.type == 2) { // 2 is the symbol for queries + unlikely_if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + doing_updates = false; + query_timer = std::chrono::high_resolution_clock::now(); + } + hybrid_driver.connectivity_query(operation.edge.src, operation.edge.dst); + } else { + unlikely_if (!doing_updates) { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + doing_updates = true; + update_timer = std::chrono::high_resolution_clock::now(); + } + hybrid_driver.update(operation); + } + unlikely_if(i%1000000 == 0 || i == edgecount-1) { + std::cout << "FINISHED OPERATION " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; + if (i%20000000 == 0 || i == edgecount-1) { + std::cout << "Sketched nodes: " << hybrid_driver.sketched_node_count() << " out of " << stream.nodes() << std::endl; + std::cout << "- Space usage of CF: " << hybrid_driver.get_space_usage_cf()/(1024*1024) << " MB" << std::endl; + std::cout << "- Space usage of Driver: " << hybrid_driver.get_space_usage_driver()/(1024*1024) << " MB" << std::endl; + std::cout << "- Space usage of Sketches: " << hybrid_driver.space_usage_conn_sketch()/(1024*1024) << " MB" << std::endl; + std::cout << "- Space usage of Recovery Sketches: " << hybrid_driver.space_usage_recovery_sketch()/(1024*1024) << " MB" << std::endl; + std::cout << "- Total edges: " << hybrid_driver.total_edges() << std::endl; + std::cout << "- Sketched edges: " << hybrid_driver.num_sketched_edges() << std::endl; + double percent_sketched = 100.0 * ((double)hybrid_driver.num_sketched_edges()) / ((double)hybrid_driver.total_edges()); + std::cout << "- Percent sketched edges: " << percent_sketched << "%" << std::endl; + } + } + } + if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + } else { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + } + + std::cout << "Total update time(ms): " << (total_update_time/1000) << std::endl; + std::cout << "Total query time(ms): " << (total_query_time/1000) << std::endl; + + std::ofstream file; + std::string out_file = "./../results/gibbs_speed_results/" + stream_file.substr(stream_file.find("/") + 1) + ".txt"; + std::cout << "WRITING RESULTS TO " << out_file << std::endl; + file.open (out_file, std::ios_base::app); + file << " UPDATES/SECOND: " << ((long)(0.9*edgecount))/(1 + total_update_time/1000)*1000 << std::endl; + file << " QUERIES/SECOND: " << ((long)(0.1*edgecount))/(1 + total_query_time/1000)*1000 << std::endl; + file.close(); +} + +TEST(HybridGraphTiersSuite, mini_correctness_test) { + + node_id_t numnodes = 10; + height_factor = 1 / log2(log2(numnodes)); + sketch_len = Sketch::calc_vector_length(numnodes); + sketch_err = DEFAULT_SKETCH_ERR; + + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + uint32_t num_tiers = compute_num_tiers(numnodes); + HybridConnectivityManager hybrid_driver( + numnodes, num_tiers, update_batch_size, seed + ); + 
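// NOTE: GraphVerifier maintains the exact edge set alongside the hybrid
+ // structure; verify_cc_from_component_set() throws IncorrectCCException
+ // when the reported components disagree with the true partition (compare
+ // gv.get_num_kruskal_ccs() for the expected count).
+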
GraphVerifier gv(numnodes); + + // Link all of the nodes into 1 connected component + for (node_id_t i = 0; i < numnodes-1; i++) { + hybrid_driver.update({{i, i+1}, INSERT}); + gv.edge_update({i, i + 1}); + if (i % 3 == 0) { + std::vector> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after linking nodes " << i << " and " << i + 1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << numnodes - i - 1 << " components" << std::endl; + FAIL(); + } + } + } + // One by one cut all of the nodes into singletons + for (node_id_t i = 0; i < numnodes-1; i++) { + hybrid_driver.update({{i, i+1}, DELETE}); + gv.edge_update({i,i+1}); + if (i % 3 == 0) { + std::vector> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after cutting nodes " << i << " and " << i + 1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << i + 2 << " components" << std::endl; + FAIL(); + } + } + } +} + +TEST(HybridGraphTiersSuite, deletion_replace_correctness_test) { + node_id_t numnodes = 50; + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + uint32_t num_tiers = compute_num_tiers(numnodes); + HybridConnectivityManager hybrid_driver( + numnodes, num_tiers, update_batch_size, seed + ); + GraphVerifier gv(numnodes); + + // Link all of the nodes into 1 connected component + for (node_id_t i = 0; i < numnodes-1; i++) { + hybrid_driver.update({{i, i+1}, INSERT}); + gv.edge_update({i,i+1}); + std::vector> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << numnodes-i-1 << " components" << std::endl; + FAIL(); + } + } + // Generate a random bridge + node_id_t first = rand() % numnodes; + node_id_t second = rand() % numnodes; + while(first == second || second == first+1 || first == second+1) + second = rand() % numnodes; + + hybrid_driver.update({{first, second}, INSERT}); + gv.edge_update({first, second}); + + node_id_t distance = std::max(first, second) - std::min(first, second); + // Cut a random edge + first = std::min(first, second) + rand() % (distance-1); + + hybrid_driver.update({{first, first+1}, DELETE}); + gv.edge_update({first, first+1}); + + std::vector> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after cutting nodes " << first << " and " << first+1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 components" << std::endl; + FAIL(); + } + +} + +TEST(HybridGraphTiersSuite, omp_correctness_test) { + // omp_set_dynamic(1); + try { + BinaryGraphStream stream(stream_file, 100000); + + height_factor = 1/log2(log2(stream.nodes())); + sketch_len = Sketch::calc_vector_length(stream.nodes()); + sketch_err = DEFAULT_SKETCH_ERR; + + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + uint32_t num_tiers = compute_num_tiers(stream.nodes()); + 
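// NOTE: compute_num_tiers() floors the tier count at 5 and otherwise uses
+ // log2(n)/1.4, i.e. fewer tiers than the theory-informed
+ // log2(n)/(log2(3)-1) used by the speed tests; e.g. for n = 2^20 this
+ // gives floor(20/1.4) = 14 tiers versus roughly 20/0.585 = 34.
+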
HybridConnectivityManager hybrid_driver( + stream.nodes(), num_tiers, update_batch_size, seed + ); + int edgecount = stream.edges(); + edgecount = 1000000; + GraphVerifier gv(stream.nodes()); + start = std::chrono::high_resolution_clock::now(); + + for (int i = 0; i < edgecount; i++) { + GraphUpdate update = stream.get_edge(); + hybrid_driver.update(update); + gv.edge_update(update.edge); + unlikely_if(i%1000 == 0 || i == edgecount-1) { + std::vector> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + std::cout << "Update " << i << ", CCs correct." << std::endl; + } catch (IncorrectCCException& e) { + std::cout << "Incorrect connected components found at update " << i << std::endl; + std::cout << "GOT: " << cc.size() << std::endl; + std::cout << "EXPECTED: " << gv.get_num_kruskal_ccs() << std::endl; + FAIL(); + } + } + } + std::ofstream file; + file.open ("omp_kron_results.txt", std::ios_base::app); + file << stream_file << " passed correctness test." << std::endl; + file.close(); + + } catch (BadStreamException& e) { + std::cout << "ERROR: Stream binary file not found." << std::endl; + } +} + +TEST(HybridGraphTiersSuite, omp_speed_test) { + // omp_set_dynamic(1); + try { + long time = 0; + BinaryGraphStream stream(stream_file, 100000); + + // height_factor = 1;//1./log2(log2(stream.nodes())); + height_factor = 1/log2(log2(stream.nodes())); + sketch_len = Sketch::calc_vector_length(stream.nodes()); + sketch_err = DEFAULT_SKETCH_ERR; + + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + uint32_t num_tiers = compute_num_tiers(stream.nodes()); + HybridConnectivityManager hybrid_driver( + stream.nodes(), num_tiers, update_batch_size, seed + ); + int edgecount = stream.edges(); + start = std::chrono::high_resolution_clock::now(); + + START(timer); + for (int i = 0; i < edgecount; i++) { + GraphUpdate update = stream.get_edge(); + hybrid_driver.update(update); + unlikely_if (i % 1000000000 == 0) { + auto stop = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(stop - start); + std::cout << "FINISHED UPDATE " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; + } + } + STOP(time, timer); + print_metrics(); + std::ofstream file; + file.open ("omp_kron_results.txt", std::ios_base::app); + file << stream_file << " time (ms): "<< time/1000 << std::endl; + file.close(); + + } catch (BadStreamException& e) { + std::cout << "ERROR: Stream binary file not found." << std::endl; + } +} + +TEST(HybridGraphTiersSuite, query_speed_test) { + // omp_set_dynamic(1); + try { + + BinaryGraphStream stream(stream_file, 100000); + + height_factor = 1/log2(log2(stream.nodes())); + sketch_len = Sketch::calc_vector_length(stream.nodes()); + sketch_err = DEFAULT_SKETCH_ERR; + + int nodecount = stream.nodes(); + + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t sketch_seed = dist(rng); + uint32_t num_tiers = compute_num_tiers(nodecount); + HybridConnectivityManager hybrid_driver( + nodecount, num_tiers, update_batch_size, sketch_seed + ); + int edgecount = 150000; + + std::cout << "Building up graph..." << std::endl; + for (int i = 0; i < edgecount; i++) { + GraphUpdate update = stream.get_edge(); + hybrid_driver.update(update); + } + + int querycount = 1000000; + int seed = time(NULL); + srand(seed); + std::cout << "Performing queries..." 
<< std::endl; + auto start = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < querycount; i++) { + hybrid_driver.connectivity_query(rand()%nodecount, rand()%nodecount); + } + auto stop = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(stop - start); + std::cout << querycount << " Connectivity Queries, Time: " << duration.count() << std::endl; + start = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < querycount/100; i++) { + hybrid_driver.cc_query(); + } + stop = std::chrono::high_resolution_clock::now(); + duration = std::chrono::duration_cast(stop - start); + std::cout << querycount/100 << " Connected Components Queries, Time: " << duration.count() << std::endl; + + + } catch (BadStreamException& e) { + std::cout << "ERROR: Stream binary file not found." << std::endl; + } +} diff --git a/test/hybrid_test_runner.cpp b/test/hybrid_test_runner.cpp new file mode 100644 index 0000000..99a458e --- /dev/null +++ b/test/hybrid_test_runner.cpp @@ -0,0 +1,28 @@ +#include +#include +#include "util.h" + + +std::string stream_file; +int hybrid_threshold_arg; +int batch_size_arg; +double height_factor_arg; + +int main(int argc, char** argv) { + MPI_Init(&argc, &argv); + + if (argc < 5) { + std::cerr << "INCORRECT NUMBER OF ARGUMENTS." << std::endl; + return EXIT_FAILURE; + } + + stream_file = argv[1]; + batch_size_arg = atoi(argv[2]); + height_factor_arg = atof(argv[3]); + hybrid_threshold_arg = atoi(argv[4]); + + testing::InitGoogleTest(&argc, argv); + int ret = RUN_ALL_TESTS(); + MPI_Finalize(); + return ret; +} diff --git a/test/hybrid_tests.cpp b/test/hybrid_tests.cpp new file mode 100644 index 0000000..3dde5de --- /dev/null +++ b/test/hybrid_tests.cpp @@ -0,0 +1,855 @@ +#include +#include +#include +#include +#include +#include +#include +// #include +#include "mpi_nodes.h" +#include "binary_graph_stream.h" +// #include "mat_graph_verifier.h" +#include "graph_verifier.h" +#include "mpi_hybrid_conn.h" +#include "util.h" + + +const int DEFAULT_BATCH_SIZE = 100; +const int DEFAULT_HYBRID_THRESHOLD = 1400; +const vec_t DEFAULT_SKETCH_ERR = 1; + +// TEST(GraphTierSuite, hybrid_mixed_speed_test) { +// int world_rank_buf; +// MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf); +// uint32_t world_rank = world_rank_buf; +// int world_size_buf; +// MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf); +// uint32_t world_size = world_size_buf; + +// BinaryGraphStream stream(stream_file, 100000); +// uint32_t num_nodes = stream.nodes(); +// uint32_t num_tiers = log2(num_nodes)/(log2(3)-1); + +// // Parameters +// int update_batch_size = (batch_size_arg==0) ? DEFAULT_BATCH_SIZE : batch_size_arg; +// height_factor = (height_factor_arg==0) ? 1./log2(log2(num_nodes)) : height_factor_arg; +// sketchless_height_factor = height_factor; +// sketch_len = Sketch::calc_vector_length(num_nodes); +// sketch_err = DEFAULT_SKETCH_ERR; + +// std::cout << "BATCH SIZE: " << update_batch_size << " HEIGHT FACTOR " << height_factor << " SKETCH BUFFER: " << SKETCH_BUFFER_SIZE << std::endl; + +// // Seeds +// std::random_device dev; +// std::mt19937 rng(dev()); +// std::uniform_int_distribution dist(0,MAX_INT); +// int seed = dist(rng); +// bcast(&seed, sizeof(int), 0); +// std::cout << "SEED: " << seed << std::endl; +// rng.seed(seed); +// for (int i = 0; i < world_rank; i++) +// dist(rng); +// int tier_seed = dist(rng); + +// if (world_size != num_tiers+1) +// FAIL() << "MPI world size too small for graph with " << num_nodes << " vertices. 
Correct world size is: " << num_tiers+1; + +// if (world_rank == 0) { +// int seed = time(NULL); +// srand(seed); +// std::cout << "InputNode seed: " << seed << std::endl; +// InputNode input_node(num_nodes, num_tiers, update_batch_size, seed); +// long edgecount = stream.edges(); +// // long count = 100000000; +// // edgecount = std::min(edgecount, count); +// long total_update_time = 0; +// long total_query_time = 0; +// auto update_timer = std::chrono::high_resolution_clock::now(); +// auto query_timer = update_timer; +// bool doing_updates = true; +// for (long i = 0; i < edgecount; i++) { +// // Read an update from the stream and have the input node process it +// GraphUpdate operation = stream.get_edge(); +// if (operation.type == 2) { // 2 is the symbol for queries +// unlikely_if (doing_updates) { +// total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); +// doing_updates = false; +// query_timer = std::chrono::high_resolution_clock::now(); +// } +// input_node.connectivity_query(operation.edge.src, operation.edge.dst); +// } else { +// unlikely_if (!doing_updates) { +// total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); +// doing_updates = true; +// update_timer = std::chrono::high_resolution_clock::now(); +// } +// input_node.update(operation); +// } +// unlikely_if(i%1000000 == 0 || i == edgecount-1) { +// std::cout << "FINISHED OPERATION " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; +// } +// } +// if (doing_updates) { +// total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); +// } else { +// total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); +// } +// // Communicate to all other nodes that the stream has ended +// input_node.end(); +// std::cout << "Total update time(ms): " << (total_update_time/1000) << std::endl; +// std::cout << "Total query time(ms): " << (total_query_time/1000) << std::endl; +// std::cout << "Total time(ms): " << (total_query_time + total_update_time)/1000 << std::endl; + +// std::ofstream file; +// std::string out_file = "./../results/mpi_speed_results/" + stream_file.substr(stream_file.find("/") + 1) + ".txt"; +// std::cout << "WRITING RESULTS TO " << out_file << std::endl; +// file.open (out_file, std::ios_base::app); +// file << " UPDATES/SECOND: " << (0.9*edgecount)/(total_update_time) << std::endl; +// file << " QUERIES/SECOND: " << (0.1*edgecount)/(total_query_time) << std::endl; +// file.close(); + +// } else if (world_rank < num_tiers+1) { +// int tier_num = world_rank-1; +// TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed); +// tier_node.main(); +// } +// } + +TEST(GraphTierSuite, hybrid_update_speed_test) { + int world_rank_buf; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf); + uint32_t world_rank = world_rank_buf; + int world_size_buf; + MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf); + uint32_t world_size = world_size_buf; + + BinaryGraphStream stream(stream_file, 100000); + uint32_t num_nodes = stream.nodes(); + uint32_t num_tiers = log2(num_nodes)/(log2(3)-1); + + // Parameters + int update_batch_size = (batch_size_arg==0) ? DEFAULT_BATCH_SIZE : batch_size_arg; + int threshold = (batch_size_arg==0) ? DEFAULT_HYBRID_THRESHOLD : hybrid_threshold_arg; + height_factor = (height_factor_arg==0) ? 
1./log2(log2(num_nodes)) : height_factor_arg; + sketchless_height_factor = height_factor; + sketch_len = Sketch::calc_vector_length(num_nodes); + sketch_err = DEFAULT_SKETCH_ERR; + + std::cout << "BATCH SIZE: " << update_batch_size << " HEIGHT FACTOR " << height_factor << std::endl; + + // Seeds + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + int seed = dist(rng); + bcast(&seed, sizeof(int), 0); + std::cout << "SEED: " << seed << std::endl; + rng.seed(seed); + for (int i = 0; i < world_rank; i++) + dist(rng); + int tier_seed = dist(rng); + + if (world_size != num_tiers+1) + FAIL() << "MPI world size too small for graph with " << num_nodes << " vertices. Correct world size is: " << num_tiers+1; + + if (world_rank == 0) { + int seed = time(NULL); + srand(seed); + std::cout << "InputNode seed: " << seed << std::endl; + // InputNode input_node(num_nodes, num_tiers, update_batch_size, seed); + HybridConnectivityManager<> hybrid_manager( + num_nodes, num_tiers, update_batch_size, seed + ); + hybrid_manager.set_threshold(threshold); + long edgecount = stream.edges(); + // long count = 100000000; + // edgecount = std::min(edgecount, count); + auto X = std::chrono::high_resolution_clock::now(); + for (long i = 0; i < edgecount; i++) { + // Read an update from the stream and have the input node process it + GraphUpdate update = stream.get_edge(); + hybrid_manager.update(update); + unlikely_if(i%1000000 == 0 || i == edgecount-1) { + std::cout << "FINISHED UPDATE " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; + // std::cout << "Memory usage: " << hybrid_manager.cf_algo.getMemUsage() / 1000000 << std::endl; + std::cout << "Sketched nodes: " << hybrid_manager.num_sketched_vertices() << " out of " << num_nodes << std::endl; + } + } + // Communicate to all other nodes that the stream has ended + hybrid_manager.sketching_algo.end(); + auto time = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - X).count(); + std::cout << "Total time(ms): " << (time/1000) << std::endl; + + std::ofstream file; + file.open ("./../results/mpi_update_results.txt", std::ios_base::app); + file << stream_file << " UPDATES/SECOND: " << edgecount/(time/1000)*1000 << std::endl; + file.close(); + + } else if (world_rank < num_tiers+1) { + int tier_num = world_rank-1; + TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed); + tier_node.main(); + } +} + +TEST(GraphTiersSuite, hybrid_query_speed_test) { + int world_rank_buf; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf); + uint32_t world_rank = world_rank_buf; + int world_size_buf; + MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf); + uint32_t world_size = world_size_buf; + + BinaryGraphStream stream(stream_file, 1000000); + uint32_t num_nodes = stream.nodes(); + uint32_t num_tiers = log2(num_nodes)/(log2(3)-1); + int nodecount = stream.nodes(); + int edgecount = stream.edges(); + if (edgecount > 100000000) edgecount = 100000000; + + // Parameters + int update_batch_size = (batch_size_arg==0) ? DEFAULT_BATCH_SIZE : batch_size_arg; + int threshold = (batch_size_arg==0) ? DEFAULT_HYBRID_THRESHOLD : hybrid_threshold_arg; + height_factor = (height_factor_arg==0) ? 
1./log2(log2(num_nodes)) : height_factor_arg; + sketchless_height_factor = height_factor; + sketch_len = Sketch::calc_vector_length(num_nodes); + sketch_err = DEFAULT_SKETCH_ERR; + + // Seeds + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + int seed = dist(rng); + bcast(&seed, sizeof(int), 0); + std::cout << "SEED: " << seed << std::endl; + rng.seed(seed); + for (int i = 0; i < world_rank; i++) + dist(rng); + int tier_seed = dist(rng); + + if (world_size != num_tiers+1) + FAIL() << "MPI world size too small for graph with " << num_nodes << " vertices. Correct world size is: " << num_tiers+1; + + if (world_rank == 0) { + int seed = time(NULL); + srand(seed); + std::cout << "InputNode seed: " << seed << std::endl; + // InputNode input_node(num_nodes, num_tiers, update_batch_size, seed); + HybridConnectivityManager hybrid_driver( + num_nodes, num_tiers, update_batch_size, seed + ); + hybrid_driver.set_threshold(threshold); + + long total_time = 0; + for (int batch = 0; batch < 10; batch++) { + std::cout << stream_file << " update batch " << batch << std::endl; + for (int i = 0; i < edgecount/10; i++) { + GraphUpdate update = stream.get_edge(); + hybrid_driver.update(update); + } + + long querycount = 100000000; + + std::cout << "Performing queries..." << std::endl; + auto X = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < querycount; i++) { + hybrid_driver.connectivity_query(rand()%nodecount, rand()%nodecount); + } + auto time = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - X).count(); + std::cout << querycount << " Connectivity Queries, Time (ms): " << time/1000 << std::endl; + total_time += time; + } + hybrid_driver.sketching_algo.end(); + + std::cout << "TOTAL TIME(ms): " << total_time/1000 << std::endl; + std::cout << "QUERIES/SECOND: " << 1000000000/(total_time/1000)*1000 << std::endl; + std::ofstream file; + file.open ("./../results/mpi_query_results.txt", std::ios_base::app); + file << stream_file << " QUERIES/SECOND: " << 1000000000/(total_time/1000)*1000 << std::endl; + file.close(); + + } else if (world_rank < num_tiers+1) { + int tier_num = world_rank-1; + TierNode tier_node(num_nodes, world_rank-1, num_tiers, update_batch_size, tier_seed); + tier_node.main(); + } +} + +TEST(GraphTierSuite, hybrid_memory_test) { + int world_rank_buf; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf); + uint32_t world_rank = world_rank_buf; + int world_size_buf; + MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf); + uint32_t world_size = world_size_buf; + + BinaryGraphStream stream(stream_file, 100000); + uint32_t num_nodes = stream.nodes(); + // uint32_t num_tiers = log2(num_nodes)/(log2(3)-1); + std::cout << "Theory-informed number of tiers: " << log2(num_nodes)/(log2(3)-1) << std::endl; + // TEMPORARY CHANGE - MAKE THE USER DECIDE WORLD SIZE + uint32_t num_tiers = world_size-1; + std::cout << "Using number of tiers: " << num_tiers << std::endl; + + // Parameters + int update_batch_size = (batch_size_arg==0) ? DEFAULT_BATCH_SIZE : batch_size_arg; + int threshold = (batch_size_arg==0) ? DEFAULT_HYBRID_THRESHOLD : hybrid_threshold_arg; + height_factor = (height_factor_arg==0) ? 
1./log2(log2(num_nodes)) : height_factor_arg; + sketchless_height_factor = height_factor; + sketch_len = Sketch::calc_vector_length(num_nodes); + sketch_err = DEFAULT_SKETCH_ERR; + + std::cout << "BATCH SIZE: " << update_batch_size << " HEIGHT FACTOR " << height_factor << std::endl; + + // Seeds + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + int seed = dist(rng); + bcast(&seed, sizeof(int), 0); + std::cout << "SEED: " << seed << std::endl; + rng.seed(seed); + for (int i = 0; i < world_rank; i++) + dist(rng); + int tier_seed = dist(rng); + + if (world_size != num_tiers+1) + FAIL() << "MPI world size too small for graph with " << num_nodes << " vertices. Correct world size is: " << num_tiers+1; + + if (world_rank == 0) { + int seed = time(NULL); + srand(seed); + std::cout << "InputNode seed: " << seed << std::endl; + // InputNode input_node(num_nodes, num_tiers, update_batch_size, seed); + HybridConnectivityManager<> hybrid_manager( + num_nodes, num_tiers, update_batch_size, seed + ); + hybrid_manager.set_threshold(threshold); + long edgecount = stream.edges(); + // long count = 100000000; + // edgecount = std::min(edgecount, count); + auto X = std::chrono::high_resolution_clock::now(); + for (long i = 0; i < edgecount; i++) { + // Read an update from the stream and have the input node process it + GraphUpdate update = stream.get_edge(); + hybrid_manager.update(update); + unlikely_if(i%1000000 == 0 || i == edgecount-1) { + std::cout << "FINISHED UPDATE " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; + // std::cout << "Memory usage: " << hybrid_manager.cf_algo.getMemUsage() / 1000000 << std::endl; + std::cout << "Sketched nodes: " << hybrid_manager.num_sketched_vertices() << " out of " << num_nodes << std::endl; + if (i%20000000 == 0 || i == edgecount-1) { + std::cout << "Sketched nodes: " << hybrid_manager.sketched_node_count() << " out of " << stream.nodes() << std::endl; + std::cout << "- Space usage of CF: " << hybrid_manager.get_space_usage_cf()/(1024*1024) << " MB" << std::endl; + std::cout << "- Space usage of Driver: " << hybrid_manager.get_space_usage_driver()/(1024*1024) << " MB" << std::endl; + std::cout << "- Space usage of Sketches: " << hybrid_manager.space_usage_conn_sketch()/(1024*1024) << " MB" << std::endl; + std::cout << "- Space usage of Recovery Sketches: " << hybrid_manager.space_usage_recovery_sketch()/(1024*1024) << " MB" << std::endl; + std::cout << "- Total edges: " << hybrid_manager.total_edges() << std::endl; + std::cout << "- Sketched edges: " << hybrid_manager.num_sketched_edges() << std::endl; + double percent_sketched = 100.0 * ((double)hybrid_manager.num_sketched_edges()) / ((double)hybrid_manager.total_edges()); + std::cout << "- Percent sketched edges: " << percent_sketched << "%" << std::endl; + } + } + } + // Communicate to all other nodes that the stream has ended + hybrid_manager.sketching_algo.end(); + auto time = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - X).count(); + std::cout << "Total time(ms): " << (time/1000) << std::endl; + + std::ofstream file; + file.open ("./../results/mpi_update_results.txt", std::ios_base::app); + file << stream_file << " UPDATES/SECOND: " << edgecount/(time/1000)*1000 << std::endl; + file.close(); + + } else if (world_rank < num_tiers+1) { + int tier_num = world_rank-1; + TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed); + tier_node.main(); + } +} + +TEST(GraphTiersSuite, 
hybrid_mini_correctness_test) { + int world_rank_buf; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf); + uint32_t world_rank = world_rank_buf; + int world_size_buf; + MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf); + uint32_t world_size = world_size_buf; + + uint32_t num_nodes = 100; + uint32_t num_tiers = log2(num_nodes)/(log2(3)-1); + if (world_size != num_tiers+1) + FAIL() << "MPI world size too small for graph with " << num_nodes << " vertices. Correct world size is: " << num_tiers+1; + // Parameters + int update_batch_size = 1; + height_factor = 1; + sketch_len = Sketch::calc_vector_length(num_nodes); + sketch_err = DEFAULT_SKETCH_ERR; + + // Seeds + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + int seed = dist(rng); + bcast(&seed, sizeof(int), 0); + std::cout << "SEED: " << seed << std::endl; + rng.seed(seed); + for (int i = 0; i < world_rank; i++) + dist(rng); + int tier_seed = dist(rng); + + if (world_rank == 0) { + int seed = time(NULL); + srand(seed); + std::cout << "InputNode seed: " << seed << std::endl; + // InputNode input_node(num_nodes, num_tiers, update_batch_size, seed); + // + HybridConnectivityManager hybrid_driver( + num_nodes, num_tiers, update_batch_size, seed + ); + GraphVerifier gv(num_nodes); + // Link all of the nodes into 1 connected component + for (node_id_t i = 0; i < num_nodes-1; i++) { + hybrid_driver.update({{i, i+1}, INSERT}); + gv.edge_update({i,i+1}); + std::cout << "Attempting query" << std::endl; + std::vector> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << num_nodes-i-1 << " components" << std::endl; + FAIL(); + } + } + // One by one cut all of the nodes into singletons + for (node_id_t i = 0; i < num_nodes-1; i++) { + hybrid_driver.update({{i, i+1}, DELETE}); + gv.edge_update({i,i+1}); + std::vector> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after cutting nodes " << i << " and " << i+1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << i+2 << " components" << std::endl; + FAIL(); + } + } + // Communicate to all other nodes that the stream has ended + hybrid_driver.sketching_algo.end(); + } else if (world_rank < num_tiers+1) { + int tier_num = world_rank-1; + TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed); + tier_node.main(); + } +} + +TEST(GraphTiersSuite, hybrid_small_correctness_test) { + int world_rank_buf; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf); + uint32_t world_rank = world_rank_buf; + int world_size_buf; + MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf); + uint32_t world_size = world_size_buf; + + uint32_t num_nodes = 512; + + uint32_t num_tiers = log2(num_nodes)/(log2(3)-1); + if (world_size != num_tiers+1) + FAIL() << "MPI world size too small for graph with " << num_nodes << " vertices. 
Correct world size is: " << num_tiers+1;
+ // Parameters + int update_batch_size = 1; + height_factor = 1; + sketch_len = Sketch::calc_vector_length(num_nodes); + sketch_err = DEFAULT_SKETCH_ERR; +
+ // Seeds + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + int seed = dist(rng); + bcast(&seed, sizeof(int), 0); + std::cout << "SEED: " << seed << std::endl; + rng.seed(seed); + for (int i = 0; i < world_rank; i++) + dist(rng); + int tier_seed = dist(rng); +
+ if (world_rank == 0) { + int seed = time(NULL); + srand(seed); + std::cout << "InputNode seed: " << seed << std::endl; + // InputNode input_node(num_nodes, num_tiers, update_batch_size, seed); + // + HybridConnectivityManager hybrid_driver( + num_nodes, num_tiers, update_batch_size, seed + ); + hybrid_driver.set_threshold(10); + GraphVerifier gv(num_nodes);
+ // Link all of the nodes into 1 connected component + for (node_id_t i = 0; i < num_nodes-1; i++) { + hybrid_driver.update({{i, i+1}, INSERT}); + gv.edge_update({i,i+1}); + // std::cout << "Attempting query" << std::endl; + std::vector<std::set<node_id_t>> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << num_nodes-i-1 << " components" << std::endl; + FAIL(); + } + }
+ // augment first few nodes so that they are hubs for the first half of the nodes. + node_id_t hub_nodes = 25; + for (node_id_t i=0; i < hub_nodes; i++) { + // don't insert any edges that already exist: + for (node_id_t j = hub_nodes+2; j < num_nodes/2; j++) { + hybrid_driver.update({{i, j}, INSERT}); + gv.edge_update({i,j}); + } + std::vector<std::set<node_id_t>> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after inserting hub edges from node " << i << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl; + FAIL(); + } + } + std::cout << "Number of sketched nodes: " << hybrid_driver.num_sketched_vertices() << std::endl;
+ for (node_id_t i=0; i < hub_nodes; i++) { + for (node_id_t j = hub_nodes+2; j < num_nodes/2; j++) { + hybrid_driver.update({{i, j}, DELETE}); + gv.edge_update({i,j}); + } + std::vector<std::set<node_id_t>> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after deleting hub edges from node " << i << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl; + FAIL(); + } + } + std::cout << "Number of sketched nodes: " << hybrid_driver.num_sketched_vertices() << std::endl; +
+ // One by one cut all of the nodes into singletons + for (node_id_t i = 0; i < num_nodes-1; i++) { + hybrid_driver.update({{i, i+1}, DELETE}); + gv.edge_update({i,i+1}); + std::vector<std::set<node_id_t>> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after cutting nodes " << i << " and " << i+1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << i+2 << " components" << std::endl; + FAIL(); + } + }
+ // Communicate to all other nodes that the stream has ended
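+ // NOTE: end() is what releases the TierNode ranks from tier_node.main();
+ // without it they would presumably keep waiting for batches and the
+ // runner's MPI_Finalize would never be reached.
+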
+        hybrid_driver.sketching_algo.end();
+    } else if (world_rank < num_tiers+1) {
+        int tier_num = world_rank-1;
+        TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed);
+        tier_node.main();
+    }
+}
+
+// TEST(GraphTiersSuite, hybrid_mini_replacement_test) {
+//     int world_rank_buf;
+//     MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf);
+//     uint32_t world_rank = world_rank_buf;
+//     int world_size_buf;
+//     MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf);
+//     uint32_t world_size = world_size_buf;
+
+//     uint32_t num_nodes = 100;
+//     uint32_t num_tiers = log2(num_nodes)/(log2(3)-1);
+//     if (world_size != num_tiers+1)
+//         FAIL() << "MPI world size incorrect for graph with " << num_nodes << " vertices. Correct world size is: " << num_tiers+1;
+//     // Parameters
+//     int update_batch_size = 1;
+//     height_factor = 1;
+//     sketch_len = Sketch::calc_vector_length(num_nodes);
+//     sketch_err = DEFAULT_SKETCH_ERR;
+
+//     // Seeds
+//     std::random_device dev;
+//     std::mt19937 rng(dev());
+//     std::uniform_int_distribution<int> dist(0,MAX_INT);
+//     int seed = dist(rng);
+//     bcast(&seed, sizeof(int), 0);
+//     std::cout << "SEED: " << seed << std::endl;
+//     rng.seed(seed);
+//     for (int i = 0; i < world_rank; i++)
+//         dist(rng);
+//     int tier_seed = dist(rng);
+
+//     if (world_rank == 0) {
+//         int seed = time(NULL);
+//         srand(seed);
+//         std::cout << "InputNode seed: " << seed << std::endl;
+//         InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
+//         GraphVerifier gv(num_nodes);
+//         // Link all of the nodes into 1 connected component
+//         for (node_id_t i = 0; i < num_nodes-1; i++) {
+//             input_node.update({{i, i+1}, INSERT});
+//             gv.edge_update({i,i+1});
+//             std::vector<std::set<node_id_t>> cc = input_node.cc_query();
+//             try {
+//                 // gv.reset_cc_state();
+//                 gv.verify_cc_from_component_set(cc);
+//             } catch (IncorrectCCException& e) {
+//                 std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl;
+//                 std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << num_nodes-i-1 << " components" << std::endl;
+//                 FAIL();
+//             }
+//         }
+//         // Generate a random bridge
+//         node_id_t first = rand() % num_nodes;
+//         node_id_t second = rand() % num_nodes;
+//         while(first == second || second == first+1 || first == second+1)
+//             second = rand() % num_nodes;
+//         input_node.update({{first, second}, INSERT});
+//         gv.edge_update({first, second});
+//         node_id_t distance = std::max(first, second) - std::min(first, second);
+//         // Cut a random edge that should be replaced by the bridge
+//         first = std::min(first, second) + rand() % (distance-1);
+//         input_node.update({{first, first+1}, DELETE});
+//         gv.edge_update({first, first+1});
+//         // Check the connected components
+//         std::vector<std::set<node_id_t>> cc = input_node.cc_query();
+//         try {
+//             // gv.reset_cc_state();
+//             gv.verify_cc_from_component_set(cc);
+//         } catch (IncorrectCCException& e) {
+//             std::cout << "Incorrect cc found after cutting nodes " << first << " and " << first+1 << std::endl;
+//             std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
+//             FAIL();
+//         }
+//         // Communicate to all other nodes that the stream has ended
+//         input_node.end();
+//     } else if (world_rank < num_tiers+1) {
+//         int tier_num = world_rank-1;
+//         TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed);
+//         tier_node.main();
+//     }
+// }
+
+// TEST(GraphTiersSuite, hybrid_mini_batch_test) {
+//     int world_rank_buf;
+//     MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf);
+//     uint32_t world_rank = world_rank_buf;
+//     int world_size_buf;
+//     MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf);
+//     uint32_t world_size = world_size_buf;
+
+//     uint32_t num_nodes = 100;
+//     uint32_t num_tiers = log2(num_nodes)/(log2(3)-1);
+//     if (world_size != num_tiers+1)
+//         FAIL() << "MPI world size incorrect for graph with " << num_nodes << " vertices. Correct world size is: " << num_tiers+1;
+//     // Parameters
+//     int update_batch_size = 10;
+//     height_factor = 1;
+//     sketch_len = Sketch::calc_vector_length(num_nodes);
+//     sketch_err = DEFAULT_SKETCH_ERR;
+
+//     // Seeds
+//     std::random_device dev;
+//     std::mt19937 rng(dev());
+//     std::uniform_int_distribution<int> dist(0,MAX_INT);
+//     int seed = dist(rng);
+//     bcast(&seed, sizeof(int), 0);
+//     std::cout << "SEED: " << seed << std::endl;
+//     rng.seed(seed);
+//     for (int i = 0; i < world_rank; i++)
+//         dist(rng);
+//     int tier_seed = dist(rng);
+
+//     if (world_rank == 0) {
+//         int seed = time(NULL);
+//         srand(seed);
+//         std::cout << "InputNode seed: " << seed << std::endl;
+//         InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
+//         GraphVerifier gv(num_nodes);
+//         // Link all of the nodes into 1 connected component
+//         for (node_id_t i = 0; i < num_nodes-1; i++) {
+//             input_node.update({{i, i+1}, INSERT});
+//             gv.edge_update({i,i+1});
+//             std::vector<std::set<node_id_t>> cc = input_node.cc_query();
+//             try {
+//                 // gv.reset_cc_state();
+//                 gv.verify_cc_from_component_set(cc);
+//             } catch (IncorrectCCException& e) {
+//                 std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl;
+//                 std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << num_nodes-i-1 << " components" << std::endl;
+//                 FAIL();
+//             }
+//         }
+//         // Add a batch that has no isolations
+//         input_node.process_all_updates();
+//         for (node_id_t i=0; i<(node_id_t)update_batch_size; i++) {
+//             input_node.update({{i, i+2}, INSERT});
+//             gv.edge_update({i,i+2});
+//         }
+//         // Check the connected components
+//         std::vector<std::set<node_id_t>> cc = input_node.cc_query();
+//         try {
+//             // gv.reset_cc_state();
+//             gv.verify_cc_from_component_set(cc);
+//         } catch (IncorrectCCException& e) {
+//             std::cout << "Incorrect cc found after batch with no isolations" << std::endl;
+//             std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
+//             FAIL();
+//         }
+//         for (node_id_t i=0; i<(node_id_t)update_batch_size; i++) {
+//             input_node.update({{i, i+2}, DELETE});
+//             gv.edge_update({i,i+2});
+//         }
+//         input_node.process_all_updates();
+//         // Add a batch that has one isolated deletion in the middle
+//         for (node_id_t i=0; i<(node_id_t)update_batch_size/2-2; i++) {
+//             input_node.update({{i, i+2}, INSERT});
+//             gv.edge_update({i,i+2});
+//         }
+//         input_node.update({{(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1}, DELETE});
+//         gv.edge_update({(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1});
+//         for (node_id_t i=(node_id_t)update_batch_size/2+1; i<(node_id_t)update_batch_size+2; i++) {
+//             input_node.update({{i, i+3}, INSERT});
+//             gv.edge_update({i,i+3});
+//         }
+//         // Check the connected components
+//         cc = input_node.cc_query();
+//         try {
+//             // gv.reset_cc_state();
+//             gv.verify_cc_from_component_set(cc);
+//         } catch (IncorrectCCException& e) {
+//             std::cout << "Incorrect cc found after batch with one isolated deletion" << std::endl;
+//             std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
+//             FAIL();
+//         }
+//         input_node.update({{(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1}, INSERT});
+//         gv.edge_update({(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1});
+//         input_node.process_all_updates();
+//         // Add a batch with multiple forest edge deletions
+//         for (node_id_t i=0; i<(node_id_t)update_batch_size/2-2; i++) {
+//             input_node.update({{i, i+3}, INSERT});
+//             gv.edge_update({i,i+3});
+//         }
+//         input_node.update({{2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+2}, INSERT}); // Add a replacement edge
+//         gv.edge_update({2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+2});
+//         input_node.update({{2*(node_id_t)update_batch_size+2, 2*(node_id_t)update_batch_size+3}, DELETE}); // First isolation
+//         gv.edge_update({2*(node_id_t)update_batch_size+2, 2*(node_id_t)update_batch_size+3});
+//         input_node.update({{2*(node_id_t)update_batch_size+4, 2*(node_id_t)update_batch_size+5}, DELETE}); // Non-replacing delete
+//         gv.edge_update({2*(node_id_t)update_batch_size+4, 2*(node_id_t)update_batch_size+5});
+//         input_node.update({{2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+1}, DELETE}); // Replacement delete
+//         gv.edge_update({2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+1});
+//         for (node_id_t i=(node_id_t)update_batch_size/2+1; i<(node_id_t)update_batch_size; i++) {
+//             input_node.update({{i, i+3}, INSERT});
+//             gv.edge_update({i,i+3});
+//         }
+//         // Check the connected components
+//         cc = input_node.cc_query();
+//         try {
+//             // gv.reset_cc_state();
+//             gv.verify_cc_from_component_set(cc);
+//         } catch (IncorrectCCException& e) {
+//             std::cout << "Incorrect cc found after batch with multiple forest edge deletions" << std::endl;
+//             std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
+//             FAIL();
+//         }
+//         // Communicate to all other nodes that the stream has ended
+//         input_node.end();
+//     } else if (world_rank < num_tiers+1) {
+//         int tier_num = world_rank-1;
+//         TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed);
+//         tier_node.main();
+//     }
+// }
+
+TEST(GraphTiersSuite, hybrid_correctness_test) {
+    int world_rank_buf;
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf);
+    uint32_t world_rank = world_rank_buf;
+    int world_size_buf;
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf);
+    uint32_t world_size = world_size_buf;
+
+    BinaryGraphStream stream(stream_file, 100000);
+    uint32_t num_nodes = stream.nodes();
+    uint32_t num_tiers = log2(num_nodes)/(log2(3)-1);
+    // Parameters
+    int update_batch_size = DEFAULT_BATCH_SIZE;
+    height_factor = 1./log2(log2(num_nodes));
+    sketch_len = Sketch::calc_vector_length(num_nodes);
+    sketch_err = DEFAULT_SKETCH_ERR;
+
+    // Seeds
+    std::random_device dev;
+    std::mt19937 rng(dev());
+    std::uniform_int_distribution<int> dist(0,MAX_INT);
+    int seed = dist(rng);
+    bcast(&seed, sizeof(int), 0);
+    std::cout << "SEED: " << seed << std::endl;
+    rng.seed(seed);
+    for (int i = 0; i < world_rank; i++)
+        dist(rng);
+    int tier_seed = dist(rng);
+
+    if (world_size != num_tiers+1)
+        FAIL() << "MPI world size incorrect for graph with " << num_nodes << " vertices."
+               << " Correct world size is: " << num_tiers+1;
+
+    if (world_rank == 0) {
+        int seed = time(NULL);
+        srand(seed);
+        std::cout << "InputNode seed: " << seed << std::endl;
+        // initialize data structures
+        // InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
+        // SCCWN cluster_forest(num_nodes);
+        HybridConnectivityManager hybrid_driver(
+            num_nodes, num_tiers, update_batch_size, seed
+        );
+
+        GraphVerifier gv(num_nodes);
+        int edgecount = stream.edges();
+        int count = 20000000;
+        edgecount = std::min(edgecount, count);
+        for (int i = 0; i < edgecount; i++) {
+            // Read an update from the stream and have the input node process it
+            GraphUpdate update = stream.get_edge();
+            hybrid_driver.update(update);
+            // Correctness testing by performing a cc query
+            gv.edge_update(update.edge);
+            unlikely_if(i%100000 == 0 || i == edgecount-1) {
+                std::vector<std::set<node_id_t>> cc = hybrid_driver.cc_query();
+                try {
+                    // gv.reset_cc_state();
+                    gv.verify_cc_from_component_set(cc);
+                    std::cout << "Update " << i << ", CCs correct." << std::endl;
+                } catch (IncorrectCCException& e) {
+                    std::cout << "Incorrect connected components found at update " << i << std::endl;
+                    std::cout << "GOT: " << cc.size() << std::endl;
+                    hybrid_driver.sketching_algo.end();
+                    FAIL();
+                }
+            }
+        }
+        std::ofstream file;
+        file.open("mpi_kron_results.txt", std::ios_base::app);
+        file << stream_file << " passed correctness test." << std::endl;
+        file.close();
+        // Communicate to all other nodes that the stream has ended
+        hybrid_driver.sketching_algo.end();
+
+    } else if (world_rank < num_tiers+1) {
+        int tier_num = world_rank-1;
+        TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed);
+        tier_node.main();
+    }
+}
diff --git a/test/link_cut_tree_test.cpp b/test/link_cut_tree_test.cpp
index 312f567..7be18cc 100644
--- a/test/link_cut_tree_test.cpp
+++ b/test/link_cut_tree_test.cpp
@@ -78,34 +78,35 @@ TEST(LinkCutTreeSuite, join_split_test) {
     // power of 2 node count
     int nodecount = 1024;
     LinkCutTree lct(nodecount);
+    lct.initialize_all_nodes();
     // Join every 2,4,8,16... nodes
     for (int i = 2; i <= nodecount; i*=2) {
         for (int j = 0; j < nodecount; j+=i) {
-            lct.nodes[j].splay();
-            lct.nodes[j+i/2].splay();
+            lct.node(j).splay();
+            lct.node(j+i/2).splay();
             //std::cout << "Join nodes: " << &nodes[j] << " and " << &nodes[j+i/2] << "\n";
-            LinkCutNode* p = lct.join(&lct.nodes[j], &lct.nodes[j+i/2]);
-            EXPECT_EQ(p->get_head(), &lct.nodes[j]);
-            EXPECT_EQ(p->get_tail(), &lct.nodes[j+i-1]);
+            LinkCutNode* p = lct.join(&lct.node(j), &lct.node(j+i/2));
+            EXPECT_EQ(p->get_head(), &lct.node(j));
+            EXPECT_EQ(p->get_tail(), &lct.node(j+i-1));
         }
         // Validate all nodes
         for (int i = 0; i < nodecount; i++) {
-            validate(&lct.nodes[i]);
+            validate(&lct.node(i));
        }
    }
    // Split Every ...16,8,4,2 nodes
    for (int i = nodecount; i > 1; i/=2) {
        for (int j = 0; j < nodecount; j+=i) {
            //std::cout << "Split on node: " << &nodes[j+i/2-1] << "\n";
-            std::pair<LinkCutNode*, LinkCutNode*> paths = lct.split(&lct.nodes[j+i/2-1]);
-            EXPECT_EQ(paths.first->get_head(), &lct.nodes[j]);
-            EXPECT_EQ(paths.first->get_tail(), &lct.nodes[j+i/2-1]);
-            EXPECT_EQ(paths.second->get_head(), &lct.nodes[j+i/2]);
-            EXPECT_EQ(paths.second->get_tail(), &lct.nodes[j+i-1]);
+            std::pair<LinkCutNode*, LinkCutNode*> paths = lct.split(&lct.node(j+i/2-1));
+            EXPECT_EQ(paths.first->get_head(), &lct.node(j));
+            EXPECT_EQ(paths.first->get_tail(), &lct.node(j+i/2-1));
+            EXPECT_EQ(paths.second->get_head(), &lct.node(j+i/2));
+            EXPECT_EQ(paths.second->get_tail(), &lct.node(j+i-1));
        }
        // Validate all nodes
        for (int i = 0; i < nodecount; i++) {
-            validate(&lct.nodes[i]);
+            validate(&lct.node(i));
        }
    }
 }
@@ -114,48 +115,57 @@ TEST(LinkCutTreeSuite, expose_simple_test) {
     int pathcount = 100;
     int nodesperpath = 100;
     LinkCutTree lct(nodesperpath*pathcount);
+    lct.initialize_all_nodes();
     // Link all the nodes in each path together
     for (int path = 0; path < pathcount; path++) {
         for (int node = 0; node < nodesperpath-1; node++) {
-            lct.nodes[path*nodesperpath+node].splay();
-            lct.join(&lct.nodes[path*nodesperpath+node], &lct.nodes[path*nodesperpath+node+1]);
+            lct.node(path*nodesperpath+node).splay();
+            lct.join(&lct.node(path*nodesperpath+node), &lct.node(path*nodesperpath+node+1));
        }
    }
    // Link all the paths together with dparent pointers half way up the previous path
    for (int path = 1; path < pathcount; path++) {
-        lct.nodes[path*nodesperpath].set_dparent(&lct.nodes[path*nodesperpath-nodesperpath/2]);
+        lct.node(path*nodesperpath).set_dparent(&lct.node(path*nodesperpath-nodesperpath/2));
    }
    // Call expose on the node half way up the bottom path
-    LinkCutNode* p = lct.expose(&lct.nodes[pathcount*nodesperpath-nodesperpath/2]);
+    LinkCutNode* p = lct.expose(&lct.node(pathcount*nodesperpath-nodesperpath/2));
    // Validate all nodes
    for (int i = 0; i < pathcount*nodesperpath; i++) {
-        validate(&lct.nodes[i]);
+        validate(&lct.node(i));
    }
    // Validate head and tail of returned path
-    EXPECT_EQ(p->get_head(), &lct.nodes[0]);
-    EXPECT_EQ(p->get_tail(), &lct.nodes[pathcount*nodesperpath-nodesperpath/2]) << "Exposed node not tail of path";
+    EXPECT_EQ(p->get_head(), &lct.node(0));
+    EXPECT_EQ(p->get_tail(), &lct.node(pathcount*nodesperpath-nodesperpath/2)) << "Exposed node not tail of path";
    // Validate all dparent pointers
    for (int path = 0; path < pathcount; path++) {
-        EXPECT_EQ(lct.nodes[(path+1)*nodesperpath-nodesperpath/2+1].get_dparent(), &lct.nodes[(path+1)*nodesperpath-nodesperpath/2]);
+        EXPECT_EQ(lct.node((path+1)*nodesperpath-nodesperpath/2+1).get_dparent(), &lct.node((path+1)*nodesperpath-nodesperpath/2));
    }
 }
 
 TEST(LinkCutTreeSuite, random_links_and_cuts) {
+    // TODO - restore the test cases.
     int nodecount = 1000;
     LinkCutTree lct(nodecount);
+    lct.initialize_all_nodes();
     int seed = time(NULL);
     // Link all nodes
     for (int i = 0; i < nodecount-1; i++) {
         lct.link(i,i+1, rand()%100);
-        ASSERT_TRUE(std::all_of(lct.nodes.begin(), lct.nodes.end(), [](auto& node){return validate(&node);}))
-            << "One or more invalid nodes found" << std::endl;
+        // ASSERT_TRUE(std::all_of(lct.nodes.begin(), lct.nodes.end(), [](auto& node){return validate(&node);}))
+        //     << "One or more invalid nodes found" << std::endl;
+        for (int j = 0; j < nodecount; j++) {
+            ASSERT_TRUE(validate(&lct.node(j)));
+        }
     }
     // Cut every node
     for (int i = 0; i < nodecount-1; i+=1) {
         lct.cut(i,i+1);
-        ASSERT_TRUE(std::all_of(lct.nodes.begin(), lct.nodes.end(), [](auto& node){return validate(&node);}))
-            << "One or more invalid nodes found" << std::endl;
+        // ASSERT_TRUE(std::all_of(lct.nodes.begin(), lct.nodes.end(), [](auto& node){return validate(&node);}))
+        //     << "One or more invalid nodes found" << std::endl;
+        for (int j = 0; j < nodecount; j++) {
+            ASSERT_TRUE(validate(&lct.node(j)));
+        }
     }
     // Do random links and cuts
     int n = 5000;
@@ -169,21 +179,24 @@ TEST(LinkCutTreeSuite, random_links_and_cuts) {
             //std::cout << i << ": Linking " << a << " and " << b << " weight " << weight << std::endl;
             lct.link(a, b, weight);
             //print_paths(&lct.nodes);
-        } else if (lct.nodes[a].edges.find(&lct.nodes[b]-&lct.nodes[0]) != lct.nodes[a].edges.end()) {
+        } else if (lct.node(a).edges.find(&lct.node(b)-&lct.node(0)) != lct.node(a).edges.end()) {
            //std::cout << i << ": Cutting " << a << " and " << b << std::endl;
            lct.cut(a, b);
            //print_paths(&lct.nodes);
        }
-        ASSERT_TRUE(std::all_of(lct.nodes.begin(), lct.nodes.end(), [](auto& node){return validate(&node);}))
-            << "One or more invalid nodes found" << std::endl;
+        // ASSERT_TRUE(std::all_of(lct.nodes.begin(), lct.nodes.end(), [](auto& node){return validate(&node);}))
+        //     << "One or more invalid nodes found" << std::endl;
+        for (int j = 0; j < nodecount; j++) {
+            ASSERT_TRUE(validate(&lct.node(j)));
+        }
        }
    }
    // Manually compute the aggregates for each aux tree
    std::map<LinkCutNode*, uint32_t> path_aggregates;
    for (int i = 0; i < nodecount; i++) {
-        uint32_t nodemax = std::max(lct.nodes[i].edges[lct.nodes[i].preferred_edges.first],
-            lct.nodes[i].edges[lct.nodes[i].preferred_edges.second]);
-        LinkCutNode* curr = &lct.nodes[i];
+        uint32_t nodemax = std::max(lct.node(i).edges[lct.node(i).preferred_edges.first],
+            lct.node(i).edges[lct.node(i).preferred_edges.second]);
+        LinkCutNode* curr = &lct.node(i);
        while (curr) {
            if (curr->get_parent() == nullptr) {
                if (path_aggregates.find(curr) != path_aggregates.end()) {
diff --git a/test/mpi_graph_tiers_test.cpp b/test/mpi_graph_tiers_test.cpp
index 3c05fd1..95fb799 100644
--- a/test/mpi_graph_tiers_test.cpp
+++ b/test/mpi_graph_tiers_test.cpp
@@ -5,17 +5,18 @@
 #include
 #include
 #include
-#include
+// #include
 #include "mpi_nodes.h"
 #include "binary_graph_stream.h"
-#include "mat_graph_verifier.h"
+// #include "mat_graph_verifier.h"
+#include "graph_verifier.h"
 #include "util.h"
 
 const int DEFAULT_BATCH_SIZE = 100;
 const vec_t DEFAULT_SKETCH_ERR = 1;
 
-TEST(GraphTierSuite, mpi_update_speed_test) {
+TEST(GraphTierSuite, mpi_mixed_speed_test) {
     int world_rank_buf;
     MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf);
     uint32_t world_rank = world_rank_buf;
@@ -25,8 +26,109 @@
     BinaryGraphStream stream(stream_file, 100000);
     uint32_t num_nodes = stream.nodes();
-    uint32_t num_tiers = log2(num_nodes)/(log2(3)-1);
+    // uint32_t num_tiers = log2(num_nodes)/(log2(3)-1);
+    uint32_t num_tiers = world_size - 1;
+    std::cout << "NUM TIERS: " << num_tiers << std::endl;
+
+    // Parameters
+    int update_batch_size = (batch_size_arg==0) ? DEFAULT_BATCH_SIZE : batch_size_arg;
+    height_factor = (height_factor_arg==0) ? 1./log2(log2(num_nodes)) : height_factor_arg;
+    sketchless_height_factor = height_factor;
+    sketch_len = Sketch::calc_vector_length(num_nodes);
+    sketch_err = DEFAULT_SKETCH_ERR;
+    std::cout << "BATCH SIZE: " << update_batch_size << " HEIGHT FACTOR " << height_factor << " SKETCH BUFFER: " << SKETCH_BUFFER_SIZE << std::endl;
+
+    // Seeds
+    std::random_device dev;
+    std::mt19937 rng(dev());
+    std::uniform_int_distribution<int> dist(0,MAX_INT);
+    int seed = dist(rng);
+    bcast(&seed, sizeof(int), 0);
+    std::cout << "SEED: " << seed << std::endl;
+    rng.seed(seed);
+    for (int i = 0; i < world_rank; i++)
+        dist(rng);
+    int tier_seed = dist(rng);
+
+    if (world_size != num_tiers+1)
+        FAIL() << "MPI world size incorrect for graph with " << num_nodes << " vertices. Correct world size is: " << num_tiers+1;
+
+    if (world_rank == 0) {
+        int seed = time(NULL);
+        srand(seed);
+        std::cout << "InputNode seed: " << seed << std::endl;
+        InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
+        input_node.initialize_all_nodes();
+        long edgecount = stream.edges();
+        // long count = 100000000;
+        // edgecount = std::min(edgecount, count);
+        long total_update_time = 0;
+        long total_query_time = 0;
+        auto update_timer = std::chrono::high_resolution_clock::now();
+        auto query_timer = update_timer;
+        bool doing_updates = true;
+        for (long i = 0; i < edgecount; i++) {
+            // Read an update from the stream and have the input node process it
+            GraphUpdate operation = stream.get_edge();
+            if (operation.type == 2) { // 2 is the symbol for queries
+                unlikely_if (doing_updates) {
+                    total_update_time += std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - update_timer).count();
+                    doing_updates = false;
+                    query_timer = std::chrono::high_resolution_clock::now();
+                }
+                input_node.connectivity_query(operation.edge.src, operation.edge.dst);
+            } else {
+                unlikely_if (!doing_updates) {
+                    total_query_time += std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - query_timer).count();
+                    doing_updates = true;
+                    update_timer = std::chrono::high_resolution_clock::now();
+                }
+                input_node.update(operation);
+            }
+            unlikely_if(i%1000000 == 0 || i == edgecount-1) {
+                std::cout << "FINISHED OPERATION " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl;
+            }
+        }
+        if (doing_updates) {
+            total_update_time += std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - update_timer).count();
+        } else {
+            total_query_time += std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - query_timer).count();
+        }
+        // Communicate to all other nodes that the stream has ended
+        input_node.end();
+        std::cout << "Total update time(ms): " << (total_update_time/1000) << std::endl;
+        std::cout << "Total query time(ms): " << (total_query_time/1000) << std::endl;
+        std::cout << "Total time(ms): " << (total_query_time + total_update_time)/1000 << std::endl;
+
+        std::ofstream file;
+        std::string out_file = "./../results/mpi_speed_results/" + stream_file.substr(stream_file.find("/") + 1) + ".txt";
+        std::cout << "WRITING RESULTS TO " << out_file << std::endl;
+        file.open(out_file, std::ios_base::app);
+        file << " UPDATES/SECOND: " << (0.9*edgecount)/(total_update_time) << std::endl;
+        file << " QUERIES/SECOND: " << (0.1*edgecount)/(total_query_time) << std::endl;
+        file.close();
+
+    } else if (world_rank < num_tiers+1) {
+        int tier_num = world_rank-1;
+        TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed);
+        tier_node.main();
+    }
+}
+
+TEST(GraphTierSuite, mpi_update_speed_test) {
+    int world_rank_buf;
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf);
+    uint32_t world_rank = world_rank_buf;
+    int world_size_buf;
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf);
+    uint32_t world_size = world_size_buf;
+
+    BinaryGraphStream stream(stream_file, 100000);
+    uint32_t num_nodes = stream.nodes();
+    // uint32_t num_tiers = log2(num_nodes)/(log2(3)-1);
+    uint32_t num_tiers = world_size - 1;
+    std::cout << "NUM TIERS: " << num_tiers << std::endl;
     // Parameters
     int update_batch_size = (batch_size_arg==0) ? DEFAULT_BATCH_SIZE : batch_size_arg;
     height_factor = (height_factor_arg==0) ? 1./log2(log2(num_nodes)) : height_factor_arg;
@@ -56,6 +158,7 @@ TEST(GraphTierSuite, mpi_update_speed_test) {
     srand(seed);
     std::cout << "InputNode seed: " << seed << std::endl;
     InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
+    input_node.initialize_all_nodes();
     long edgecount = stream.edges();
     // long count = 100000000;
     // edgecount = std::min(edgecount, count);
@@ -127,6 +230,7 @@ TEST(GraphTiersSuite, mpi_query_speed_test) {
     srand(seed);
     std::cout << "InputNode seed: " << seed << std::endl;
     InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
+    input_node.initialize_all_nodes();
     long total_time = 0;
     for (int batch = 0; batch < 10; batch++) {
@@ -198,15 +302,16 @@ TEST(GraphTiersSuite, mpi_mini_correctness_test) {
     srand(seed);
     std::cout << "InputNode seed: " << seed << std::endl;
     InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
-    MatGraphVerifier gv(num_nodes);
+    input_node.initialize_all_nodes();
+    GraphVerifier gv(num_nodes);
     // Link all of the nodes into 1 connected component
     for (node_id_t i = 0; i < num_nodes-1; i++) {
         input_node.update({{i, i+1}, INSERT});
-        gv.edge_update(i,i+1);
+        gv.edge_update({i,i+1});
         std::vector<std::set<node_id_t>> cc = input_node.cc_query();
         try {
-            gv.reset_cc_state();
-            gv.verify_soln(cc);
+            // gv.reset_cc_state();
+            gv.verify_cc_from_component_set(cc);
         } catch (IncorrectCCException& e) {
             std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl;
             std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << num_nodes-i-1 << " components" << std::endl;
@@ -216,11 +321,11 @@ TEST(GraphTiersSuite, mpi_mini_correctness_test) {
     // One by one cut all of the nodes into singletons
     for (node_id_t i = 0; i < num_nodes-1; i++) {
         input_node.update({{i, i+1}, DELETE});
-        gv.edge_update(i,i+1);
+        gv.edge_update({i,i+1});
         std::vector<std::set<node_id_t>> cc = input_node.cc_query();
         try {
-            gv.reset_cc_state();
-            gv.verify_soln(cc);
+            // gv.reset_cc_state();
+            gv.verify_cc_from_component_set(cc);
         } catch (IncorrectCCException& e) {
             std::cout << "Incorrect cc found after cutting nodes " << i << " and " << i+1 << std::endl;
             std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << i+2 << " components" << std::endl;
@@ -271,15 +376,16 @@ TEST(GraphTiersSuite, mpi_mini_replacement_test) {
     srand(seed);
     std::cout << "InputNode seed: " << seed << std::endl;
     InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
-    MatGraphVerifier gv(num_nodes);
+    input_node.initialize_all_nodes();
+    GraphVerifier gv(num_nodes);
     // Link all of the nodes into 1 connected component
     for (node_id_t i = 0; i < num_nodes-1; i++) {
         input_node.update({{i, i+1}, INSERT});
-        gv.edge_update(i,i+1);
+        gv.edge_update({i,i+1});
         std::vector<std::set<node_id_t>> cc = input_node.cc_query();
         try {
-            gv.reset_cc_state();
-            gv.verify_soln(cc);
+            // gv.reset_cc_state();
+            gv.verify_cc_from_component_set(cc);
         } catch (IncorrectCCException& e) {
             std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl;
             std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << num_nodes-i-1 << " components" << std::endl;
@@ -292,17 +398,17 @@
     while(first == second || second == first+1 || first == second+1)
         second = rand() % num_nodes;
     input_node.update({{first, second}, INSERT});
-    gv.edge_update(first, second);
+    gv.edge_update({first, second});
     node_id_t distance = std::max(first, second) - std::min(first, second);
     // Cut a random edge that should be replaced by the bridge
     first = std::min(first, second) + rand() % (distance-1);
     input_node.update({{first, first+1}, DELETE});
-    gv.edge_update(first, first+1);
+    gv.edge_update({first, first+1});
     // Check the connected components
     std::vector<std::set<node_id_t>> cc = input_node.cc_query();
     try {
-        gv.reset_cc_state();
-        gv.verify_soln(cc);
+        // gv.reset_cc_state();
+        gv.verify_cc_from_component_set(cc);
     } catch (IncorrectCCException& e) {
         std::cout << "Incorrect cc found after cutting nodes " << first << " and " << first+1 << std::endl;
         std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
@@ -352,15 +458,16 @@ TEST(GraphTiersSuite, mpi_mini_batch_test) {
     srand(seed);
     std::cout << "InputNode seed: " << seed << std::endl;
     InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
-    MatGraphVerifier gv(num_nodes);
+    input_node.initialize_all_nodes();
+    GraphVerifier gv(num_nodes);
     // Link all of the nodes into 1 connected component
     for (node_id_t i = 0; i < num_nodes-1; i++) {
         input_node.update({{i, i+1}, INSERT});
-        gv.edge_update(i,i+1);
+        gv.edge_update({i,i+1});
         std::vector<std::set<node_id_t>> cc = input_node.cc_query();
         try {
-            gv.reset_cc_state();
-            gv.verify_soln(cc);
+            // gv.reset_cc_state();
+            gv.verify_cc_from_component_set(cc);
         } catch (IncorrectCCException& e) {
             std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl;
             std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << num_nodes-i-1 << " components" << std::endl;
@@ -371,13 +478,13 @@
     input_node.process_all_updates();
     for (node_id_t i=0; i<(node_id_t)update_batch_size; i++) {
         input_node.update({{i, i+2}, INSERT});
-        gv.edge_update(i,i+2);
+        gv.edge_update({i,i+2});
     }
     // Check the connected components
     std::vector<std::set<node_id_t>> cc = input_node.cc_query();
     try {
-        gv.reset_cc_state();
-        gv.verify_soln(cc);
+        // gv.reset_cc_state();
+        gv.verify_cc_from_component_set(cc);
     } catch (IncorrectCCException& e) {
         std::cout << "Incorrect cc found after batch with no isolations" << std::endl;
         std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
@@ -385,55 +492,55 @@
     }
     for (node_id_t i=0; i<(node_id_t)update_batch_size; i++) {
         input_node.update({{i, i+2}, DELETE});
-        gv.edge_update(i,i+2);
+        gv.edge_update({i,i+2});
     }
     input_node.process_all_updates();
     // Add a batch that has one isolated deletion in the middle
     for (node_id_t i=0; i<(node_id_t)update_batch_size/2-2; i++) {
         input_node.update({{i, i+2}, INSERT});
-        gv.edge_update(i,i+2);
+        gv.edge_update({i,i+2});
     }
     input_node.update({{(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1}, DELETE});
-    gv.edge_update(update_batch_size/2, update_batch_size/2+1);
+    gv.edge_update({(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1});
     for (node_id_t i=(node_id_t)update_batch_size/2+1; i<(node_id_t)update_batch_size+2; i++) {
         input_node.update({{i, i+3}, INSERT});
-        gv.edge_update(i,i+3);
+        gv.edge_update({i,i+3});
     }
     // Check the connected components
     cc = input_node.cc_query();
     try {
-        gv.reset_cc_state();
-        gv.verify_soln(cc);
+        // gv.reset_cc_state();
+        gv.verify_cc_from_component_set(cc);
     } catch (IncorrectCCException& e) {
         std::cout << "Incorrect cc found after batch with one isolated deletion" << std::endl;
         std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
         FAIL();
     }
     input_node.update({{(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1}, INSERT});
-    gv.edge_update(update_batch_size/2, update_batch_size/2+1);
+    gv.edge_update({(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1});
     input_node.process_all_updates();
     // Add a batch with multiple forest edge deletions
     for (node_id_t i=0; i<(node_id_t)update_batch_size/2-2; i++) {
         input_node.update({{i, i+3}, INSERT});
-        gv.edge_update(i,i+3);
+        gv.edge_update({i,i+3});
     }
     input_node.update({{2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+2}, INSERT}); // Add a replacement edge
-    gv.edge_update(2*update_batch_size, 2*update_batch_size+2);
+    gv.edge_update({2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+2});
     input_node.update({{2*(node_id_t)update_batch_size+2, 2*(node_id_t)update_batch_size+3}, DELETE}); // First isolation
-    gv.edge_update(2*update_batch_size+2, 2*update_batch_size+3);
+    gv.edge_update({2*(node_id_t)update_batch_size+2, 2*(node_id_t)update_batch_size+3});
     input_node.update({{2*(node_id_t)update_batch_size+4, 2*(node_id_t)update_batch_size+5}, DELETE}); // Non-replacing delete
-    gv.edge_update(2*update_batch_size+4, 2*update_batch_size+5);
+    gv.edge_update({2*(node_id_t)update_batch_size+4, 2*(node_id_t)update_batch_size+5});
     input_node.update({{2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+1}, DELETE}); // Replacement delete
-    gv.edge_update(2*update_batch_size, 2*update_batch_size+1);
+    gv.edge_update({2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+1});
     for (node_id_t i=(node_id_t)update_batch_size/2+1; i<(node_id_t)update_batch_size; i++) {
         input_node.update({{i, i+3}, INSERT});
-        gv.edge_update(i,i+3);
+        gv.edge_update({i,i+3});
     }
     // Check the connected components
     cc = input_node.cc_query();
     try {
-        gv.reset_cc_state();
-        gv.verify_soln(cc);
+        // gv.reset_cc_state();
+        gv.verify_cc_from_component_set(cc);
     } catch (IncorrectCCException& e) {
         std::cout << "Incorrect cc found after batch with multiple forest edge deletions" << std::endl;
         std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
@@ -485,7 +592,7 @@ TEST(GraphTiersSuite, mpi_correctness_test) {
     srand(seed);
     std::cout << "InputNode seed: " << seed << std::endl;
     InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
-    MatGraphVerifier gv(num_nodes);
+    GraphVerifier gv(num_nodes);
     int edgecount = stream.edges();
     int count = 20000000;
     edgecount = std::min(edgecount, count);
@@ -494,12 +601,12 @@
     GraphUpdate update = stream.get_edge();
     input_node.update(update);
     // Correctness testing by performing a cc query
-    gv.edge_update(update.edge.src, update.edge.dst);
+    gv.edge_update(update.edge);
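+    // [editor's note, not in the original patch] The MatGraphVerifier ->
+    // GraphVerifier swap throughout this file also changes the call shapes:
+    // edge_update() now takes one edge struct, so gv.edge_update({src, dst})
+    // and gv.edge_update(update.edge) replace the old two-argument form, and
+    // verify_cc_from_component_set(cc) appears to need no reset between
+    // queries, which is presumably why every gv.reset_cc_state() call is
+    // commented out rather than kept.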
     unlikely_if(i%1000 == 0 || i == edgecount-1) {
         std::vector<std::set<node_id_t>> cc = input_node.cc_query();
         try {
-            gv.reset_cc_state();
-            gv.verify_soln(cc);
+            // gv.reset_cc_state();
+            gv.verify_cc_from_component_set(cc);
             std::cout << "Update " << i << ", CCs correct." << std::endl;
         } catch (IncorrectCCException& e) {
             std::cout << "Incorrect connected components found at update " << i << std::endl;
diff --git a/test/skiplist_test.cpp b/test/skiplist_test.cpp
index 3b74b6a..1f9344a 100644
--- a/test/skiplist_test.cpp
+++ b/test/skiplist_test.cpp
@@ -2,8 +2,10 @@
 #include
 #include "skiplist.h"
 #include "euler_tour_tree.h"
+#include "sketch_interfacing.h"
 
-bool SkipListNode::isvalid() {
+template <typename Sketch> requires(SketchColumnConcept<Sketch>)
+bool SkipListNode<Sketch>::isvalid() {
     bool valid = true;
     if (this->up && this->up->down != this) valid = false;
     if (this->down && this->down->up != this) valid = false;
@@ -14,7 +16,8 @@
     return valid;
 }
 
-int SkipListNode::print_list() {
+template <typename Sketch> requires(SketchColumnConcept<Sketch>)
+int SkipListNode<Sketch>::print_list() {
     SkipListNode* curr = this->get_first();
     while (curr) {
         if (curr->node) std::cout << curr->node->vertex << ":\t";
@@ -31,15 +34,17 @@
     return 0;
 }
 
-bool aggregate_correct(SkipListNode* node) {
-    Sketch* naive_agg = new Sketch(sketch_len, node->node->get_seed(), 1, sketch_err);
-    std::set<EulerTourNode*> component = node->get_component();
+bool aggregate_correct(SkipListNode<DefaultSketchColumn>* node) {
+    // Sketch* naive_agg = new Sketch(sketch_len, node->node->get_seed(), 1, sketch_err);
+    DefaultSketchColumn *naive_agg = new DefaultSketchColumn(
+        DefaultSketchColumn::suggest_capacity(sketch_len), node->node->get_seed());
+    std::set<EulerTourNode<DefaultSketchColumn>*> component = node->get_component();
     for (auto ett_node : component) {
         naive_agg->update(ett_node->vertex);
     }
     node->get_root()->process_updates();
-    Sketch* list_agg = node->get_list_aggregate();
-    return *naive_agg == *list_agg;
+    const DefaultSketchColumn &list_agg = node->get_list_aggregate();
+    return *naive_agg == list_agg;
 }
 
 TEST(SkipListSuite, join_split_test) {
@@ -50,33 +55,34 @@
     long seed = time(NULL);
     srand(seed);
-    EulerTourTree ett(num_elements, 0, seed);
-    SkipListNode* nodes[num_elements];
+    EulerTourTree<DefaultSketchColumn> ett(num_elements, 0, seed);
+    ett.initialize_all_nodes();
+    SkipListNode<DefaultSketchColumn>* nodes[num_elements];
     // Construct all of the ett_nodes and singleton SkipList nodes
     for (int i = 0; i < num_elements; i++) {
         ett.update_sketch(i, (vec_t)i);
-        nodes[i] = ett.ett_nodes[i].allowed_caller;
+        nodes[i] = ett.ett_node(i).allowed_caller;
     }
     // Link all the nodes two at a time, then link them all
-    for (int i = 0; i < num_elements; i+=2) SkipListNode::join(nodes[i], nodes[i+1]);
+    for (int i = 0; i < num_elements; i+=2) SkipListNode<DefaultSketchColumn>::join(nodes[i], nodes[i+1]);
     for (int i = 0; i < num_elements; i++) {
         ASSERT_TRUE(nodes[i]->isvalid());
         ASSERT_TRUE(aggregate_correct(nodes[i])) << "Node " << i << " agg incorrect";
     }
-    for (int i = 0; i < num_elements-2; i+=2) SkipListNode::join(nodes[i], nodes[i+2]);
+    for (int i = 0; i < num_elements-2; i+=2) SkipListNode<DefaultSketchColumn>::join(nodes[i], nodes[i+2]);
     for (int i = 0; i < num_elements; i++) {
         ASSERT_TRUE(nodes[i]->isvalid());
         ASSERT_TRUE(aggregate_correct(nodes[i])) << "Node " << i << " agg incorrect";
     }
     // Split all nodes into pairs, then split each pair
-    for (int i = 0; i < num_elements-2; i+=2) SkipListNode::split_left(nodes[i+2]);
+    for (int i = 0; i < num_elements-2; i+=2) SkipListNode<DefaultSketchColumn>::split_left(nodes[i+2]);
     for (int i = 0; i < num_elements; i++) {
         ASSERT_TRUE(nodes[i]->isvalid());
         ASSERT_TRUE(aggregate_correct(nodes[i])) << "Node " << i << " agg incorrect";
     }
-    for (int i = 0; i < num_elements; i+=2) SkipListNode::split_left(nodes[i+1]);
+    for (int i = 0; i < num_elements; i+=2) SkipListNode<DefaultSketchColumn>::split_left(nodes[i+1]);
     for (int i = 0; i < num_elements; i++) {
         ASSERT_TRUE(nodes[i]->isvalid());
         ASSERT_TRUE(aggregate_correct(nodes[i])) << "Node " << i << " agg incorrect";
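
Editorial note on test/skiplist_test.cpp: `aggregate_correct()` above heap-allocates `naive_agg` and never frees it, so the leak grows with every assertion in `join_split_test`. Below is a minimal sketch of a leak-free variant, assuming `DefaultSketchColumn` can live on the stack and that its constructor, `update()`, and `operator==` have exactly the shapes used in the diff; the `SkipListNode<DefaultSketchColumn>` and `EulerTourNode` template spellings are reconstructed from context, not confirmed.

```cpp
// Hypothetical rewrite, not part of the patch: same check as aggregate_correct(),
// but with the naive aggregate on the stack so nothing leaks.
bool aggregate_correct_no_leak(SkipListNode<DefaultSketchColumn>* node) {
    DefaultSketchColumn naive_agg(
        DefaultSketchColumn::suggest_capacity(sketch_len), node->node->get_seed());
    for (auto ett_node : node->get_component())
        naive_agg.update(ett_node->vertex);          // rebuild the aggregate from scratch
    node->get_root()->process_updates();             // flush pending skip-list updates
    return naive_agg == node->get_list_aggregate();  // compare against maintained aggregate
}
```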