diff --git a/CMakeLists.txt b/CMakeLists.txt index 52fc59f..4f2fb12 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,11 +2,13 @@ cmake_minimum_required(VERSION 3.15) project(DynamicQueriesCC) include (FetchContent) +set(Boost_USE_STATIC_LIBS ON) +find_package(Boost REQUIRED COMPONENTS regex context) # Force IPO is enabled cmake_policy(SET CMP0069 NEW) set(CMAKE_POLICY_DEFAULT_CMP0069 NEW) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS ON) #set(INTERPROCEDURAL_OPTIMIZATION TRUE) @@ -20,9 +22,10 @@ if(NOT CMAKE_BUILD_TYPE) endif() message(STATUS "DynamicQueries Build Type: ${CMAKE_BUILD_TYPE}") +# controversial choice: return-type warnings should be promoted to errors if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") message("Adding GNU compiler flags") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wall") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wall -Werror=return-type") elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") message("Adding MSVC compiler flags") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Wall") @@ -30,6 +33,12 @@ else() message("${CMAKE_CXX_COMPILER_ID} not recognized, no flags added") endif() + +# add_compile_options(-fsanitize=address) +# add_link_options(-fsanitize=address) +# add_compile_options(-fsanitize=undefined) +# add_link_options(-fsanitize=undefined) + ###### # Get MPI for distributed communication ###### @@ -47,15 +56,32 @@ FetchContent_Declare( GraphZeppelinVerifyCC GIT_REPOSITORY https://github.com/GraphStreamingProject/GraphZeppelin - GIT_TAG db06e662aa7563716e49e3f5036e773a97a7dd64 #main + # GIT_TAG 2c633f5814f7edc79632cceb629280f6221b0281 #ksparse_recovery + # GIT_TAG 892b5b71b139100c309c79bb6ab94a5596fc606d + GIT_TAG c31add74bae9d826b07216acca0558747d34efc0 +) +add_compile_definitions(GLOG_USE_GLOG_EXPORT) + + +# NOTE - not sure if this is gonna work +# Install DynamicConnectivity Package +# it doesn't. 
the + +FetchContent_Declare( + dycon + GIT_REPOSITORY https://github.com/GraphStreamingProject/DynamicConnectivity + GIT_TAG 733d99e4a6985ef656df716ffe23719a1f29786b ) + +# TODO - THERE should be a better way to do this that doesn't rely on populate and add_subdirectory +FetchContent_Populate(dycon) +add_subdirectory(${dycon_SOURCE_DIR} ${dycon_BINARY_DIR} EXCLUDE_FROM_ALL) + FetchContent_MakeAvailable(GraphZeppelinVerifyCC) -#add_compile_options(-fsanitize=address) -#add_link_options(-fsanitize=address) -#add_compile_options(-fsanitize=undefined) -#add_link_options(-fsanitize=undefined) + +find_package(TBB REQUIRED) add_executable(dynamicCC_tests test/test_runner.cpp @@ -66,13 +92,18 @@ add_executable(dynamicCC_tests src/skiplist.cpp src/euler_tour_tree.cpp + src/sketchless_skiplist.cpp + src/sketchless_euler_tour_tree.cpp src/link_cut_tree.cpp src/graph_tiers.cpp + src/batch_tiers.cpp ) - target_include_directories(dynamicCC_tests PUBLIC include ${MPI_C_INCLUDE_PATH}) -add_dependencies(dynamicCC_tests GraphZeppelinVerifyCC) -target_link_libraries(dynamicCC_tests PRIVATE GraphZeppelinVerifyCC ${MPI_LIBRARIES}) +add_dependencies(dynamicCC_tests GraphZeppelinVerifyCC dycon) +target_link_libraries(dynamicCC_tests PRIVATE GraphZeppelinVerifyCC dycon ${MPI_LIBRARIES} TBB::tbb) + +target_compile_options(dynamicCC_tests PUBLIC -fopenmp) +target_link_options(dynamicCC_tests PUBLIC -fopenmp) add_executable(mpi_dynamicCC_tests test/mpi_test_runner.cpp @@ -86,10 +117,56 @@ add_executable(mpi_dynamicCC_tests src/input_node.cpp src/tier_node.cpp ) - +target_compile_definitions(mpi_dynamicCC_tests PUBLIC SKETCH_BUFFER_SIZE=${SKETCH_BUFFER_SIZE}) target_include_directories(mpi_dynamicCC_tests PUBLIC include ${MPI_C_INCLUDE_PATH}) -add_dependencies(mpi_dynamicCC_tests GraphZeppelinVerifyCC) -target_link_libraries(mpi_dynamicCC_tests PRIVATE GraphZeppelinVerifyCC ${MPI_LIBRARIES}) +add_dependencies(mpi_dynamicCC_tests GraphZeppelinVerifyCC dycon) +target_link_libraries(mpi_dynamicCC_tests PRIVATE GraphZeppelinVerifyCC dycon ${MPI_LIBRARIES}) + +target_compile_options(mpi_dynamicCC_tests PUBLIC -fopenmp) +target_link_options(mpi_dynamicCC_tests PUBLIC -fopenmp) + + +add_executable(hybrid_mpi_dynamicCC_tests + test/hybrid_test_runner.cpp + test/hybrid_tests.cpp + + src/skiplist.cpp + src/sketchless_skiplist.cpp + src/euler_tour_tree.cpp + src/sketchless_euler_tour_tree.cpp + src/link_cut_tree.cpp + src/input_node.cpp + src/tier_node.cpp +) +target_compile_definitions(hybrid_mpi_dynamicCC_tests PUBLIC SKETCH_BUFFER_SIZE=${SKETCH_BUFFER_SIZE}) +target_include_directories(hybrid_mpi_dynamicCC_tests PUBLIC include ${MPI_C_INCLUDE_PATH}) +add_dependencies(hybrid_mpi_dynamicCC_tests GraphZeppelinVerifyCC dycon) +target_link_libraries(hybrid_mpi_dynamicCC_tests PRIVATE GraphZeppelinVerifyCC dycon ${MPI_LIBRARIES}) + +target_compile_options(hybrid_mpi_dynamicCC_tests PUBLIC -fopenmp) +target_link_options(hybrid_mpi_dynamicCC_tests PUBLIC -fopenmp) + +add_executable(hybrid_shmem_dynamicCC_tests + test/hybrid_shmem_test_runner.cpp + test/hybrid_shmem_tests.cpp + + src/skiplist.cpp + src/graph_tiers.cpp + src/batch_tiers.cpp + src/sketchless_skiplist.cpp + src/euler_tour_tree.cpp + src/sketchless_euler_tour_tree.cpp + src/link_cut_tree.cpp + # src/input_node.cpp + # src/tier_node.cpp +) +target_compile_definitions(hybrid_shmem_dynamicCC_tests PUBLIC SKETCH_BUFFER_SIZE=${SKETCH_BUFFER_SIZE}) +target_include_directories(hybrid_shmem_dynamicCC_tests PUBLIC include ${MPI_C_INCLUDE_PATH}) 
+add_dependencies(hybrid_shmem_dynamicCC_tests GraphZeppelinVerifyCC dycon) +target_link_libraries(hybrid_shmem_dynamicCC_tests PRIVATE GraphZeppelinVerifyCC dycon ${MPI_LIBRARIES} TBB::tbb) + +target_compile_options(hybrid_shmem_dynamicCC_tests PUBLIC -fopenmp) +target_link_options(hybrid_shmem_dynamicCC_tests PUBLIC -fopenmp) ####### # TODO: Is MPI INCLUDE PATH necessary? diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..b58ad80 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,119 @@ +# build +FROM ubuntu:24.04 AS build +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + cmake \ + git \ + sudo \ + gnupg \ + # libboost-context-dev \ + # libboost-filesystem-dev \ + # libboost-program-options-dev \ + # libboost-system-dev \ + # libboost-thread-dev \ + libboost-all-dev \ + libdouble-conversion-dev \ + libfast-float-dev \ + libevent-dev \ + libfmt-dev \ + libgflags-dev \ + libgoogle-glog-dev \ + libjemalloc-dev \ + libmimalloc-dev \ + libssl-dev \ + libunwind-dev \ + libzstd-dev \ + ninja-build \ + openmpi-bin \ + libopenmpi-dev \ + python3 \ + python3-venv \ + wget \ + libtbb-dev \ + && rm -rf /var/lib/apt/lists/* + + +WORKDIR /opt/deps + +# install the latest version of fast-float + +# TODO - checkout a specific tag/release +RUN git clone https://github.com/fastfloat/fast_float.git \ + && cd fast_float \ + && cmake -B build -DFASTFLOAT_TEST=OFF \ + && sudo cmake --build build --target install # build folly + install dependencies we may have missed # TODO - would be good to enumerate them # RUN git clone https://github.com/facebook/folly.git # WORKDIR /opt/deps/folly # RUN python3 ./build/fbcode_builder/getdeps.py install-system-deps --recursive # ENV FOLLY_PREFIX=/opt/deps/folly/_build/opt/facebook # ENV CMAKE_PREFIX_PATH=${FOLLY_PREFIX}:${CMAKE_PREFIX_PATH} + + +# build DynamicQueriesCC in Release +WORKDIR /opt/dynamiccc +COPY . . +# remove old build directory +RUN rm -rf build +RUN cmake -S . 
-B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_PREFIX_PATH=${FOLLY_PREFIX} \ + -DSKETCH_BUFFER_SIZE=5 \ + -DPARLAY_TBB=On \ + && cmake --build build --target \ + dynamicCC_tests \ + mpi_dynamicCC_tests \ + hybrid_mpi_dynamicCC_tests \ + hybrid_shmem_dynamicCC_tests \ + -j "$(nproc)" + + +# ------------------------------ + +# runtime +FROM ubuntu:24.04 AS runtime +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates \ + git \ + libboost-context-dev \ + libboost-filesystem-dev \ + libboost-program-options-dev \ + libboost-system-dev \ + libboost-thread-dev \ + libdouble-conversion-dev \ + libevent-dev \ + libfmt-dev \ + libgflags-dev \ + libgoogle-glog-dev \ + libjemalloc-dev \ + libmimalloc-dev \ + libssl-dev \ + libunwind-dev \ + libzstd-dev \ + openmpi-bin \ + libopenmpi-dev \ + python3 \ + wget \ + libtbb-dev \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /opt/dynamiccc + +# project binaries and libs +COPY --from=build /opt/dynamiccc/build/*tests /opt/dynamiccc/bin/ +COPY scripts ./scripts + +ENV PATH="/opt/dynamiccc/bin:${PATH}" +ENV LD_LIBRARY_PATH="/opt/dynamiccc/lib:${LD_LIBRARY_PATH}" + + +ENTRYPOINT ["/bin/bash"] \ No newline at end of file diff --git a/include/batch_tiers.h b/include/batch_tiers.h new file mode 100644 index 0000000..1101056 --- /dev/null +++ b/include/batch_tiers.h @@ -0,0 +1,206 @@ +#pragma once +#include "types.h" +#include +#include +#include +#include +// #include +// #include +#include "parlay_hash/unordered_map.h" +
+#include "euler_tour_tree.h" +// #include "link_cut_tree.h" +#include "lct_v2.h" +#include "union_find_local.h" +#include "sketchless_euler_tour_tree.h" +// #include "parlay_hash/unordered_set.h" + +template requires(SketchColumnConcept) +class BatchTiers { + private: + size_t num_nodes; + uint64_t seed; + // size_t maximum_batch_size = 512; + // size_t maximum_batch_size = 100; + // size_t maximum_batch_size = 1 << 20; + size_t maximum_batch_size = 1 << 20; + // size_t maximum_batch_size = 200; + // size_t maximum_batch_size = 1 << 14; + // size_t maximum_batch_size = 1024; + size_t granularity = 1 << 11; // suggested number of tier-updates per thread + std::vector> ett; // one ETT for each tier + LinkCutTreeMaxAgg link_cut_tree; + SketchlessEulerTourTree<> query_ett; + std::mutex lct_and_query_ett_lock; + parlay::sequence _unique_update_ids; + + std::vector transaction_log; + + // TODO - add the sketchless ETT for querying + // + + // "root" nodes for each candidate component at each tier. + union_find_local _component_reps_dsu; + + // static thread_local parlay::sequence _deltas_buffer; + // static thread_local SketchClass _scratch_sketch; + // matrix of [num_tiers x ( batch_size * 2 )] + std::vector*>> _root_nodes; + + // jagged array: track isolated components/probably isolated components. + // why are we doing this instead of just using root_nodes? + + // a vector mapping each tier to the set of its components that need + // to be checked for isolation + // parlay::sequence*>> _updated_components; + // TODO - see if we can get rid of redundant checks + // and only do one PER component. ie if some components share the same + // root, we need not check them. + // parlay::sequence*>> _updated_components; + parlay::sequence> _updated_components; + + // tracks components that were already checked for isolation and had their + // associated link/cut instructions logged. 
+ // parlay::sequence*> _current_isolated_components; + + // key: a root node ptr (to identify same component at current tier) + // folly::ConcurrentHashMap _already_checked_components; + parlay::parlay_unordered_map_direct _already_checked_components; + + + // links to "broadcast" to all higher tiers + parlay::sequence _pending_links; + // cuts to "broadcast" to all higher tiers, plus the index of the first tier + // where the cut should be made + parlay::sequence> _pending_cuts; + + parlay::sequence update_buffer; + + + + public: + BatchTiers(node_id_t num_nodes, uint64_t seed); + BatchTiers(node_id_t num_nodes, uint32_t num_tiers, int batch_size, size_t seed); + ~BatchTiers(); + + bool is_initialized(node_id_t u) { + // no-op with vector implementation + return ett[0].is_initialized(u); + }; + + void initialize_node(node_id_t u) { + for (auto &tree: ett) { + tree.initialize_node(u); + } + query_ett.initialize_node(u); + link_cut_tree.initialize_node(u); + } + + void uninitialize_node(node_id_t u) { + for (auto &tree: ett) { + tree.uninitialize_node(u); + } + query_ett.uninitialize_node(u); + link_cut_tree.uninitialize_node(u); + } + + void initialize_all_nodes() { + // TODO - parallel_for? + for (auto &tree: ett) { + tree.initialize_all_nodes(num_nodes); + } + query_ett.initialize_all_nodes(num_nodes); + link_cut_tree.initialize_all_nodes(num_nodes); + } + + void flush_transaction_log() { + transaction_log.clear(); + } + + const std::vector& get_transaction_log() const { + return transaction_log; + } + + void process_all_updates() { + if (update_buffer.size() > 0) { + update_batch(update_buffer); + update_buffer.clear(); + } + } + + void update_batch(const parlay::sequence &updates); + + bool is_tree_edge(node_id_t a, node_id_t b) { + return query_ett.has_edge(a, b); + } + + void update(const GraphUpdate &update) { + // if (!is_initialized(update.edge.src) || !is_initialized(update.edge.dst)) { + // std::cout << "ruh oh" << std::endl; + // } + assert(this->is_initialized(update.edge.src)); + assert(this->is_initialized(update.edge.dst)); + // add to buffer: + update_buffer.push_back(update); + // bool is_tree_edge_deletion = (update.type == DELETE && + // is_tree_edge(update.edge.src, update.edge.dst)); + // if (update_buffer.size() >= maximum_batch_size || is_tree_edge_deletion) { + if (update_buffer.size() >= maximum_batch_size) { + // std::cout << "is_tree_edge_deletion: " << is_tree_edge_deletion << ", buffer size: " << update_buffer.size() << std::endl; + // process the batch + update_batch(update_buffer); + // clear the buffer + update_buffer.clear(); + } + } + + void flush_buffer() { + if (update_buffer.size() > 0) { + update_batch(update_buffer); + update_buffer.clear(); + } + } + size_t space_usage_bytes() { + size_t total = sizeof(BatchTiers); + for (auto &tree: ett) { + total += tree.space_usage_bytes(); + } + // total += query_ett.space_usage_bytes(); + // total += link_cut_tree.space_usage_bytes(); + // total += _component_reps_dsu.space_usage_bytes(); + // total += _already_checked_components.max_size() * (sizeof(size_t) + sizeof(node_id_t) + sizeof(void*)); // rough estimate + // total += ((parlay::unordered_map_internal) _already_checked_components).size(); + // TODO - measure the overhead of the actual batch_tiers class + return total; + } + + bool is_connected(node_id_t a, node_id_t b); + + // query for the connected components of the graph + std::vector> get_cc(); + + // return the number of tiers + size_t num_tiers() const { + return ett.size(); + } + // find the 
index of the highest everywhere-maximal tier. + + + private: + SkipListNode*& root_node(size_t tier, size_t update_idx, bool src_or_dst) { + return _root_nodes[tier][update_idx * 2 + (src_or_dst ? 0 : 1)]; + }; + void _process_sketch_aggs_only(const parlay::sequence &updates); + + + // same thing but separates by tiers. this avoids the need for atomics. + void _process_sketch_aggs_tier_sequential(const parlay::sequence &updates); + + // same thing but use our CAS tricks + void _process_sketch_aggs_with_cas(const parlay::sequence &updates); + + uint32_t _search_for_isolated_components(const parlay::sequence &updates); + + bool _fix_isolations_at_tier(const parlay::sequence &updates, uint32_t tier_idx); +}; + diff --git a/include/euler_tour_tree.h b/include/euler_tour_tree.h index 98d8ff1..59a26ca 100644 --- a/include/euler_tour_tree.h +++ b/include/euler_tour_tree.h @@ -3,64 +3,211 @@ #include #include +#include "sketch/sketch_concept.h" +#include "sketch_interfacing.h" +#include + + +template requires(SketchColumnConcept) class EulerTourNode { FRIEND_TEST(EulerTourTreeSuite, random_links_and_cuts); FRIEND_TEST(EulerTourTreeSuite, get_aggregate); FRIEND_TEST(SkipListSuite, join_split_test); FRIEND_TEST(GraphTiersSuite, mini_correctness_test); - std::unordered_map edges; + std::unordered_map*, SkipListNode*> edges; - Sketch* temp_sketch = nullptr; long seed = 0; - SkipListNode* make_edge(EulerTourNode* other, Sketch* temp_sketch); - void delete_edge(EulerTourNode* other, Sketch* temp_sketch); + SkipListNode* make_edge(EulerTourNode* other, SketchClass &temp_sketch); + SkipListNode* make_edge(EulerTourNode* other); + void delete_edge(EulerTourNode* other, SketchClass &temp_sketch); public: const node_id_t vertex = 0; const uint32_t tier = 0; - SkipListNode* allowed_caller = nullptr; + SkipListNode* allowed_caller = nullptr; EulerTourNode(long seed, node_id_t vertex, uint32_t tier); EulerTourNode(long seed); ~EulerTourNode(); - bool link(EulerTourNode& other, Sketch* temp_sketch); - bool cut(EulerTourNode& other, Sketch* temp_sketch); + bool link(EulerTourNode& other, SketchClass &temp_sketch); + bool cut(EulerTourNode& other, SketchClass &temp_sketch); bool isvalid() const; - Sketch* get_sketch(SkipListNode* caller); - SkipListNode* update_sketch(vec_t update_idx); + SketchClass& get_sketch(SkipListNode* caller); + SkipListNode* update_sketch(vec_t update_idx); + SkipListNode* update_sketch(const ColumnEntryDelta &delta); + SkipListNode* update_sketch(const ColumnEntryDeltas &deltas); + SkipListNode* update_sketch(const SketchClass &sketch); + SkipListNode* update_sketch_atomic(vec_t update_idx); + SkipListNode* update_sketch_atomic(const ColumnEntryDelta &delta); + SkipListNode* update_sketch_atomic(const ColumnEntryDeltas &deltas); + + SkipListNode* get_allowed_caller() { + return this->allowed_caller; + } + + // update just this node's sketch + // plus return the allowed caller + SkipListNode* update_sketch_noagg_atomic(const ColumnEntryDelta &delta); + // void update_sketch_noagg_atomic(const SketchClass &sketch); + SkipListNode* update_sketch_atomic_to_level(const ColumnEntryDelta &delta, uint32_t level); + + //recompute the parent aggregates + void recompute_aggregates_parallel(); + + const ColumnEntryDelta generate_entry_delta(vec_t update) const { + return this->allowed_caller->sketch_agg.generate_entry_delta(update); + } - SkipListNode* get_root(); + SkipListNode* get_root() const; - Sketch* get_aggregate(); + const SketchClass& get_aggregate(); uint32_t get_size(); - bool 
has_edge_to(EulerTourNode* other); + bool has_edge_to(EulerTourNode* other); - std::set get_component(); + std::set*> get_component(); long get_seed() {return seed;}; - friend std::ostream& operator<<(std::ostream& os, const EulerTourNode& ett); + template requires(SketchColumnConcept) + friend std::ostream& operator<<(std::ostream& os, const EulerTourNode& ett); }; + +using VectorContainer = std::vector>; +using HashmapContainer = absl::flat_hash_map*>; + +template >> +typename Container = absl::flat_hash_map*>> +requires(SketchColumnConcept) class EulerTourTree { - Sketch* temp_sketch; + SketchClass temp_sketch; +private: + size_t seed; + node_id_t max_num_nodes; + uint32_t tier_num; public: - std::vector ett_nodes; + // std::vector> ett_nodes; + // absl::flat_hash_map*> ett_nodes; + Container ett_nodes; + - EulerTourTree(node_id_t num_nodes, uint32_t tier_num, int seed); + EulerTourTree(node_id_t max_num_nodes, uint32_t tier_num, int seed); + + EulerTourNode& ett_node(node_id_t u) { + if constexpr (std::is_same_v>>) { + assert(u < ett_nodes.size()); + return ett_nodes[u]; + } else { + assert(ett_nodes.find(u) != ett_nodes.end()); + return *ett_nodes[u]; + } + } + + void initialize_node(node_id_t u) { + // no-op with vector implementation + if constexpr (!std::is_same_v>>) { + // assert(ett_nodes.find(u) == ett_nodes.end()); + // TODO - this is kinda gross - fix later + if (ett_nodes.find(u) == ett_nodes.end()) + ett_nodes[u] = new EulerTourNode(this->seed, u, this->tier_num); + } + }; + void uninitialize_node(node_id_t u) { + // no-op with vector implementation + if constexpr (!std::is_same_v>>) { + assert(ett_nodes.find(u) != ett_nodes.end()); + delete ett_nodes[u]; + // TODO - actually delete from ett + } + }; + + void initialize_all_nodes() { + for (node_id_t i = 0; i < max_num_nodes; ++i) { + initialize_node(i); + } + }; + void initialize_all_nodes(node_id_t until) { + assert(until <= max_num_nodes); + for (node_id_t i = 0; i < until; ++i) { + initialize_node(i); + } + } + bool is_initialized(node_id_t u) { + // no-op with vector implementation + if constexpr (std::is_same_v>>) { + return true; + } else { + return ett_nodes.find(u) != ett_nodes.end(); + } + }; void link(node_id_t u, node_id_t v); void cut(node_id_t u, node_id_t v); bool has_edge(node_id_t u, node_id_t v); - SkipListNode* update_sketch(node_id_t u, vec_t update_idx); - std::pair update_sketches(node_id_t u, node_id_t v, vec_t update_idx); - SkipListNode* get_root(node_id_t u); - Sketch* get_aggregate(node_id_t u); + SkipListNode* update_sketch(node_id_t u, vec_t update_idx); + SkipListNode* update_sketch(node_id_t u, const ColumnEntryDelta &delta); + SkipListNode* update_sketch(node_id_t u, const ColumnEntryDeltas &deltas); + SkipListNode* update_sketch(node_id_t u, const SketchClass &sketch); + SkipListNode* update_sketch_atomic(node_id_t u, vec_t update_idx); + SkipListNode* update_sketch_atomic(node_id_t u, const ColumnEntryDelta &delta); + SkipListNode* update_sketch_atomic(node_id_t u, const ColumnEntryDeltas &deltas); + + // returns the allowed caller + // SkipListNode* update_sketch_noagg_atomic(const ColumnEntryDelta &delta); + // void update_sketch_noagg_atomic(const SketchClass &sketch); + + //recompute the parent aggregates + void recompute_aggregates_parallel(); + + ColumnEntryDelta generate_entry_delta(node_id_t u, vec_t update) { + // TODO - the specific node isn't actually meaningful here. 
+ return ett_node(u).generate_entry_delta(update); + } + + std::pair*, SkipListNode*> update_sketches(node_id_t u, node_id_t v, vec_t update_idx); + SkipListNode* get_root(node_id_t u); + const SketchClass& get_aggregate(node_id_t u); uint32_t get_size(node_id_t u); + uint32_t num_components() { + std::set roots; + for (node_id_t i = 0; i < ett_nodes.size(); ++i) { + if (!is_initialized(i)) { + continue; + } + auto root = ett_node(i).get_root(); + roots.insert(root); + } + return roots.size(); + } + size_t space_usage_bytes() { + size_t total = 0; + if constexpr (std::is_same_v>>) { + total += sizeof(EulerTourNode) * ett_nodes.capacity(); + } else { + size_t num_buckets = ett_nodes.bucket_count(); + total += sizeof(std::pair>*) * num_buckets; + } + std::unordered_set*> roots; + for (node_id_t i = 0; i < ett_nodes.size(); ++i) { + if constexpr (!std::is_same_v>>) { + if (ett_nodes.find(i) == ett_nodes.end()) { + continue; + } + } + SkipListNode* root = ett_node(i).get_root(); + roots.insert(root); + } + for (SkipListNode* root : roots) { + total += root->compute_space_usage(); + } + return total; + } }; + diff --git a/include/graph_tiers.h b/include/graph_tiers.h index 2d71859..8b627ec 100644 --- a/include/graph_tiers.h +++ b/include/graph_tiers.h @@ -6,6 +6,7 @@ #include "euler_tour_tree.h" #include "link_cut_tree.h" +// #include "lct_v2.h" // Global variables for performance testing @@ -24,16 +25,18 @@ extern std::atomic num_sketch_batches; // maintains the tiers of the algorithm // and the spanning forest of the entire graph +template requires(SketchColumnConcept) class GraphTiers { - FRIEND_TEST(GraphTiersSuite, mini_correctness_test); + // FRIEND_TEST(GraphTiersSuite, mini_correctness_test); private: - std::vector ett; // one ETT for each tier - std::vector root_nodes; - LinkCutTree link_cut_tree; - void refresh(GraphUpdate update); + std::vector> ett; // one ETT for each tier + std::vector*> root_nodes; + // LinkCutTreeMaxAgg link_cut_tree; + LinkCutTree<> link_cut_tree; + void refresh(GraphUpdate update, bool did_cut); public: - GraphTiers(node_id_t num_nodes); + GraphTiers(node_id_t num_nodes, uint64_t seed); ~GraphTiers(); // apply an edge update diff --git a/include/lct_v2.h b/include/lct_v2.h new file mode 100644 index 0000000..fca4dc1 --- /dev/null +++ b/include/lct_v2.h @@ -0,0 +1,381 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "types.h" +#include "util.h" + +#include + + +template +class NodeMaxLCT { + public: + using NodePair = std::pair*, NodeMaxLCT*>; + // static_assert(std::is_integral_v, "WeightT must be an integral type"); + + NodeMaxLCT(node_id_t node_id); + + void link(NodeMaxLCT* child, WeightT weight); + void cut(NodeMaxLCT* neighbor); + void evert(); // reroot + node_id_t get_node_id() const { return node_id; }; + NodeMaxLCT* get_root(); + std::pair path_query(NodeMaxLCT* other); + + private: + + static constexpr WeightT sentinel() { + return std::numeric_limits::lowest(); + } + + NodeMaxLCT* par; // parent + NodeMaxLCT* c[2]; // children + WeightT w[2]; // store the weights of the up and down preferred edges + WeightT max; // maintain the maximum edge weight in the splay tree subtree rooted at this + node_id_t node_id; + bool head; // whether the node is a head of a path, so don't use value w[0] + bool flip; // whether children are reversed; used for evert() + + NodeMaxLCT* get_real_par(); + NodeMaxLCT* get_leftmost(); + NodeMaxLCT* get_predecessor(); + NodeMaxLCT* get_successor(); + NodePair 
get_edge_with_weight(WeightT weight); + void rot(); + void splay(); + NodeMaxLCT* expose(); + void fix_c(); + void recompute_max(); + void push_flip(); + + }; + + +template < +typename WeightT, +typename Container = absl::flat_hash_map*> +// typename Container = std::vector> +> +class LinkCutTreeMaxAgg { + public: + static_assert(std::is_integral_v, "WeightT must be an integral type"); + + explicit LinkCutTreeMaxAgg(int _num_verts); + LinkCutTreeMaxAgg(node_id_t n) : LinkCutTreeMaxAgg(static_cast(n)) {} + ~LinkCutTreeMaxAgg(); + + void link(node_id_t u, node_id_t v, WeightT weight = WeightT{}); + void link(node_id_t u, node_id_t v, std::pair weight) { link(u, v, weight.second); } + void cut(node_id_t u, node_id_t v); + bool connected(node_id_t u, node_id_t v); + std::pair path_query(node_id_t u, node_id_t v); + size_t space_usage_bytes() const; + private: + Container verts; + size_t num_verts; + NodeMaxLCT& vert(node_id_t id) { + if constexpr (std::is_same_v>>) { + assert(id < verts.size()); + return verts[id]; + } else { + assert(verts.find(id) != verts.end()); + return *verts[id]; + } + } + NodeMaxLCT* vert_ptr(node_id_t id) { + if constexpr (std::is_same_v>>) { + assert(id < verts.size()); + return &verts[id]; + } else { + assert(verts.find(id) != verts.end()); + return verts[id]; + } + } +public: + void initialize_node(node_id_t v) { + // no-op with vector implementation + if constexpr (!std::is_same_v>>) { + assert(verts.find(v) == verts.end()); + verts[v] = new NodeMaxLCT(v); + } + } + void uninitialize_node(node_id_t v) { + // no-op with vector implementation + if constexpr (!std::is_same_v>>) { + assert(verts.find(v) != verts.end()); + delete verts[v]; + } + } + void initialize_all_nodes() { + for (node_id_t i = 0; i < num_verts; ++i) { + initialize_node(i); + } + } + void initialize_all_nodes(node_id_t until) { + assert(until <= num_verts); + for (node_id_t i = 0; i < until; ++i) { + initialize_node(i); + } + } +}; + +template +NodeMaxLCT::NodeMaxLCT(node_id_t node_id) : par(nullptr), c{nullptr, nullptr}, w{sentinel(), sentinel()}, + max(sentinel()), node_id(node_id), head(true), flip(false) {} + +template +NodeMaxLCT* NodeMaxLCT::get_real_par() { + return par != nullptr && this != par->c[0] && this != par->c[1] ? 
nullptr : par; + } + +template +NodeMaxLCT* NodeMaxLCT::get_leftmost() { + NodeMaxLCT* left = this; + push_flip(); + while (left->c[0] != nullptr) { + left = left->c[0]; + left->push_flip(); + } + left->splay(); + return left; + } + +template +NodeMaxLCT* NodeMaxLCT::get_predecessor() { + push_flip(); + NodeMaxLCT* curr = c[0]; + curr->push_flip(); + while (curr->c[1] != nullptr) { + curr = curr->c[1]; + curr->push_flip(); + } + curr->splay(); + return curr; + } + +template +NodeMaxLCT* NodeMaxLCT::get_successor() { + push_flip(); + NodeMaxLCT* curr = c[1]; + curr->push_flip(); + while (curr->c[0] != nullptr) { + curr = curr->c[0]; + curr->push_flip(); + } + curr->splay(); + return curr; + } + +template +typename NodeMaxLCT::NodePair NodeMaxLCT::get_edge_with_weight(WeightT weight) { + NodeMaxLCT* node = this; + while (node->w[0] != weight && node->w[1] != weight) { + for (int i = 0; i < 2; i++) + if (node->c[i] != nullptr && node->c[i]->max == weight) + node = node->c[i]; + } + node->splay(); + if (node->w[0] == weight) + return {node, node->get_predecessor()}; + return {node, node->get_successor()}; + } + + +template +void NodeMaxLCT::fix_c() { + for (int i = 0; i < 2; i++) + if (c[i] != nullptr) + c[i]->par = this; + } + +template +void NodeMaxLCT::recompute_max() { + max = head ? w[1]: std::max(w[0], w[1]); + for (int i = 0; i < 2; i++) + if (c[i] != nullptr) + max = std::max(max, c[i]->max); + } + +template +void NodeMaxLCT::push_flip() { + if (flip) { + flip = false; + std::swap(c[0], c[1]); + std::swap(w[0], w[1]); + for (int i = 0; i < 2; i++) + if (c[i] != nullptr) + c[i]->flip = !c[i]->flip; + } + } + +template +void NodeMaxLCT::rot() { // rotate v towards its parent; v must have real parent + NodeMaxLCT* p = get_real_par(); + par = p->par; + if (par != nullptr) + for (int i = 0; i < 2; i++) + if (par->c[i] == p) { + par->c[i] = this; + par->fix_c(); + } + const bool rot_dir = this == p->c[0]; + p->c[!rot_dir] = c[rot_dir]; + c[rot_dir] = p; + p->fix_c(); + p->recompute_max(); + fix_c(); + recompute_max(); + } + +template +void NodeMaxLCT::splay() { + NodeMaxLCT* p, * gp; + push_flip(); // guarantee flip bit isn't set after calling splay() + while ((p = get_real_par()) != nullptr) { + gp = p->get_real_par(); + if (gp != nullptr) + gp->push_flip(); + p->push_flip(); + push_flip(); + if (gp != nullptr) + ((gp->c[0] == p) == (p->c[0] == this) ? 
p : this)->rot(); + rot(); + } + } + +// returns the root of the tree +template +NodeMaxLCT* NodeMaxLCT::expose() { + NodeMaxLCT* curr = this; + NodeMaxLCT* prev = nullptr; + while (curr) { + curr->splay(); + NodeMaxLCT* lower = curr->c[1]; + curr->c[1] = prev; + curr->w[1] = sentinel(); + if (prev) { + curr->w[1] = prev->w[0]; + prev->head = false; + prev->recompute_max(); + } + curr->recompute_max(); + if (lower) { + NodeMaxLCT* left = lower->get_leftmost(); + left->head = true; + left->recompute_max(); + } + prev = curr->get_leftmost(); + curr = prev->par; + } + return prev; + } + +template +void NodeMaxLCT::evert() { + NodeMaxLCT* head_node = expose(); + head_node->flip = !head_node->flip; + head_node->push_flip(); + } + +template +NodeMaxLCT* NodeMaxLCT::get_root() { + return expose(); + } + +template +auto NodeMaxLCT::path_query(NodeMaxLCT* other) -> std::pair*, NodeMaxLCT*>, WeightT> { + evert(); + other->expose(); + std::pair*, NodeMaxLCT*>, WeightT> max_edge; + max_edge.first = get_edge_with_weight(max); + max_edge.second = max; + return max_edge; + } + +template +void NodeMaxLCT::cut(NodeMaxLCT* neighbor) { + neighbor->evert(); + evert(); + neighbor->push_flip(); + push_flip(); + neighbor->c[0] = nullptr; + neighbor->w[0] = sentinel(); + neighbor->recompute_max(); + par = nullptr; + w[1] = sentinel(); + recompute_max(); + } + +template +void NodeMaxLCT::link(NodeMaxLCT* child, WeightT weight) { + child->evert(); + child->splay(); + child->par = this; + child->w[0] = weight; + child->head = true; + } + +template +LinkCutTreeMaxAgg::LinkCutTreeMaxAgg(int _num_verts) : num_verts(static_cast(_num_verts)) { + if constexpr (std::is_same_v>>) { + verts.resize(static_cast(_num_verts), NodeMaxLCT(0)); + for (node_id_t i = 0; i < num_verts; ++i) { + verts[static_cast(i)] = NodeMaxLCT(i); + } + } +} + +template +LinkCutTreeMaxAgg::~LinkCutTreeMaxAgg() { + if constexpr (std::is_same_v>>) { + verts.clear(); + } +} + +template +void LinkCutTreeMaxAgg::link(node_id_t u, node_id_t v, WeightT weight) { + const auto u_idx = static_cast(u); + const auto v_idx = static_cast(v); + vert(u_idx).link(vert_ptr(v_idx), weight); +} + +template +void LinkCutTreeMaxAgg::cut(node_id_t u, node_id_t v) { + const auto u_idx = static_cast(u); + const auto v_idx = static_cast(v); + vert(u_idx).cut(vert_ptr(v_idx)); +} + +template +bool LinkCutTreeMaxAgg::connected(node_id_t u, node_id_t v) { + const auto u_idx = static_cast(u); + const auto v_idx = static_cast(v); + return vert(u_idx).get_root() == vert(v_idx).get_root(); +} + +template +std::pair LinkCutTreeMaxAgg::path_query(node_id_t u, node_id_t v) { + const auto u_idx = static_cast(u); + const auto v_idx = static_cast(v); + auto pointer_edge = vert(u_idx).path_query(vert_ptr(v_idx)); + std::pair edge; + edge.first.src = static_cast(pointer_edge.first.first->get_node_id()); + edge.first.dst = static_cast(pointer_edge.first.second->get_node_id()); + edge.second = pointer_edge.second; + return edge; +} + +template +size_t LinkCutTreeMaxAgg::space_usage_bytes() const{ + size_t max_space = sizeof(LinkCutTreeMaxAgg) + (num_verts * (sizeof(NodeMaxLCT*) + sizeof(NodeMaxLCT))); + return max_space; +} + + +template class LinkCutTreeMaxAgg; \ No newline at end of file diff --git a/include/link_cut_tree.h b/include/link_cut_tree.h index bf4356a..a89e11e 100644 --- a/include/link_cut_tree.h +++ b/include/link_cut_tree.h @@ -1,12 +1,15 @@ #pragma once #include +#include #include "types.h" #include "util.h" +#include + #define MAX_UINT64 (std::numeric_limits::max()) -class 
LinkCutTree; -class SplayTree; +// class LinkCutTree<>; +// class SplayTree; class LinkCutNode { FRIEND_TEST(LinkCutTreeSuite, random_links_and_cuts); @@ -36,6 +39,10 @@ class LinkCutNode { void rotate_up(); public: + // delete copy constructor and assignment operator + // LinkCutNode(const LinkCutNode&) = delete; + // LinkCutNode& operator=(const LinkCutNode&) = delete; + LinkCutNode* splay(); void link_left(LinkCutNode* left); @@ -70,12 +77,16 @@ class LinkCutNode { bool get_reversed(); }; +template < +// typename Container = std::vector> +typename Container = absl::flat_hash_map> class LinkCutTree { FRIEND_TEST(LinkCutTreeSuite, join_split_test); FRIEND_TEST(LinkCutTreeSuite, expose_simple_test); FRIEND_TEST(LinkCutTreeSuite, random_links_and_cuts); - std::vector nodes; + Container nodes; + node_id_t max_nodes; // Concatenate the paths with aux trees rooted at v and w and return the root of the combined aux tree LinkCutNode* join(LinkCutNode* v, LinkCutNode* w); @@ -104,4 +115,60 @@ class LinkCutTree { // Query for the CC algorithm std::vector> get_cc(); + + LinkCutNode& node(node_id_t u) { + if constexpr (std::is_same_v>) { + assert(u < nodes.size()); + return nodes[u]; + } else { + assert(nodes.find(u) != nodes.end()); + return *nodes[u]; + } + } + + LinkCutNode* get_node_ptr(node_id_t u) { + if constexpr (std::is_same_v>) { + assert(u < nodes.size()); + return &nodes[u]; + } else { + assert(nodes.find(u) != nodes.end()); + return nodes[u]; + } + } + + void initialize_node(node_id_t u) { + // no-op with vector implementation + if constexpr (!std::is_same_v>) { + nodes[u] = new LinkCutNode(); + } + }; + void uninitialize_node(node_id_t u) { + // no-op with vector implementation + if constexpr (!std::is_same_v>) { + assert(nodes.find(u) != nodes.end()); + delete nodes[u]; + } + }; + + void initialize_all_nodes() { + for (node_id_t i = 0; i < max_nodes; ++i) { + initialize_node(i); + } + }; + + void initialize_all_nodes(node_id_t upto) { + for (node_id_t i = 0; i < upto; ++i) { + initialize_node(i); + } + }; + + bool is_initialized(node_id_t u) { + // no-op with vector implementation + if constexpr (std::is_same_v>) { + return true; + } else { + return nodes.find(u) != nodes.end(); + } + }; + }; diff --git a/include/mpi_hybrid_conn.h b/include/mpi_hybrid_conn.h new file mode 100644 index 0000000..5af1d0c --- /dev/null +++ b/include/mpi_hybrid_conn.h @@ -0,0 +1,559 @@ +#include "mpi_nodes.h" +#include "graph_tiers.h" +#include +#include "recovery.h" + +template +concept DynamicSketchConcept = requires(T t) { + { t.process_all_updates()} -> std::same_as; + { t.initialize_node( std::declval() ) } -> std::same_as; + { t.uninitialize_node( std::declval() ) } -> std::same_as; + { t.initialize_all_nodes() } -> std::same_as; + { t.get_transaction_log() } -> std::same_as&>; + { t.update( std::declval() ) } -> std::same_as; + { t.space_usage_bytes() } -> std::same_as; +}; + +template requires(DynamicSketchConcept) +class HybridConnectivityManager { + // TODO + public: + // TODO - make this not public + SketchAlgoClass sketching_algo; + SCCWN<> cf_algo; + + void set_threshold(size_t threshold) { + // TODO - do this in an aesthetically better way lol. 
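+    // For reference (inferred from update() below, not a new invariant):
+    // DENSE_THRESHOLD is the degree at which a vertex is promoted into the
+    // sketched (dense) set, and MOVE_TO_SKETCH bounds how many pending
+    // dense-incident edges may accumulate in the cluster forest before
+    // flush_edges_to_sketch() moves them into the sketches.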
+ DENSE_THRESHOLD = threshold; + } + node_id_t sketched_node_count() const { + return this->recovery_sketches.size(); + } + private: + // TODO - this aint a great way + size_t MOVE_TO_SKETCH = 40; + size_t DENSE_THRESHOLD = 2000; + // size_t MOVE_TO_SKETCH = 1000000; + + size_t seed; + node_id_t num_nodes; + // GraphTiers sketching_algo; + // TODO - move semantics for sparserecovery? + absl::flat_hash_map recovery_sketches; + + + // tracks which of our CF edges are from the sketching algo + absl::flat_hash_set edges_from_sketch; + + // tracks how many dense edges are still in the CF + // generate plot with varying batch size + // keeping a global buffer is likely sufficient + // doing vertex-level might make checkpointing harder - think about this + std::vector num_pending_dense_edges; + + // tracks total amount of edges incident. + // we WONT rely on the CF to track edges. + std::vector num_edges; + std::vector num_cf_edges; + + size_t total_num_edges = 0; + size_t total_sketched_edges = 0; + + // buffer for when we need to collect all neighbors + std::vector _neighbors_buffer; + + // non-tree deletion buffer + std::vector non_tree_deletion_buffer; + + // TODO - this might be replaced by something internal to modified-cupcake + // can also just be a vector probably + absl::flat_hash_set _is_vertex_sketched; + + + size_t count_explicit_neighbors(node_id_t vertex) { + // std::cout << "count_explicit_neighbors for vertex: " << vertex << std::endl; + localTree *cf_leaf = cf_algo.leaves[vertex]; + size_t count = 0; + for (auto &level_edges: cf_leaf->vertex->E) { + count += level_edges.second->size(); + } + // if (num_cf_edges[vertex] > 1000) { + // if (count > 1000) { + // std::cout << "THIS IS WAY TOO HIGH: " << num_cf_edges[vertex] << std::endl; + // std::cout << " counted as" << count << std::endl; + // std::cout << " total degree: " << num_edges[vertex] << std::endl; + // std::cout << " num pending dense edges: " << num_pending_dense_edges[vertex] << std::endl; + // // std::cout << "count_explicit_neighbors for vertex: " << vertex << " returning cached value: " << num_cf_edges[vertex] << std::endl; + // // return cf_algo.leaves[vertex]->getEdgeLevelCount(); + // // return cf_algo.leaves[vertex]->getEdgeLevelCount(); + // } + return num_cf_edges[vertex]; + // return count; + } + + inline void insert_to_sketch(node_id_t u, node_id_t v) { + node_id_t src = std::min(u, v); + node_id_t dst = std::max(u, v); + sketching_algo.update(GraphUpdate{Edge{u, v}, INSERT}); + auto edge_id = concat_pairing_fn(u, v); + recovery_sketches[u]->update(edge_id); + recovery_sketches[v]->update(edge_id); + total_sketched_edges++; + } + inline void delete_from_sketch(node_id_t u, node_id_t v) { + node_id_t src = std::min(u, v); + node_id_t dst = std::max(u, v); + sketching_algo.update(GraphUpdate{Edge{u, v}, DELETE}); + auto edge_id = concat_pairing_fn(u, v); + recovery_sketches[u]->update(edge_id); + recovery_sketches[v]->update(edge_id); + total_sketched_edges--; + } + + bool is_forest_edge_from_sketch(Edge edge) { + // TODO - watch out for performance penalty of this. + // might be a reason to use an alternate scheme + return edges_from_sketch.find(concat_pairing_fn(edge.src, edge.dst)) != edges_from_sketch.end(); + } + + bool is_edge_in_cf(Edge edge) { + // TODO - watch out for performance penalty of this. 
+ // might be a reason to use an alternate scheme + // return cf_algo.leaves[edge.src]->getEdgeLevel(edge.dst) != MAX_LEVEL + 2; + // auto ret = cf_algo.leaves[edge.src]->getEdgeLevel(edge.dst) <= MAX_LEVEL; + return cf_algo.leaves[edge.src]->getEdgeLevel(edge.dst) <= MAX_LEVEL; + } + + bool is_vertex_sketched(node_id_t vertex) { + return _is_vertex_sketched.find(vertex) != _is_vertex_sketched.end(); + } + + void initialize_vertex_sketch(node_id_t vertex) { + // std::cout << "Initializing sketch for vertex " << vertex << std::endl << " with neighbors count " + // << count_explicit_neighbors(vertex) << std::endl; + // TODO - is basically a no-op from the perspective of the sketching algo + if (is_vertex_sketched(vertex)) { + return; + } + sketching_algo.initialize_node(vertex); + _is_vertex_sketched.insert(vertex); + // TODO - realistically, we dont need THAT many recovery sketches + // i think 5 sample should be enough? + double cleanup_adjustment_factor = 5.0 / (log2(num_nodes)); + // double cleanup_adjustment_factor = 1.0; + recovery_sketches[vertex] = new SparseRecovery(num_nodes, 128, cleanup_adjustment_factor, seed); + + // update your neighbors' dense edge counts + for (auto &level_edges: cf_algo.leaves[vertex]->vertex->E) { + for (node_id_t neighbor: *level_edges.second) { + if (is_vertex_sketched(neighbor)) { + num_pending_dense_edges[neighbor]++; + } + } + } + // for (size_t level=0; level < MAX_LEVEL; level++) { + // auto edge_set = localTree::getEdgeSet(cf_algo.leaves[vertex], level); + // if (edge_set) { + // for (node_id_t neighbor: *edge_set) { + // if (is_vertex_sketched(neighbor)) { + // num_pending_dense_edges[neighbor]++; + // } + // } + // } + // } + } + + void uninitialize_vertex_sketch(node_id_t vertex) { + // WEIRD CASE - even though this doesnt put the edges into the CF from the sketch, + // it takes responsibility of updating dense edge counts. + // WHICH MEANS - it's gonna remove a pending dense edge that was NEVER counted. 
+ // unless uninitialize is called before flushing + // std::cout << "Uninitializing sketch for vertex " << vertex << std::endl; + unlikely_if (!is_vertex_sketched(vertex)) { + return; + } + _is_vertex_sketched.erase(vertex); + // TODO - for now, the cleanup sketch isn't deleted by destructing + delete recovery_sketches[vertex]->cleanup_sketch; + delete recovery_sketches[vertex]; + recovery_sketches.erase(vertex); + // std::cout << "Uninitialized sketch for vertex " << vertex << std::endl; + + //update your neighbors' dense edge counts + for (auto &level_edges: cf_algo.leaves[vertex]->vertex->E) { + for (node_id_t neighbor: *level_edges.second) { + // note that we do this for EVERY edge in the CF + // EXCEPT for the ones that are because of the sketching algo + if (!is_forest_edge_from_sketch(Edge{vertex, neighbor})) { + num_pending_dense_edges[neighbor]--; + } + } + } + sketching_algo.uninitialize_node(vertex); + } + + void flush_transaction_log() { + // std::cout << "Flushing transaction log of size: " << sketching_algo.get_transaction_log().size() << std::endl; + // TODO - maybe get rid of this line, but rn we need it for correctness potentially: + // sketching_algo.process_all_updates(); + for (auto &update: sketching_algo.get_transaction_log()) { + if (update.type == DELETE) { + remove_from_cf(update.edge.src, update.edge.dst); + edges_from_sketch.erase(concat_pairing_fn(update.edge.src, update.edge.dst)); + } + else { + insert_to_cf(update.edge.src, update.edge.dst); + edges_from_sketch.insert(concat_pairing_fn(update.edge.src, update.edge.dst)); + } + } + sketching_algo.flush_transaction_log(); + } + + public: + HybridConnectivityManager(node_id_t num_nodes, uint32_t num_tiers, int batch_size, size_t seed) + : num_nodes(num_nodes), sketching_algo(num_nodes, num_tiers, batch_size, seed), cf_algo(num_nodes), seed(seed) { + num_pending_dense_edges.resize(num_nodes, 0); + num_cf_edges.resize(num_nodes, 0); + num_edges.resize(num_nodes, 0); + } + + ~HybridConnectivityManager() {} + void flush_edges_to_sketch(node_id_t vertex_to_flush) { + // 1) find all edges incident to vertex_to_flush AND to a dense edge + _neighbors_buffer.clear(); + for (auto &level_edges: cf_algo.leaves[vertex_to_flush]->vertex->E) { + for (node_id_t neighbor: *level_edges.second) { + // TODO - double check if this is the right way to do this + if (is_vertex_sketched(neighbor) && !is_forest_edge_from_sketch(Edge{vertex_to_flush, neighbor})) { + // if the edge is not from the sketching algo, and it's connected to a dense vertex + // add it to the buffer and + // and increment the pending dense edge count + _neighbors_buffer.push_back(neighbor); + } + } + } + + // remove duplicates + // std::sort(_neighbors_buffer.begin(), _neighbors_buffer.end()); + // auto last = std::unique(_neighbors_buffer.begin(), _neighbors_buffer.end()); + // _neighbors_buffer.resize(std::distance(_neighbors_buffer.begin(), last)); + // reason for separate loops: see if improvements can be had from figuring out + // a bulk insertion strategy + + // 2) increment their pending_dense_edge counts (but don't flush them yourself) + // (since this vertex is about to densify) + // for (node_id_t neighbor: _neighbors_buffer) { + // num_pending_dense_edges[neighbor]++; + // } + // remove edges from the cluster forest + // NO longer doing step 2 since we initialized elsewhere + for (node_id_t neighbor: _neighbors_buffer) { + remove_from_cf(vertex_to_flush, neighbor); + } + + // 3) insert them into the sketching algo + // AND the recovery sketches + for 
(node_id_t neighbor: _neighbors_buffer) { + if (neighbor != vertex_to_flush) { + insert_to_sketch(vertex_to_flush, neighbor); + } + } + // clear pending_num_dense_edges for this vertex + num_pending_dense_edges[vertex_to_flush] = 0; + // apply the transaction log + // flush_transaction_log(); + // TODO - just do this in reads for now. + // we should think about this + + } + + bool check_and_perform_recovery(node_id_t vertex) { + // TODO - there is still a bug with updating pending dense edges + return false; + /* + Assumes the vertex is sketched + Checks if the recovery sketch is sufficiently sparse + If so, performs a recovery attempt + */ + // or use the explicit degree because of well-formed stream assumption + if (!is_vertex_sketched(vertex)) { + return false; + } + // std::cout << "Checking recovery for vertex " << vertex << std::endl; + // std::cout << "num edges for vertex " << vertex << " is " << num_edges[vertex] << std::endl; + likely_if (num_edges[vertex] > MOVE_TO_SKETCH / 4) { + return false; + } + // likely_if (!recovery_sketches[vertex]->worth_recovery_attempt()) { + // return false; + // } + auto recovery_attempt = recovery_sketches[vertex]->recover(); + unlikely_if (recovery_attempt.result == FAILURE) { + // TODO - handle failure case + return false; + } + // std::cout << "RECOVERY SUCCEEDED YA HURD" << std::endl; + // std::cout << "edge count for vertex " << vertex << " is " << num_edges[vertex] << std::endl; + // std::cout << "edge count in cf for vertex " << vertex << " is " << num_cf_edges[vertex] << std::endl; + // std::cout << "recovered: " << recovery_attempt.recovered_indices.size() << std::endl; + // then remove the edge from neighbors' recovery structures + for (vec_t &vec: recovery_attempt.recovered_indices) { + Edge edge = inv_concat_pairing_fn(vec); + node_id_t other_vertex = edge.src == vertex ? edge.dst : edge.src; + recovery_sketches[other_vertex]->update(vec); + } + // and flush the edges out of the sketching algo + for (vec_t &vec: recovery_attempt.recovered_indices) { + Edge edge = inv_concat_pairing_fn(vec); + sketching_algo.update(GraphUpdate{edge, DELETE}); + } + // before we flush the transaction log - uninitialize + // this has to happen here by current designs, since we only want to decrement + // pending_dense_edges for edges that WERE NOT already part of the recovery process + // std::cout << "Spooky: Uninitializing sketch for vertex " << vertex << std::endl; + uninitialize_vertex_sketch(vertex); + + // and apply the transaction log + flush_transaction_log(); + // and add the edges back to the cluster forest + // NOTE - WE KNOW THAT none of the edges are already in the cluster forest + // this is because we applied the transaction log, so any edges in the forest that + // came for a sketch forest were removed. + // TODO - it might be worth thinking about this and optimizing + // i.e. if we just apply the transaction log, we might delete an edge from the cf, + // and then put it right back here later. 
+ // NOTE - THERE MIGHT BE DOUBLE-DIPPED EDGES + // + for (vec_t &vec: recovery_attempt.recovered_indices) { + Edge edge = inv_concat_pairing_fn(vec); + insert_to_cf(edge.src, edge.dst); + } + return true; + } + + inline void insert_to_cf(node_id_t src, node_id_t dst) { + cf_algo.insert(src, dst); + num_cf_edges[src]++; + num_cf_edges[dst]++; + } + inline void remove_from_cf(node_id_t src, node_id_t dst) { + cf_algo.remove(src, dst); + num_cf_edges[src]--; + num_cf_edges[dst]--; + } + + void update(GraphUpdate update) { + // external guarantee: well-formed stream. a remove is only called if the edge exists + // would be nice to get rid of this assumption + if (update.edge.src == update.edge.dst) { + // no self-loops + std::cout << "WARNING: self-loop detected on vertex " << update.edge.src << std::endl; + return; + } + if (update.edge.src > update.edge.dst) { + std::swap(update.edge.src, update.edge.dst); + } + if (update.type == INSERT) { + num_edges[update.edge.src]++; + num_edges[update.edge.dst]++; + total_num_edges++; + + // if both endpoints are sketched AND the endpoints are connected in the cf + // we can shortcut and just insert into the sketching algo + if (is_vertex_sketched(update.edge.src) && is_vertex_sketched(update.edge.dst)) { + if (cf_algo.is_connected(update.edge.src, update.edge.dst)) { + // std::cout << "Inserting edge from sketching algo: " << update.edge.src << ", "<< update.edge.dst << std::endl; + insert_to_sketch(update.edge.src, update.edge.dst); + return; + } + } + + insert_to_cf(update.edge.src, update.edge.dst); + + // update num_pending_dense_edges to reflect the edge + // being inserted + if (is_vertex_sketched(update.edge.src)) { + num_pending_dense_edges[update.edge.dst]++; + } + if (is_vertex_sketched(update.edge.dst)) { + num_pending_dense_edges[update.edge.src]++; + } + // i.e. 
the state of this should be correct BEFORE we + // potentially initialize the sketches below + + // check to see if we densified the vertices enough to initialize their sketches + unlikely_if (!is_vertex_sketched(update.edge.src) && num_edges[update.edge.src] >= DENSE_THRESHOLD) { + // these functions should be no-ops on dense edges + // std::cout << "neighbor count for " << update.edge.src << " is " << count_explicit_neighbors(update.edge.src) << std::endl; + initialize_vertex_sketch(update.edge.src); + flush_edges_to_sketch(update.edge.src); + + } + unlikely_if (!is_vertex_sketched(update.edge.dst) && num_edges[update.edge.dst] >= DENSE_THRESHOLD) { + // std::cout << "neighbor count for " << update.edge.dst << " is " << count_explicit_neighbors(update.edge.dst) << std::endl; + initialize_vertex_sketch(update.edge.dst); + flush_edges_to_sketch(update.edge.dst); + } + + // logic for updating pending dense edge counts + potentially flushing out + // dense edges to the sketching structure + if (is_vertex_sketched(update.edge.dst)) { + if (num_pending_dense_edges[update.edge.dst] >= MOVE_TO_SKETCH) { + flush_edges_to_sketch(update.edge.dst); + } + } + if (is_vertex_sketched(update.edge.src)) { + if (num_pending_dense_edges[update.edge.src] >= MOVE_TO_SKETCH) { + flush_edges_to_sketch(update.edge.src); + } + } + } + else if (update.type == DELETE) { + num_edges[update.edge.src]--; + num_edges[update.edge.dst]--; + total_num_edges--; + + // TODO - eventually do more precise casework + // if edge exists in the CF (1): + // * a) edge originally comes from the sketch forest: update the sketch algo; apply transaction log + // * b) edge originally comes from the CF: remove it from the CF and you're done. + + // if not in cluster forest (2): + // TODO - this logic should check the cf for which edges exist in it + // if (cf_edges[update.edge.src].find(update.edge.dst) != cf_edges[update.edge.src].end()) { + // if (cf_algo.has_edge(update.edge.src, update.edge.dst)) { + if (this->is_edge_in_cf(update.edge)) { + edge_id_t edge_id = concat_pairing_fn(update.edge.src, update.edge.dst); + // if edge comes from sketching algo: + if (edges_from_sketch.find(edge_id) != edges_from_sketch.end()) { + // std::cout << "Connectivity edge from sketching algo: " << update.edge.src << ", "<< update.edge.dst << std::endl; + // case a) + // deleting from sketching algo + delete_from_sketch(update.edge.src, update.edge.dst); + // TODO - can we be lazier about this? + sketching_algo.process_all_updates(); + flush_transaction_log(); + check_and_perform_recovery(update.edge.src); + check_and_perform_recovery(update.edge.dst); + // can we do defered work: yes + // do we have to: ??? figure out + } + else { + //case b) edge does not come from sketching algo + remove_from_cf(update.edge.src, update.edge.dst); + + // TODO - same logic is needed as above to DECREMENT pending dense edges + // in the sparse part, if this were the case. + + if (is_vertex_sketched(update.edge.dst)) + { + num_pending_dense_edges[update.edge.src]--; + } + + if (is_vertex_sketched(update.edge.src)) + { + num_pending_dense_edges[update.edge.dst]--; + } + } + } + // 2) edge does not exist in the CF: + // * it must be in the sketch algo, so update the sketch algo and apply transaction log. 
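+      // NB, as currently written: the push_back into non_tree_deletion_buffer
+      // in the branch below is commented out, so the buffer never fills and
+      // these non-tree deletions are effectively dropped until that line is
+      // re-enabled.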
+      // 2) the edge does not exist in the CF:
+      //  * it must be in the sketch algo, so update the sketch algo and apply the transaction log.
+      else {
+        // we can buffer this deletion as long as:
+        // 1) we know the edge does not disconnect two components
+
+        non_tree_deletion_buffer.push_back(concat_pairing_fn(update.edge.src, update.edge.dst));
+        if (non_tree_deletion_buffer.size() >= 100) {
+          // std::cout << "Flushing non-tree deletion buffer of size: " << non_tree_deletion_buffer.size() << std::endl;
+          for (edge_id_t edge_id: non_tree_deletion_buffer) {
+            Edge edge = inv_concat_pairing_fn(edge_id);
+            delete_from_sketch(edge.src, edge.dst);
+            check_and_perform_recovery(edge.src);
+            check_and_perform_recovery(edge.dst);
+          }
+          non_tree_deletion_buffer.clear();
+          flush_transaction_log();
+        }
+        // sketching_algo.update(update);
+        // delete_from_sketch(update.edge.src, update.edge.dst);
+        // TODO - verify that we don't need to flush the transaction log
+        // flush_transaction_log();
+        // check_and_perform_recovery(update.edge.src);
+        // check_and_perform_recovery(update.edge.dst);
+      }
+      // TODO - eventually implement a check to see if we need to remove
+      // one of the vertices from the sketch algo and dump the edges out.
+    }
+  }
+
+  bool connectivity_query(node_id_t a, node_id_t b) {
+    sketching_algo.process_all_updates();
+    flush_transaction_log();
+    return cf_algo.is_connected(a, b);
+  }
+
+  std::vector<std::set<node_id_t>> cc_query() {
+    sketching_algo.process_all_updates();
+    flush_transaction_log();
+    // TODO - this ain't great.
+    std::vector<std::set<node_id_t>> ret;
+    std::unordered_map<uint64_t, std::set<node_id_t>> component_map;
+    for (node_id_t i=0; i < num_nodes; i++) {
+      localTree *root = localTree::getRoot(cf_algo.leaves[i]);
+      uint64_t root_id = (uint64_t) root;
+      // std::cout << "root_id " << root_id << " for node " << i << std::endl;
+      auto it = component_map.find(root_id);
+      if (it == component_map.end()) {
+        component_map[root_id] = std::set<node_id_t>();
+      }
+      component_map[root_id].insert(i);
+    }
+    for (auto &pair: component_map) {
+      ret.push_back(pair.second);
+    }
+    return ret;
+  }
+
+  size_t num_sketched_vertices() const {
+    return _is_vertex_sketched.size();
+  }
+  size_t total_edges() const {
+    return total_num_edges;
+  }
+  size_t num_sketched_edges() const {
+    return total_sketched_edges;
+  }
+
+  size_t get_space_usage_cf() {
+    return cf_algo.getMemUsage();
+  }
+  size_t get_space_usage_driver() {
+    // get the space usage of the driver itself
+    size_t total = sizeof(*this);
+
+    total += num_pending_dense_edges.capacity() * sizeof(uint16_t);
+    total += num_edges.capacity() * sizeof(uint32_t);
+    total += num_cf_edges.capacity() * sizeof(uint32_t);
+
+    total += _neighbors_buffer.capacity() * sizeof(node_id_t);
+    total += non_tree_deletion_buffer.capacity() * sizeof(edge_id_t);
+
+    total += _is_vertex_sketched.bucket_count() * sizeof(node_id_t);
+    total += edges_from_sketch.bucket_count() * sizeof(edge_id_t);
+
+    return total;
+  }
+  size_t space_usage_conn_sketch() {
+    return sketching_algo.space_usage_bytes();
+  }
+  size_t space_usage_recovery_sketch() {
+    size_t total = 0;
+    for (auto &pair: recovery_sketches) {
+      total += pair.second->space_usage_bytes();
+    }
+    total += recovery_sketches.bucket_count() * sizeof(decltype(recovery_sketches)::value_type);
+    return total;
+  }
+
+};
diff --git a/include/mpi_nodes.h b/include/mpi_nodes.h
index 3800365..be5873b 100644
--- a/include/mpi_nodes.h
+++ b/include/mpi_nodes.h
@@ -6,12 +6,16 @@
 #include "types.h"
 #include "euler_tour_tree.h"
 #include "sketchless_euler_tour_tree.h"
-#include "link_cut_tree.h"
+// #include "link_cut_tree.h"
+#include "lct_v2.h"
 #include "mpi_functions.h"
+#include "sketch/sketch_concept.h"
+#include "sketch/sketch_columns.h"
+#include "sketch_interfacing.h"
 
 enum TreeOperationType {
-  NOT_ISOLATED=0, ISOLATED=1, EMPTY, LINK, CUT, LCT_QUERY
+  NOT_ISOLATED=0, ISOLATED=1, EMPTY, LINK, CUT, LCT_QUERY, MAXIMIZED
 };
 
 typedef struct {
@@ -41,7 +45,7 @@ typedef struct {
 typedef struct {
   node_id_t v = 0;
   uint32_t prev_tier_size = 0;
-  SketchSample sketch_query_result;
+  SketchSample sketch_query_result;
 } RefreshEndpoint;
 
 typedef struct {
@@ -56,9 +60,14 @@ typedef struct {
 class InputNode {
   node_id_t num_nodes;
   uint32_t num_tiers;
-  LinkCutTree link_cut_tree;
-  SketchlessEulerTourTree query_ett;
+  // LinkCutTree<> link_cut_tree;
+  LinkCutTreeMaxAgg link_cut_tree;
+  SketchlessEulerTourTree<> query_ett;
   UpdateMessage* update_buffer;
+
+  std::vector transaction_log;
+
   int buffer_size;
   int buffer_capacity;
   int* split_revert_buffer;
@@ -70,15 +79,42 @@ class InputNode {
 public:
   InputNode(node_id_t num_nodes, uint32_t num_tiers, int batch_size, int seed);
   ~InputNode();
+  // TODO - in reality, the input node needs to communicate
+  // with its tier nodes to initialize data structures.
+  // in any hybrid tests, we're just gonna do this ahead of time.
+  void initialize_node(node_id_t u) {
+    query_ett.initialize_node(u);
+    link_cut_tree.initialize_node(u);
+  }; // no-op
+  void uninitialize_node(node_id_t u) {
+    query_ett.uninitialize_node(u);
+    link_cut_tree.uninitialize_node(u);
+  }; // no-op
+  void initialize_all_nodes() {
+    query_ett.initialize_all_nodes(num_nodes);
+    link_cut_tree.initialize_all_nodes(num_nodes);
+  }; // no-op
   void update(GraphUpdate update);
   void process_all_updates();
   bool connectivity_query(node_id_t a, node_id_t b);
   std::vector<std::set<node_id_t>> cc_query();
   void end();
+
+  void flush_transaction_log() {
+    transaction_log.clear();
+  };
+  size_t space_usage_bytes() const {
+    return 0; // TODO - implement
+  }
+
+  const std::vector& get_transaction_log() const {
+    return transaction_log;
+  }
+
 };
 
 class TierNode {
-  EulerTourTree ett;
+  EulerTourTree ett;
   uint32_t tier_num;
   uint32_t num_tiers;
   int batch_size;
@@ -88,6 +124,18 @@ class TierNode {
   SampleResult* query_result_buffer;
   bool* split_revert_buffer;
   bool using_sliding_window = false;
+  void initialize_node(node_id_t u) {
+    ett.initialize_node(u);
+  };
+  void uninitialize_node(node_id_t u) {
+    ett.uninitialize_node(u);
+  };
+  void initialize_all_nodes(node_id_t max_num_nodes) {
+    ett.initialize_all_nodes(max_num_nodes);
+  };
+  bool is_initialized(node_id_t u) {
+    return ett.is_initialized(u);
+  };
   void update_tier(GraphUpdate update);
   void ett_update_tier(EttUpdateMessage message);
   void refresh_tier(RefreshMessage message);
diff --git a/include/parlay_hash/BUILD b/include/parlay_hash/BUILD
new file mode 100644
index 0000000..67b6627
--- /dev/null
+++ b/include/parlay_hash/BUILD
@@ -0,0 +1,22 @@
+cc_library(
+    name = "epoch",
+    hdrs = ["epoch.h"],
+    deps = [
+        "@parlaylib//parlay:primitives",
+    ],
+)
+
+cc_library(
+    name = "lock",
+    hdrs = ["lock.h"],
+)
+
+cc_library(
+    name = "unordered_map",
+    hdrs = ["unordered_map.h"],
+    deps = [
+        ":epoch",
+        ":lock",
+    ],
+    visibility = ["//visibility:public"],
+)
diff --git a/include/parlay_hash/bigatomic.h b/include/parlay_hash/bigatomic.h
new file mode 100644
index 0000000..32e90f6
--- /dev/null
+++ b/include/parlay_hash/bigatomic.h
@@ -0,0 +1,95 @@
+// An implementation of big_atomic using a SeqLock.
+// +// Supports: +// - Blocking loads (loads never obstruct each-other, but can be indefinitely blocked by writers) +// - Blocking stores +// - Blocking CAS +// +// No additional space usage +// + +#ifndef PARLAYATOMIC_H_ +#define PARLAYATOMIC_H_ + +#include +#include +#include +#include +#include + +namespace parlay { + +template> +struct alignas(32) big_atomic { + + using vtype = long; + using tag = vtype; + + std::atomic version; + V val; + + big_atomic(const V& v) : version(0), val(v) {} + big_atomic() : version(0) {} + + void store_sequential(const V& v) { val = v; } + + V load() { + while (true) { + vtype ver = version.load(std::memory_order_acquire); + V v = val; + std::atomic_thread_fence(std::memory_order_acquire); + if ((ver & 1) == 0 && version.load(std::memory_order_relaxed) == ver) return v; + } + } + + std::pair ll_speculative() { + vtype ver = version.load(std::memory_order_acquire); + V v = val; + std::atomic_thread_fence(std::memory_order_acquire); + return std::pair(v, ver); + } + + std::pair ll() { + while (true) { + int delay = 100; + vtype ver = version.load(std::memory_order_acquire); + V v = val; + std::atomic_thread_fence(std::memory_order_acquire); + if ((ver & 1) == 0 && version.load(std::memory_order_relaxed) == ver) + return std::pair(v,ver); + for (volatile int i = 0; i < delay; i++); + delay = std::min(2 * delay, 1000); + } + } + + bool lv(tag tg) { + return version.load() == tg; + } + + bool sc(tag expected_tag, const V& v) { + bool result = true; + int delay = 100; + while (true) { + vtype ver = version.load(); + if (ver != expected_tag) return false; + if (get_locks().try_lock((long)this, [&] { + if (version.load(std::memory_order_acquire) != expected_tag) + result = false; + else { + version.store(ver + 1, std::memory_order_relaxed); + std::atomic_thread_fence(std::memory_order_release); + val = v; + version.store(ver + 2, std::memory_order_release); + } + return true; + })) + return result; + for (volatile int i = 0; i < delay; i++); + delay = std::min(2 * delay, 2000); + } + } + +}; + +} // namespace parlay +#endif // PARLAYATOMIC_H_ diff --git a/include/parlay_hash/parallel.h b/include/parlay_hash/parallel.h new file mode 100644 index 0000000..0394791 --- /dev/null +++ b/include/parlay_hash/parallel.h @@ -0,0 +1,36 @@ +#ifdef USE_PARLAY +#include +#include +#include +namespace parlay { +#define PARLAY_USE_STD_ALLOC 1 + + using scheduler_type = internal::scheduler_type; + + template + long tabulate_reduce(long n, const F& f) { + return parlay::reduce(parlay::delayed::tabulate(n, [&] (size_t i) { + return f(i);})); + } +} +#else +namespace parlay { + + struct scheduler_type { + scheduler_type(int num_procs) {} + }; + + template + long tabulate_reduce(long n, const F& f) { + long r = 0; + for (long i=0; i < n; i++) + r += f(i); + return r; + } + + template + void parallel_for(long n, const F& f) { + for (long i=0; i < n; i++) f(i); + } +} +#endif diff --git a/include/parlay_hash/parlay_hash.h b/include/parlay_hash/parlay_hash.h new file mode 100644 index 0000000..31ede06 --- /dev/null +++ b/include/parlay_hash/parlay_hash.h @@ -0,0 +1,1181 @@ +#ifndef PARLAY_HASH_H_ +#define PARLAY_HASH_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "bigatomic.h" +#include "parallel.h" + +constexpr bool PrintGrow = false; + +namespace parlay { + +template +struct parlay_hash { + using Entry = typename Entries::Entry; + using K = typename Entry::Key; + + // 
********************************************* + // Various parameters + // ********************************************* + + // set to grow by factor of 8 (2^3) + static constexpr int log_grow_factor = 2; + static constexpr int grow_factor = 1 << log_grow_factor; + + // groups of block_size buckets are copied over by a single thread + // the block size typically grows with size, but starts here + static constexpr long min_block_size = 4; + + // buffer_size is picked so state fits in a cache line (if it can) + static constexpr long buffer_size = (sizeof(Entry) > 24) ? 1 : 48 / sizeof(Entry); + + // log_2 of the expected number of entries in a bucket (<= buffer_size) + static constexpr long log_bucket_size = + (buffer_size == 1) ? 0 : ((buffer_size == 2) ? 1 : ((buffer_size <= 4) ? 2 : ((buffer_size <= 8) ? 3 : 3))); + + static long get_block_size(int num_bits) { + return num_bits < 16 ? 16 : 256; } + + // The size of a bucket that causes the table to grow, i.e. if any + // insert causes the bucket to reach the given size, then start + // growing. + // Technically this should be something like c log (n) / log(log n)) + // for a small constant c if each bucket is expected to hold 1 + // element, but.... each bucket can be expected to hold more than one. + static long get_overflow_size(int num_bits) { + if constexpr (log_bucket_size == 0) return num_bits < 18 ? 10 : 16; + else if constexpr (log_bucket_size == 1) return num_bits < 18 ? 11 : 18; + else if constexpr (log_bucket_size == 2) return num_bits < 18 ? 12 : 20; + else if constexpr (log_bucket_size == 3) return num_bits < 18 ? 14 : 22; + else return num_bits < 18 ? 20 : 24; + } + + // clear_at_end will cause the scheduler and epoch-based collector + // to clear their state on destruction + static constexpr bool default_clear_at_end = true; + bool clear_memory_and_scheduler_at_end; + + // a reference to the scheduler (null if not to be cleared) + parlay::scheduler_type* sched_ref; + + // ********************************************* + // The state structure for each bucket + // ********************************************* + + // for overflow lists for each bucket + struct link { + Entry entry; + link* next; + link(const Entry& entry, link* next) : entry(entry), next(next) { } + }; + + // for delayed reclamation of links using an epoch-based collector + epoch::memory_pool* link_pool; + + link* new_link(const Entry& entry, link* l) { + return link_pool->New(entry, l); } + void retire_link(link* l) { link_pool->Retire(l);} + + // Each bucket contains a "state", which consists of a fixed size + // buffer of entries (buffer_size) and an overflow list. The first + // buffer_size entries in the bucket are kept in the buffer, and any + // overflow goes to the list. The head stores both the pointer to + // the overflow list (lower 56 bits) and the number of elements in + // the buffer, or buffer_size+1 if overfull (top 8 bits). 
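// Standalone illustration of the head encoding described above: the
// overflow-list pointer lives in the low 48 bits of list_head and the
// buffer count in the bits above bit 48 (assumes 48-bit user pointers,
// as on current x86-64/ARM64 Linux). Names are local to the example.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct Link { int payload; };

static uint64_t make_head(Link* l, uint64_t count) {
  return ((uint64_t)l) | (count << 48);
}
static Link* head_ptr(uint64_t head) {
  return (Link*)(head & ((1ull << 48) - 1));
}
static uint64_t head_count(uint64_t head) {
  return (head >> 48) & 255ull;
}

int main() {
  Link l{42};
  uint64_t head = make_head(&l, 3);  // 3 entries in the buffer
  assert(head_ptr(head) == &l && head_count(head) == 3);
  std::printf("count=%llu payload=%d\n",
              (unsigned long long)head_count(head), head_ptr(head)->payload);
}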
+ struct state { + public: + size_t list_head; + Entry buffer[buffer_size]; + state() : list_head(0) {} + state(const Entry& e) : list_head(1ul << 48) { + buffer[0] = e; + } + static constexpr size_t forwarded_val = 1ul; + + size_t make_head(link* l, size_t bsize) { + return (((size_t) l) | (bsize << 48)); } + + // update overflow list with new ptr (assumes buffer is full) + state(const state& s, link* ptr) + : list_head(make_head(ptr, buffer_size + (ptr != nullptr))) { + for (int i=0; i < buffer_size; i++) + buffer[i] = s.buffer[i]; + } + + // add entry to the bucket state (in buffer if fits, otherwise at head of overflow list) + template + state(const state& s, Entry e, const NL& new_link) { + for (int i=0; i < std::min(s.buffer_cnt(), buffer_size); i++) + buffer[i] = s.buffer[i]; + if (s.buffer_cnt() < buffer_size) { + buffer[s.buffer_cnt()] = e; + list_head = make_head(nullptr, s.buffer_cnt() + 1); + } else { + link* l = new_link(e, s.overflow_list()); + list_head = make_head(l, buffer_size + 1); + } + } + + // add entry to buffer (assumes it fits) -- specialization of above + state(const state& s, Entry e) : list_head(make_head(nullptr, s.buffer_cnt() + 1)) { + for (int i=0; i < s.buffer_cnt(); i++) + buffer[i] = s.buffer[i]; + buffer[s.buffer_cnt()] = e; + } + + // remove buffer entry j, replace with first from overflow list (assumes there is overflow) + state(const state& s, link* ptr, int j) + : list_head(make_head(ptr->next, buffer_size + (ptr->next != nullptr))) { + for (int i=0; i < buffer_size; i++) + buffer[i] = s.buffer[i]; + buffer[j] = Entry{ptr->entry}; + } + + // remove buffer entry j, replace with last entry in buffer (assumes no overflow) + state(const state& s, int j) : list_head(make_head(nullptr, s.buffer_cnt() - 1)) { + if (s.overflow_list() != nullptr) abort(); + for (int i=0; i < s.buffer_cnt(); i++) + buffer[i] = s.buffer[i]; + buffer[j] = buffer[s.buffer_cnt() - 1]; + } + + state(bool x) : list_head(forwarded_val) {} + + bool is_forwarded() const {return list_head == forwarded_val ;} + + // number of entries in buffer, or buffer_size+1 if overflow + long buffer_cnt() const {return (list_head >> 48) & 255ul ;} + + // number of entries in bucket (includes those in the overflow list) + long size() const { + if (buffer_cnt() <= buffer_size) return buffer_cnt(); + return buffer_size + list_length(overflow_list()); + } + + // get the overflow list + link* overflow_list() const { + return (link*) (list_head & ((1ul << 48) - 1));} + }; + + // returns std::optional(f(entry)) for entry with given key + template + static auto find_in_list(const link* nxt, const K& k, const F& f) { + using rtype = typename std::invoke_result::type; + long cnt = 0; + while (nxt != nullptr && !nxt->entry.equal(k)) { + nxt = nxt->next; + cnt++; + } + if (nxt == nullptr) + return std::pair(std::optional(), cnt); + else + return std::pair(std::optional(f(nxt->entry)), 0l); + } + + // If k is found copies list elements up to k, and keeps the old + // tail past k. Returns the number of new nodes that will need to + // be reclaimed, the head of the new list, and the link that is removed. 
+ // Returns [0, nullptr, nullptr] if k is not found + std::tuple remove_from_list(link* nxt, const K& k) { + if (nxt == nullptr) + return std::tuple(0, nullptr, nullptr); + else if (nxt->entry.equal(k)) + return std::tuple(1, nxt->next, nxt); + else { + auto [len, ptr, removed] = remove_from_list(nxt->next, k); + if (len == 0) return std::tuple(0, nullptr, nullptr); + return std::tuple(len + 1, new_link(nxt->entry, ptr), removed); + } + } + + // update element with a given key in a list. Uses path copying. + // Returns a triple consisting of the position of the key in the list (1 based), + // the head of the new list with the key updated, and the old link that is replaced. + // If the key is not found, nothing is done, the last two results are nullptr, and + // the first result is the length of the list. + template + std::tuple update_list(link* nxt, const K& k, const Constr& constr) { + if (nxt == nullptr) + return std::tuple(0, nullptr, nullptr); + else if (nxt->entry.equal(k)) + return std::tuple(1, link_pool->New(constr(std::optional(nxt->entry)), nxt->next), nxt); + else { + auto [len, ptr, updated] = update_list(nxt->next, k, constr); + if (ptr == nullptr) return std::tuple(len + 1, nullptr, nullptr); + return std::tuple(len + 1, link_pool->New(nxt->entry, ptr), updated); + } + } + + // retires first n elements of a list, but not the entries + void retire_list_n(link* nxt, int n) { + while (n > 0) { + n--; + link* tmp = nxt->next; + retire_link(nxt); + nxt = tmp; + } + } + + // Retires full list and their entries. Used when destructing the + // table. + void retire_list_all(link* nxt) { + while (nxt != nullptr) { + link* tmp = nxt->next; + entries_->retire_entry(nxt->entry); + retire_link(nxt); + nxt = tmp; + } + } + + // Retires full list, but not their entries. Used when copying to a + // new list during expansion, i.e. the entries will be in the new + // list and don't need to be retired. + void retire_list(link* nxt) { + while (nxt != nullptr) { + link* tmp = nxt->next; + retire_link(nxt); + nxt = tmp; + } + } + + static long list_length(link* nxt) { + long len = 0; + while (nxt != nullptr) { + len++; + nxt = nxt->next; + } + return len; + } + + // Find key if it is in the buffer. Return index. + int find_in_buffer(const state& s, const K& k) { + long len = s.buffer_cnt(); + for (long i = 0; i < std::min(len, buffer_size); i++) + if (s.buffer[i].equal(k)) + return i; + return -1; + } + + // Apply f to all entries in the state. + template + void static for_each_in_state(const state& s, const F& f) { + for (long i = 0; i < std::min(s.buffer_cnt(), buffer_size); i++) + f(s.buffer[i]); + link* l = s.overflow_list(); + while (l != nullptr) { + f(l->entry); + l = l->next; + } + } + + // Find entry with given key if in the bucket (state). Return + // optional of f applied to the entry if found, otherwise + // std::nullopt. + template + auto find_in_state(const state& s, const K& k, const F& f) + -> std::optional::type> + { + long len = s.buffer_cnt(); + for (long i = 0; i < std::min(len, buffer_size); i++) + if (s.buffer[i].equal(k)) + return std::optional(f(s.buffer[i])); + if (len <= buffer_size) return std::nullopt; + return find_in_list(s.overflow_list(), k, f).first; + } + + // A bucket is just an "atomic" state. + // a big_atomic is sort of like an std::atomic but supports + // load-linked, store-conditional, and is efficient when the x does + // not fit in a machine word. 
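// A minimal seqlock in the style of big_atomic above: readers retry until
// they see an even version number that is unchanged across the read; a
// writer bumps the version to odd, writes, then bumps it back to even.
// (The real code also supports concurrent writers via a lock table; as in
// the original, the racy read of val is validated by re-checking the version.)
#include <atomic>
#include <cstdio>

template <typename V>
struct SeqLocked {
  std::atomic<long> version{0};
  V val{};

  V load() {
    while (true) {
      long v1 = version.load(std::memory_order_acquire);
      V copy = val;  // may be torn; validated below
      std::atomic_thread_fence(std::memory_order_acquire);
      long v2 = version.load(std::memory_order_relaxed);
      if ((v1 & 1) == 0 && v1 == v2) return copy;  // even and unchanged => clean
    }
  }

  void store(const V& v) {  // single-writer variant, for brevity
    long ver = version.load(std::memory_order_relaxed);
    version.store(ver + 1, std::memory_order_relaxed);  // odd: write in progress
    std::atomic_thread_fence(std::memory_order_release);
    val = v;
    version.store(ver + 2, std::memory_order_release);  // even again
  }
};

int main() {
  SeqLocked<int> x;
  x.store(7);
  std::printf("%d\n", x.load());
}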
+ using bckt = big_atomic; + + // used for load-linked, store-conditionals + using tag_type = typename big_atomic::tag; + + // wrapper to ensure alignment + struct alignas(64) bucket { bckt v; }; + + // initialize an uninitialized bucket + static void initialize(bucket& bck) { + new (&bck.v) big_atomic(state()); + } + + // ********************************************* + // The table structures + // Each version increases in size, by grow_factor + // ********************************************* + + // status of a block of buckets, used when initializing and when copying to a new version + enum status : char {Uninit, Initializing, Empty, Working, Done}; + + // A single version of the table. + // A version includes a sequence of "size" "buckets". + // New versions are added as the hash table grows, and each holds a + // pointer to the next larger version, if one exists. + struct table_version { + std::atomic next; // points to next version if created + std::atomic finished_block_count; //number of blocks finished copying + long num_bits; // log_2 of size + size_t size; // number of buckets + long block_size; // size of each block used for copying + int overflow_size; // size of bucket to trigger next expansion + bucket* buckets; // sequence of buckets + //sequence buckets; // sequence of buckets + std::atomic* block_status; // status of each block while copying + + // The index of a key is the highest num_bits of the lowest + // 48-bits of the hash value. Using the highest num_bits ensures + // that when growing, a bucket will go to grow_factor contiguous + // buckets in the next table. + long get_index(const K& k) { + size_t h = Entry::hash(k); + return (h >> (48 - num_bits)) & (size-1u);} + + bckt* get_bucket(const K& k) { + return &buckets[get_index(k)].v; } + + // initial table version, n indicating size + table_version(long n) + : next(nullptr), + finished_block_count(0), + num_bits(std::max((long) std::ceil(std::log2(min_block_size-1)), + (long) std::ceil(std::log2(1.5*n)) - log_bucket_size)), + size(1ul << num_bits), + block_size(num_bits < 10 ? min_block_size : get_block_size(num_bits)), + overflow_size(get_overflow_size(num_bits)) + { + //if (PrintGrow) std::cout << "initial size: " << size << std::endl; + buckets = (bucket*) malloc(sizeof(bucket)*size); + block_status = (std::atomic*) malloc(sizeof(std::atomic) * size/block_size); + parallel_for(size, [&] (long i) { initialize(buckets[i]);}); + parallel_for(size/block_size, [&] (long i) { block_status[i] = Empty;}); + } + + // expanded table version copied from smaller version t + table_version(table_version* t) + : next(nullptr), + finished_block_count(0), + num_bits(t->num_bits + log_grow_factor), + size(t->size * grow_factor), + block_size(get_block_size(num_bits)), + overflow_size(get_overflow_size(num_bits)) + { + buckets = (bucket*) malloc(sizeof(bucket)*size); + block_status = (std::atomic*) malloc(sizeof(std::atomic) * size/min_block_size); + } + + ~table_version() { + free(buckets); + free(block_status); + } + }; + + // the current table version + std::atomic current_table_version; + + // the initial table version, used for cleanup on destruction + table_version* initial_table_version; + + // ********************************************* + // Functions for expanding the table + // ********************************************* + + // Called when table should be expanded (i.e. when some bucket is too large). + // Allocates a new table version and links the old one to it. 
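// Why get_index above uses the *high* bits of the 48-bit hash: the index in
// a num_bits-bit table is a prefix of the index in a (num_bits +
// log_grow_factor)-bit table, so a bucket's entries always land in
// grow_factor contiguous buckets of the next version. A standalone check
// (constants local to the example):
#include <cstdint>
#include <cstdio>

static long index_of(uint64_t h, int num_bits) {
  return (h >> (48 - num_bits)) & ((1ul << num_bits) - 1);
}

int main() {
  const int bits = 10, log_grow = 2;       // grow by 4x
  uint64_t h = 0x0000ABCDEF123456ull;      // any 48-bit hash value
  long small = index_of(h, bits);
  long big = index_of(h, bits + log_grow);
  // big always falls in [small * 4, small * 4 + 3]
  std::printf("small=%ld big=%ld block=[%ld,%ld]\n",
              small, big, small * 4, small * 4 + 3);
}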
+ void expand_table(table_version* ht) { + table_version* htt = current_table_version.load(); + if (htt->next == nullptr) { + long n = ht->size; + // if fail on lock, someone else is working on it, so skip + get_locks().try_lock((long) ht, [&] { + if (ht->next == nullptr) { + ht->next = new table_version(ht); + //if (PrintGrow) + // std::cout << "expand to: " << n * grow_factor << std::endl; + } + return true;}); + } + } + + // Copies a bucket into grow_factor new buckets. + void copy_bucket(table_version* t, table_version* next, long i) { + long exp_start = i * grow_factor; + // Clear grow_factor buckets in the next table version to put them in. + for (int j = exp_start; j < exp_start + grow_factor; j++) + initialize(next->buckets[j]); + // copy bucket to grow_factor new buckets in next table version + while (true) { + // the bucket to copy + auto [s, tag] = t->buckets[i].v.ll(); + + // insert into grow_factor buckets (states) for next larger table + state hold[grow_factor]; + size_t mask = grow_factor-1; + for_each_in_state(s, [&] (const Entry& entry) { + size_t idx = next->get_index(entry.get_key()) & mask; + hold[idx] = state(hold[idx], entry, + [&] (const Entry& e, link* l) {return new_link(e,l);}); + }); + + // now store the buckets into table + for (int j = 0; j < grow_factor; j++) + next->buckets[grow_factor * i + j].v.store_sequential(hold[j]); + + // try to replace original bucket with forwarded marker + if (t->buckets[i].v.sc(tag, state(true))) { + retire_list(s.overflow_list()); + break; + } + + // If the attempt failed then someone updated bucket in the meantime so need to retry. + // Before retrying need to clear out already added buckets. + for (int j = exp_start; j < exp_start + grow_factor; j++) { + state ss = next->buckets[j].v.load(); + retire_list(ss.overflow_list()); + next->buckets[j].v.store_sequential(state()); + } + } + } + + // If copying is ongoing (i.e., next is not null), and if the the + // hash bucket given by hashid is not already copied, tries to copy + // the block_size buckets that containing hashid to the next larger + // table version. + void copy_if_needed(table_version* t, long hashid) { + table_version* next = t->next.load(); + if (next != nullptr) { + long num_blocks = t->size/t->block_size; + long block_num = hashid & (num_blocks -1); + long start = block_num * t->block_size; + status st = t->block_status[block_num]; + status old = Empty; + if (st == Done) return; + + // if data is uninitialized, need to initialize + // if (st == Uninit || st == Initializing) { + // status x = Uninit; + // if (t->block_status[block_num].compare_exchange_strong(x, Working)) { + // for (int i = start; i < start + t->block_size; i++) + // initialize(t->buckets[i]); + // t->block_status[block_num] = Empty; + // } else { + // while (t->block_status[block_num] == Initializing) + // for (volatile int i=0; i < 100; i++); + // } + // } + + // This is effectively a try lock on the block_num. + // It blocks other updates on the buckets associated with the block. + else if (st == Empty && + t->block_status[block_num].compare_exchange_strong(old, Working)) { + + // initialize block_status for next grow round + for (int i = 0; i < grow_factor; i++) + next->block_status[grow_factor*block_num + i] = Empty; + + // copy block_size buckets + for (int i = start; i < start + t->block_size; i++) { + copy_bucket(t, next, i); + } + t->block_status[block_num] = Done; + + // If all blocks have been copied then can set current table + // to next. 
Note: this atomic fetch-and-add can be a + // bottleneck and is the reason the block sizes are reasonably + // large (e.g. 256). A smarter combining tree could be used + // if smaller block sizes are needed. + if (++next->finished_block_count == num_blocks) { + //std::cout << "expand done" << std::endl; + current_table_version = next; + } + } else { + // If another thread is working on the block, wait until Done + while (t->block_status[block_num] == Working) { + for (volatile int i=0; i < 100; i++); + } + } + } + } + + // ********************************************* + // Construction and Destruction + // ********************************************* + + // Clear bucket, assuming it is not forwarded. + void clear_bucket(bckt* b) { + auto [s, tag] = b->ll(); + if (!s.is_forwarded() && b->sc(tag, state())) { + for (int j=0; j < std::min(s.buffer_cnt(), buffer_size); j++) { + entries_->retire_entry(s.buffer[j]); + } + retire_list_all(s.overflow_list()); + } + } + + // Clears bucket or if the bucket is forwarded (during copying) + // then clear the forwarded buckets. + void clear_bucket_rec(table_version* t, long i) { + bckt* b = &(t->buckets[i].v); + state head = b->load(); + if (!head.is_forwarded()) + clear_bucket(b); + else { + table_version* next = t->next.load(); + for (int j = 0; j < grow_factor; j++) + clear_bucket_rec(next, grow_factor * i + j); + } + } + + void clear_buckets() { + table_version* ht = current_table_version.load(); + // clear buckets from current and future versions + parallel_for(ht->size, [&] (size_t i) { + clear_bucket_rec(ht, i);}); + } + + // Clear all memory. + // Reinitialize to table of size 1 if specified, and by default. + void clear(bool reinitialize = true) { + clear_buckets(); + + // now reclaim the arrays + table_version* tv = initial_table_version; + while (tv != nullptr) { + table_version* tv_next = tv->next; + delete tv; + tv = tv_next; + } + // reinitialize + if (reinitialize) { + current_table_version = new table_version(1); + initial_table_version = current_table_version; + } + } + + Entries* entries_; + + // Creates initial table version for the given size. The + // clear_at_end allows to free up the epoch-based collector's + // memory, and the scheduler. + parlay_hash(long n, Entries* entries, bool clear_at_end = default_clear_at_end) + : entries_(entries), + clear_memory_and_scheduler_at_end(clear_at_end), + sched_ref(clear_at_end ? + new parlay::scheduler_type(std::thread::hardware_concurrency()) : + nullptr), + link_pool(clear_at_end ? + new epoch::memory_pool() : + &epoch::get_default_pool()), + current_table_version(new table_version(n)), + initial_table_version(current_table_version.load()) + { } + + ~parlay_hash() { + clear(false); + if (clear_memory_and_scheduler_at_end) { + delete sched_ref; + delete link_pool; + } + } + + // ********************************************* + // Operations + // ********************************************* + + // Updates b, s, tag, and idx to the correct bucket, state, tag and + // index if the the state s is forwarded. Is called recursively, + // but unlikely to go more than one level, and when not growing will + // return immediately. 
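// The per-block hand-off in copy_if_needed above is a try-lock built from a
// single compare_exchange on the block's status word. Minimal standalone
// version of that protocol (statuses as in the enum above):
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

enum Status : char { Empty, Working, Done };

int main() {
  std::atomic<Status> block_status{Empty};
  std::atomic<int> copies{0};

  auto copy_if_needed = [&] {
    Status old = Empty;
    if (block_status.compare_exchange_strong(old, Working)) {
      copies++;             // exactly one thread copies the block
      block_status = Done;
    } else {
      while (block_status.load() == Working)  // everyone else waits for Done
        std::this_thread::yield();
    }
  };

  std::vector<std::thread> ts;
  for (int i = 0; i < 8; i++) ts.emplace_back(copy_if_needed);
  for (auto& t : ts) t.join();
  std::printf("block copied %d time(s)\n", copies.load());  // always 1
}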
+  void check_bucket_and_state(table_version* t, const K& k,
+                              big_atomic<state>*& b, state& s, tag_type& tag, long& idx) {
+    if (s.is_forwarded()) {
+      table_version* nxt = t->next.load();
+      idx = nxt->get_index(k);
+      b = &(nxt->buckets[idx].v);
+      std::tie(s, tag) = b->ll();
+      check_bucket_and_state(nxt, k, b, s, tag, idx);
+    }
+  }
+
+  // find in the bucket, or if forwarded (during copying) then follow
+  // through to the next table, possibly repeatedly, although
+  // unlikely.
+  template <typename F>
+  auto find_in_bucket_rec(table_version* t, bckt* s, const K& k, const F& f)
+    -> std::optional<typename std::invoke_result<F,Entry>::type>
+  {
+    state x = s->load();
+    // if the bucket is forwarded, go to the next version
+    if (x.is_forwarded()) {
+      table_version* nxt = t->next.load();
+      return find_in_bucket_rec(nxt, nxt->get_bucket(k), k, f);
+    }
+    return find_in_state(x, k, f);
+  }
+
+  // Finds the entry with the key.
+  // Returns an optional which is empty if the key is not in the table,
+  // and contains f(e) otherwise, where e is the entry matching the key.
+  // NOTE: this is the most important function to optimize for performance.
+  // Hence the one hand-inlined buffer scan and the one prefetch (not used
+  // anywhere else in the code).
+  template <typename F>
+  auto Find(const K& k, const F& f)
+    -> std::optional<typename std::invoke_result<F,Entry>::type>
+  {
+    table_version* ht = current_table_version.load();
+    long idx = ht->get_index(k);
+    bckt* b = &(ht->buckets[idx].v);
+    // if entries are direct, then safe to scan the buffer without epoch protection
+    if constexpr (Entry::Direct) {
+      auto [s, tag] = b->ll();
+      if (s.is_forwarded())
+        check_bucket_and_state(ht, k, b, s, tag, idx);
+      for (long i = 0; i < std::min(s.buffer_cnt(), buffer_size); i++)
+        if (s.buffer[i].equal(k))
+          return std::optional(f(s.buffer[i]));
+      // if not found and not overfull, then done
+      if (s.buffer_cnt() <= buffer_size) return std::nullopt;
+      // otherwise need to search the overflow list, which requires protection
+      return epoch::with_epoch([&, tag=tag, &s = s] {
+        // if the state has not changed, then just search the list
+        if (b->lv(tag)) return find_in_list(s.overflow_list(), k, f).first;
+        return find_in_bucket_rec(ht, b, k, f);
+      });
+    } else { // if using indirection, always use protection
+      __builtin_prefetch(b); // allows the read to be pipelined with the epoch announcement
+      return epoch::with_epoch([&] () -> std::optional<typename std::invoke_result<F,Entry>::type> {
+        return find_in_bucket_rec(ht, b, k, f);});
+    }
+  }
+
+  // Inserts at key, and does nothing if the key is already in the table.
+  // The constr function constructs the entry to be inserted if needed.
+  // Returns an optional, which is empty if successfully inserted or
+  // contains f(e) if not, where e is the entry matching the key.
+  // (A miniature of this optional-return convention follows below.)
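// The optional-return convention documented above, in miniature: an empty
// optional means "inserted", an engaged one carries f(existing entry).
// std::unordered_map stands in for the concurrent table here.
#include <cstdio>
#include <optional>
#include <string>
#include <unordered_map>

template <typename F>
auto insert_like(std::unordered_map<int, std::string>& m, int k,
                 const std::string& v, const F& f)
    -> std::optional<decltype(f(v))> {
  auto [it, inserted] = m.try_emplace(k, v);
  if (inserted) return std::nullopt;  // success: empty optional
  return f(it->second);               // key present: f applied to old entry
}

int main() {
  std::unordered_map<int, std::string> m;
  auto len = [](const std::string& s) { return s.size(); };
  std::printf("first:  engaged=%d\n", (int)insert_like(m, 1, "abc", len).has_value());
  std::printf("second: f(old)=%zu\n", *insert_like(m, 1, "xy", len));
}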
+ template + auto Insert(const K& key, const Constr& constr, const F& f) + -> std::optional::type> + { + using rtype = std::optional::type>; + return epoch::with_epoch([&] () -> rtype { + auto [e, flag] = insert_(key, constr); + if (flag) return {}; + return rtype(f(e));}); + } + + template + auto insert_(const K& key, const Constr& constr) -> std::pair { + table_version* ht = current_table_version.load(); + long idx = ht->get_index(key); + auto b = &(ht->buckets[idx].v); + int delay = 200; + while (true) { + auto [s, tag] = b->ll(); + copy_if_needed(ht, idx); + check_bucket_and_state(ht, key, b, s, tag, idx); + long len = s.buffer_cnt(); + // if found in buffer then done + for (long i = 0; i < std::min(len, buffer_size); i++) + if (s.buffer[i].equal(key)) return std::pair(s.buffer[i], false); + if (len < buffer_size) { // buffer has space, insert to end of buffer + Entry new_e = constr(); + if (b->sc(tag, state(s, new_e))) return std::pair(new_e, true); + entries_->retire_entry(new_e); // if failed need to ty again + } else if (len == buffer_size) { // buffer full, insert new link + Entry new_e = constr(); + link* new_head = new_link(new_e, nullptr); + if (b->sc(tag, state(s, new_head))) + return std::pair(new_e, true); + entries_->retire_entry(new_head->entry); // if failed need to try again + retire_link(new_head); + } else { // buffer overfull, need to check if in list + auto [x, list_len] = find_in_list(s.overflow_list(), key, identity); + if (list_len + buffer_size > ht->overflow_size) expand_table(ht); + if (x.has_value()) return std::pair(*x, false); // if in list, then done + Entry new_e = constr(); + link* new_head = new_link(new_e, s.overflow_list()); + if (b->sc(tag, state(s, new_head))) // try to add to head of list + return std::pair(new_e, true); + entries_->retire_entry(new_head->entry); // if failed need to ty again + retire_link(new_head); + } + // delay before trying again, only marginally helps + for (volatile int i=0; i < delay; i++); + delay = std::min(2*delay, 5000); // 1000-10000 are about equally good + } + } + + template + auto Upsert(const K& key, const Constr& constr, G& g) + -> std::optional::type> + { + using rtype = std::optional::type>; + table_version* ht = current_table_version.load(); + long idx = ht->get_index(key); + auto b = &(ht->buckets[idx].v); + return epoch::with_epoch([&] () -> rtype { + int delay = 200; + while (true) { + auto [s, tag] = b->ll(); + state out_s = s; + copy_if_needed(ht, idx); + check_bucket_and_state(ht, key, b, s, tag, idx); + long len = s.buffer_cnt(); + bool cont = false; + for (long i = 0; i < std::min(len, buffer_size); i++) { + if (s.buffer[i].equal(key)) { + Entry new_e = constr(std::optional(s.buffer[i])); + out_s.buffer[i] = new_e; + if (b->sc(tag, out_s)) return g(s.buffer[i]); + else { + entries_->retire_entry(new_e); + cont = true; + break; + } + } + } + if (cont) continue; + if (len < buffer_size) { // buffer has space, insert to end of buffer + Entry new_e = constr(std::optional()); + if (b->sc(tag, state(s, new_e))) return std::nullopt; + entries_->retire_entry(new_e); // if failed need to ty again + } else if (len == buffer_size) { // buffer just full, insert new link + link* new_head = new_link(constr(std::optional()), nullptr); + if (b->sc(tag, state(s, new_head))) + return std::nullopt; + entries_->retire_entry(new_head->entry); // if failed need to try again + retire_link(new_head); + } else { // buffer overfull, need to check if in list + link* old_head = s.overflow_list(); + auto [list_len, new_head, 
updated] = update_list(old_head, key, constr); + if (new_head != nullptr) { + if (b->sc(tag, state(s, new_head))) {// try to add to head of list + rtype r = std::optional(g(updated->entry)); + retire_list_n(old_head, list_len); // retire old list + return r; + } else retire_list_n(new_head, list_len); + } else { + if (list_len + buffer_size > ht->overflow_size) expand_table(ht); + new_head = new_link(constr(std::optional()), old_head); + if (b->sc(tag, state(s, new_head))) // try to add to head of list + return std::nullopt; + entries_->retire_entry(new_head->entry); // if failed need to ty again + retire_link(new_head); + } + } + // delay before trying again, only marginally helps + for (volatile int i=0; i < delay; i++); + delay = std::min(2*delay, 5000); // 1000-10000 are about equally good + } + }); + } + + // Removes entry with given key + // Returns an optional which is empty if the key is not in the table, + // and contains f(e) otherwise, where e is the entry that is removed. + template + auto Remove(const K& key, const F& f) + -> std::optional::type> + { + using rtype = std::optional::type>; + table_version* ht = current_table_version.load(); + long idx = ht->get_index(key); + auto b = &(ht->buckets[idx].v); + // if entries are direct safe to scan the buffer without epoch protection + if constexpr (Entry::Direct) { + auto [s, tag] = b->ll(); + copy_if_needed(ht, idx); + check_bucket_and_state(ht, key, b, s, tag, idx); + if (s.buffer_cnt() <= buffer_size) { + int i = find_in_buffer(s, key); + if (i == -1) return std::nullopt; + if (b->sc(tag, state(s, i))) { + rtype r = f(s.buffer[i]); + entries_->retire_entry(s.buffer[i]); + return r; + } // if sc failed, will need to try again + } + } + // if buffer overfull, or indirect, then need to protect + return epoch::with_epoch([&] () -> rtype { + int delay = 200; + while (true) { + auto [s, tag] = b->ll(); + copy_if_needed(ht, idx); + check_bucket_and_state(ht, key, b, s, tag, idx); + int i = find_in_buffer(s, key); + if (i >= 0) { // found in buffer + if (s.buffer_cnt() > buffer_size) { // need to backfill from list + link* l = s.overflow_list(); + if (b->sc(tag, state(s, l, i))) { + rtype r = f(s.buffer[i]); + entries_->retire_entry(s.buffer[i]); + retire_link(l); + return r; + } // if sc failed, will need to try again + } else { // buffer not overfull, can backfill within buffer + if (b->sc(tag, state(s, i))) { + rtype r = f(s.buffer[i]); + entries_->retire_entry(s.buffer[i]); + return r; + } // if sc failed, will need to try again + } + } else { // not found in buffer + if (s.buffer_cnt() <= buffer_size) // if not overful, then done + return std::nullopt; + auto [cnt, new_list, removed] = remove_from_list(s.overflow_list(), key); + if (cnt == 0) // if not found in list then done + return std::nullopt; + // if found, try to update with the new list that has the element removed + if (b->sc(tag, state(s, new_list))) { + rtype r = f(removed->entry); + entries_->retire_entry(removed->entry); + retire_list_n(s.overflow_list(), cnt); // retire old list + return r; + } // if sc failed, will need to try again + retire_list_n(new_list, cnt - 1); // failed, retire new list + } + for (volatile int i=0; i < delay; i++); + delay = std::min(2*delay, 5000); // 1000-10000 are about equally good + } + }); + } + + // Size of bucket, or if forwarded, then sum sizes of all forwarded + // buckets, recursively. 
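// The buffer backfill used by Remove above, isolated: with no overflow
// list, slot j is deleted by moving the last live entry into it (order
// inside a bucket does not matter).
#include <cstdio>

int main() {
  int buffer[4] = {10, 20, 30, 40};
  int count = 4;
  int j = 1;                       // remove the entry in slot 1 (value 20)
  buffer[j] = buffer[count - 1];   // backfill with the last entry
  count--;
  for (int i = 0; i < count; i++) std::printf("%d ", buffer[i]);  // 10 40 30
  std::printf("\n");
}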
+  long bucket_size_rec(table_version* t, long i) {
+    state head = t->buckets[i].v.load();
+    if (!head.is_forwarded())
+      return head.size();
+    else {
+      long sum = 0;
+      table_version* next = t->next.load();
+      for (int j = 0; j < grow_factor; j++)
+        sum += bucket_size_rec(next, grow_factor * i + j);
+      return sum;
+    }
+  }
+
+  long size() {
+    table_version* ht = current_table_version.load();
+    return epoch::with_epoch([&] {
+      return parlay::tabulate_reduce(ht->size, [&] (size_t i) {
+        return bucket_size_rec(ht, i);});});
+  }
+
+  template <typename F>
+  void static for_each_bucket_rec(table_version* t, long i, const F& f) {
+    state s = t->buckets[i].v.load();
+    if (!s.is_forwarded())
+      for_each_in_state(s, f);
+    else {
+      table_version* next = t->next.load();
+      for (int j = 0; j < grow_factor; j++)
+        for_each_bucket_rec(next, grow_factor * i + j, f);
+    }
+  }
+
+  // Apply function f to all entries of the table.  Works while updates are going on, and guarantees that:
+  //   any element whose delete linearizes before the invocation will not be included
+  //   any element whose insert linearizes after the response will not be included
+  //   any element that is present from invocation to response will be included
+  // Elements that are inserted or deleted between the invocation and response might or might not appear.
+  // template <typename F>
+  // parlay::sequence entries(const F& f) {
+  //   table_version* ht = current_table_version.load();
+  //   return epoch::with_epoch([&] {
+  //     auto s = parlay::tabulate(ht->size, [&] (size_t i) {
+  //       parlay::sequence r;
+  //       for_each_in_bucket_rec(ht, i, [&] (const Entry& entry) {
+  //         r.push_back(f(entry));});
+  //       return r;});
+  //     return flatten(s);});
+  // }
+
+  // Applies f to all elements in the table.
+  // Same pseudo-linearizable guarantee as entries and size.
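// size() above sums per-bucket sizes with parlay::tabulate_reduce; when
// USE_PARLAY is not defined, parallel.h's fallback is exactly this
// sequential loop:
#include <cstdio>

template <typename F>
long tabulate_reduce(long n, const F& f) {
  long r = 0;
  for (long i = 0; i < n; i++) r += f(i);
  return r;
}

int main() {
  long bucket_sizes[] = {2, 0, 5, 1};
  long total = tabulate_reduce(4, [&](long i) { return bucket_sizes[i]; });
  std::printf("table size = %ld\n", total);  // 8
}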
+ template + void for_each(const F& f) { + table_version* ht = current_table_version.load(); + return epoch::with_epoch([&] { + parallel_for(ht->size, [&] (long i) { + for_each_bucket_rec(ht, i, f);});}); + } + + // ********************************************* + // Iterator + // ********************************************* + + struct Iterator { + public: + using value_type = typename Entries::Data; + using iterator_category = std::forward_iterator_tag; + using pointer = value_type*; + using reference = value_type&; + using difference_type = long; + + private: + std::vector entries; + Entry entry; + table_version* t; + int i; + long bucket_num; + bool single; + bool end; + void get_next_bucket() { + auto g = [&] (const Entry& e) {entries.push_back(e);}; + while (entries.size() == 0 && ++bucket_num < t->size) + for_each_bucket_rec(t, bucket_num, g); + if (bucket_num == t->size) end = true; + } + + public: + Iterator(bool end) : i(0), bucket_num(-2l), single(false), end(true) {} + Iterator(table_version* t) : t(t), + i(0), bucket_num(-1l), single(false), end(false) { + get_next_bucket(); + } + Iterator(Entry entry) : entry(entry), single(true), end(false) {} + Iterator& operator++() { + if (single) end = true; + else if (++i == entries.size()) { + i = 0; + entries.clear(); + get_next_bucket(); + } + return *this; + } + Iterator& operator++(int) { + Iterator tmp = *this; + if (single) end = true; + else if (++i == entries.size()) { + i = 0; + entries.clear(); + get_next_bucket(); + } + return tmp; + } + template = 0> + const value_type operator*() { + if (single) return entry.get_entry(); + return entries[i].get_entry();} + + template = 0> + const value_type& operator*() { + if (single) return entry.get_entry(); + return entries[i].get_entry();} + + bool operator!=(const Iterator& iterator) { + return !(end ? iterator.end : (bucket_num == iterator.bucket_num && + i == iterator.i)); + } + bool operator==(const Iterator& iterator) { + return !(*this != iterator);} + }; + + Iterator begin() { return Iterator(current_table_version.load());} + Iterator end() { return Iterator(true);} + + static constexpr auto identity = [] (const Entry& entry) {return entry;}; + static constexpr auto true_f = [] (const Entry& entry) {return true;}; + + + template + std::pair insert(const K& key, const Constr& constr) { + return epoch::with_epoch([&] { + auto [e,flag] = insert_(key, constr); + return std::pair(Iterator(e), flag);}); + } + + Iterator erase(Iterator pos) { + Remove(*pos.first, true_f); + return Iterator(true); + } + + size_t erase(const K& key) { + return Remove(key, true_f).has_value(); + } + + Iterator find(const K& k) { + auto r = Find(k, identity); + if (!r.has_value()) return Iterator(true); + auto x = Iterator(*r); + return x; + } + +}; + + static constexpr bool default_clear_at_end = true; + + // conditionally rehash if type Hash::avalanching is not defined + template + struct rehash { + size_t operator()(size_t h) { + size_t x = h * UINT64_C(0xbf58476d1ce4e5b9); // linear transform + return (x ^ (x >> 31)); // non-linear transform + }}; + + template + struct rehash { + size_t operator()(size_t i) {return i;}}; + + // Definition where entries of the hash table are stored indirectly + // through a pointer. This means the entries themselves will never + // move, but requires a level of indirection when accessing them. + // Tags the high-bits of pointers with part of the hash function so + // one can avoid the indirection if the tags do not match. 
+ // Currently used for all types that are not trivially copyable. + template + struct IndirectEntries { + using DataS = EntryData; + using Data = typename DataS::value_type; + using Hash = typename DataS::Hash; + using KeyEqual = typename DataS::KeyEqual; + + struct Entry { + using K = typename DataS::K; + using Key = std::pair; + static constexpr bool Direct = false; + Data* ptr; + static Data* tag_ptr(size_t hashv, Data* data) { + return (Data*) (((hashv >> 48) << 48) | ((size_t) data)); + } + Data* get_ptr() const { + return (Data*) (((size_t) ptr) & ((1ul << 48) - 1)); } + static unsigned long hash(const Key& k) { + return k.second;} + bool equal(const Key& k) const { + return (((k.second >> 48) == (((size_t) ptr) >> 48)) && + KeyEqual{}(DataS::get_key(*get_ptr()), *k.first)); } + Key get_key() const { return make_key(DataS::get_key(*get_ptr()));} + Data& get_entry() const { return *get_ptr();} + static Key make_key(const K& key) { + return Key(&key, rehash{}(Hash{}(key)));} + Entry(Key k, Data* data) : ptr(tag_ptr(hash(k), data)) {} + Entry() {} + }; + + bool clear_at_end; + using Key = typename Entry::Key; + + // a memory pool for the entries + epoch::memory_pool* data_pool; + + IndirectEntries(bool clear_at_end=false) + : clear_at_end(clear_at_end), + data_pool(clear_at_end ? + new epoch::memory_pool() : + &epoch::get_default_pool()) {} + ~IndirectEntries() { + if (clear_at_end) { delete data_pool;} + } + + // allocates memory for the entry + Entry make_entry(const Key& k, const Data& data) { + return Entry(k, data_pool->New(data)); } + + // retires the memory for the entry + void retire_entry(Entry& e) { + data_pool->Retire(e.get_ptr()); } + }; + + // Definition where entries of the hash table are stored directly. + // This means the entries might be moved during updates, including + // insersions, removals, and resizing. Currently used for trivially + // copyable types. 
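// Standalone illustration of the tag test IndirectEntries uses above: the
// top 16 bits of the hash are stored in the (otherwise unused) top 16 bits
// of the entry pointer, so a tag mismatch rules out a match without
// dereferencing. Assumes 48-bit user pointers; names are local to the
// example.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct Data { int x; };

static Data* tag_ptr(uint64_t hashv, Data* p) {
  return (Data*)(((hashv >> 48) << 48) | (uint64_t)p);
}
static Data* get_ptr(Data* tagged) {
  return (Data*)((uint64_t)tagged & ((1ull << 48) - 1));
}

int main() {
  Data d{7};
  uint64_t h = 0xBEEF000000001234ull;
  Data* tagged = tag_ptr(h, &d);
  bool maybe_equal = (h >> 48) == ((uint64_t)tagged >> 48);  // cheap pre-filter
  assert(maybe_equal && get_ptr(tagged)->x == 7);
  std::printf("tag matched, value = %d\n", get_ptr(tagged)->x);
}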
+ template + struct DirectEntries { + using DataS = EntryData; + using Data = typename DataS::value_type; + using Hash = typename DataS::Hash; + using KeyEqual = typename DataS::KeyEqual; + using K = typename DataS::K; + + struct Entry { + using K = typename DataS::K; + using Key = K; + static const bool Direct = true; + Data data; + static unsigned long hash(const Key& k) { + return rehash{}(Hash{}(k));} + bool equal(const Key& k) const { return KeyEqual{}(get_key(), k); } + static Key make_key(const K& k) {return k;} + const K& get_key() const {return DataS::get_key(data);} + const Data& get_entry() const { return data;} + Entry(const Data& data) : data(data) {} + Entry() {} + }; + + DirectEntries(bool clear_at_end=false) {} + Entry make_entry(const K& k, const Data& data) { + return Entry(data); } + + // retiring is a noop since no memory has been allocated for entries + void retire_entry(Entry& e) {} + }; + + // template + // struct DirectEntriesX { + // using DataS = EntryData; + // using Data = typename DataS::value_type; + // using Hash = typename DataS::Hash; + // using KeyEqual = typename DataS::KeyEqual; + // using K = typename DataS::K; + + // struct Entry { + // using K = typename DataS::K; + // using Key = K; + // static const bool Direct = true; + // std::array data; + // static unsigned long hash(const Key& k) { + // return rehash{}(Hash{}(k));} + // bool equal(const Key& k) const { return KeyEqual{}(get_key(), k); } + // static Key make_key(const K& k) {return k;} + // const K& get_key() const { return DataS::get_key(*((Data*) &data));} + // const Data& get_entry() const { return *((Data*) &data);} + // Entry(const Data& d) { new (&data) Data(d); } + // Entry() {} + // }; + + // bool clear_at_end; + + // // a memory pool for the entries + // epoch::retire_pool* data_pool; + + // DirectEntriesX(bool clear_at_end=false) + // : clear_at_end(clear_at_end), + // data_pool(clear_at_end ? + // new epoch::retire_pool() : + // &epoch::get_default_retire_pool()) + // {} + // ~DirectEntriesX() { + // if (clear_at_end) { delete data_pool;} + // } + + // // allocates memory for the entry + // Entry make_entry(const K& k, const Data& data) { + // return Entry(data);} + + // // retires the memory for the entry + // void retire_entry(Entry& e) { + // data_pool->Retire((Data*) &(e.data)); + // } + // }; + + +} // namespace parlay +#endif // PARLAY_HASH_H_ diff --git a/include/parlay_hash/unordered_map.h b/include/parlay_hash/unordered_map.h new file mode 100644 index 0000000..af6c1d3 --- /dev/null +++ b/include/parlay_hash/unordered_map.h @@ -0,0 +1,170 @@ +// Initial Author: Guy Blelloch +// Developed as part of the flock library +// +// A growable unordered_map using a hash table designed for scalability to large number of threads, and +// for high contention. On a key type K and value type V it supports: +// +// unordered_map, Equal=std::equal_to>(n) : +// constructor for table of initial size n +// +// Find(const K&) -> std::optional : +// returns value if key is found, and otherwise returns nullopt +// +// Insert(const K&, const V&) -> std::optional : +// if key not in the table it inserts the key with the given value +// and returns nullopt, otherwise it does not modify the table and +// returns the old value. +// +// Remove(const K&) -> std::optional : +// if key is in the table it removes the entry and returns its value. +// otherwise it does nothing and returns nullopt. +// +// size() -> long : returns the size of the table. 
Not linearizable with +// the other functions, and takes time proportional to the table size. +// +// clear() -> void : clears the table so its size is 0. +// +// for_each(F f) : applies functor f to each entry of the table. +// f should be of type (const std::pair&) -> void + +#ifndef PARLAY_UNORDERED_MAP_ +#define PARLAY_UNORDERED_MAP_ + +#include +#include +#include "parlay_hash.h" + +namespace parlay { + + // entries contain a key + template , class KeyEqual_ = std::equal_to> + struct MapData { + using K = K_; + using V = V_; + using Hash = Hash_; + using KeyEqual = KeyEqual_; + using value_type = std::pair; + static const K& get_key(const value_type& x) { return x.first;} + }; + + // Generic unordered_map that can be used with direct or indirect + // entries depending on the template argument. + template + struct unordered_map_internal { + using map = parlay_hash; + + Entries entries_; + map m; + + using Entry = typename Entries::Entry; + using K = typename Entries::DataS::K; + using V = typename Entries::DataS::V; + using key_type = K; + using mapped_type = V; + using value_type = std::pair; + using iterator = typename map::Iterator; + + static constexpr auto true_f = [] (const Entry& kv) {return true;}; + static constexpr auto identity = [] (const Entry& kv) {return kv;}; + static constexpr auto get_value = [] (const value_type& kv) {return kv.second;}; + + unordered_map_internal(long n, bool clear_at_end = default_clear_at_end) + : entries_(Entries(clear_at_end)), + m(map(n, &entries_, clear_at_end)) {} + + iterator begin() { return m.begin();} + iterator end() { return m.end();} + bool empty() { return size() == 0;} + bool max_size() { return (1ul << 47)/sizeof(Entry);} + void clear() { m.clear_buckets();} + long size() { return m.size();} + + template + //auto entries(const F& f = identity) { return m.entries(f);} + long count(const K& k) { return (contains(k)) ? 
1 : 0; }
+    bool contains(const K& k) { return Find(k, true_f).has_value();}
+
+    template <typename F = decltype(get_value)>
+    auto Find(const K& k, const F& f = get_value)
+      // -> std::optional<typename std::invoke_result<F,value_type>::type>
+    {
+      auto g = [&] (const Entry& e) {return f(e.get_entry());};
+      return m.Find(Entry::make_key(k), g);
+    }
+
+    auto Insert(const K& key, const V& value) -> std::optional<V>
+    {
+      auto k = Entry::make_key(key);
+      auto g = [&] (const Entry& e) {return get_value(e.get_entry());};
+      return m.Insert(k, [&] {return entries_.make_entry(k, value_type(key, value));}, g);
+    }
+
+    template <typename F>
+    auto Upsert(const K& key, const F& f) -> std::optional<V>
+    {
+      auto k = Entry::make_key(key);
+      auto g = [&] (const Entry& e) {return get_value(e.get_entry());};
+      auto constr = [&] (const std::optional<Entry>& e) -> Entry {
+        if (e.has_value())
+          return entries_.make_entry(k, value_type(key, f(std::optional(get_value((*e).get_entry())))));
+        return entries_.make_entry(k, value_type(key, f(std::optional<V>())));
+      };
+      return m.Upsert(k, constr, g);
+    }
+
+    template <typename F>
+    auto Insert(const K& key, const V& value, const F& f)
+      // -> std::optional<typename std::invoke_result<F,value_type>::type>
+    {
+      auto k = Entry::make_key(key);
+      auto g = [&] (const Entry& e) {return f(e.get_entry());};
+      return m.Insert(k, [&] {return entries_.make_entry(k, value_type(key, value));}, g);
+    }
+
+    auto Remove(const K& k) -> std::optional<V>
+    {
+      auto g = [&] (const Entry& e) {return get_value(e.get_entry());};
+      return m.Remove(Entry::make_key(k), g);
+    }
+
+    template <typename F>
+    auto Remove(const K& k, const F& f)
+      // -> std::optional<typename std::invoke_result<F,value_type>::type>
+    {
+      auto g = [&] (const Entry& e) {return f(e.get_entry());};
+      return m.Remove(Entry::make_key(k), g);
+    }
+
+    iterator find(const K& k) { return m.find(k); }
+
+    std::pair<iterator, bool> insert(const value_type& entry) {
+      auto k = Entry::make_key(entry.first);
+      return m.insert(k, [&] {return entries_.make_entry(k, entry);});}
+
+    iterator erase(iterator pos) { return m.erase(pos); }
+    size_t erase(const K& k) { return m.erase(k); }
+
+  };
+
+  // Entries are stored directly in the bucket, avoiding a cache miss
+  // for indirection.  Entries can be moved by updates even on
+  // different keys.
+  template <typename K, typename V, class Hash = std::hash<K>, class KeyEqual = std::equal_to<K>>
+  using parlay_unordered_map_direct = unordered_map_internal<DirectEntries<MapData<K, V, Hash, KeyEqual>>>;
+
+  // Entries are stored indirectly through a pointer.  Pointers to
+  // entries will remain valid until the entry is upserted or deleted
+  // (an upsert can be thought of as a deletion followed by an
+  // insertion).
+  template <typename K, typename V, class Hash = std::hash<K>, class KeyEqual = std::equal_to<K>>
+  using parlay_unordered_map_indirect = unordered_map_internal<IndirectEntries<MapData<K, V, Hash, KeyEqual>>>;
+
+  // specialization of unordered_map to use either direct or indirect
+  // entries depending on whether K and V are trivially copyable.
+  // (A short usage example follows below.)
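// Sketch of how the map declared above is used (Find/Insert/Remove all
// return optionals). Assumes this header and its dependencies (parlaylib,
// epoch.h, lock.h) are on the include path; an explicit lambda is passed
// to Find for portability.
#include "parlay_hash/unordered_map.h"
#include <cstdio>

int main() {
  parlay::parlay_unordered_map<long, long> m(16);  // initial size hint
  auto prev = m.Insert(1, 100);                    // empty: newly inserted
  auto again = m.Insert(1, 999);                   // engaged: old value, no overwrite
  auto found = m.Find(1, [](const auto& kv) { return kv.second; });
  auto gone = m.Remove(1);                         // engaged: removed value
  std::printf("%d %ld %ld %ld\n",
              (int)prev.has_value(), *again, *found, *gone);
}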
+ template , class KeyEqual = std::equal_to> + using parlay_unordered_map = std::conditional_t && + std::is_trivially_copyable_v, + parlay_unordered_map_direct, + parlay_unordered_map_indirect>; +} // namespace parlay +#endif // PARLAY_BIGATOMIC_HASH_LIST diff --git a/include/parlay_hash/unordered_set.h b/include/parlay_hash/unordered_set.h new file mode 100644 index 0000000..fbad87a --- /dev/null +++ b/include/parlay_hash/unordered_set.h @@ -0,0 +1,87 @@ +#ifndef PARLAY_UNORDERED_SET_ +#define PARLAY_UNORDERED_SET_ + +#include +#include +#include "parlay_hash.h" +#include + +namespace parlay { + + // entries just contain a key + template , class KeyEqual_ = std::equal_to> + struct SetData { + using K = K_; + using Hash = Hash_; + using KeyEqual = KeyEqual_; + using value_type = K; + static const K& get_key(const value_type& x) { return x;} + }; + + // Generic unordered_set that can be used with direct or indirect + // entries depending on the template argument. + template + struct unordered_set_internal { + using set = parlay_hash; + + Entries entries_; + set m; + + using Entry = typename Entries::Entry; + using K = typename Entries::DataS::K; + using key_type = K; + using value_type = K; + using iterator = typename set::Iterator; + + static constexpr auto true_f = [] (const Entry& kv) {return true;}; + static constexpr auto identity = [] (const Entry& kv) {return kv;}; + + unordered_set_internal(long n, bool clear_at_end = default_clear_at_end) + : entries_(Entries(clear_at_end)), + m(set(n, &entries_, clear_at_end)) {} + + iterator begin() { return m.begin();} + iterator end() { return m.end();} + bool empty() { return size() == 0;} + bool max_size() { return (1ul << 47)/sizeof(Entry);} + void clear() { m.clear_buckets();} + long size() { return m.size();} + + template + auto entries(const F& f = identity) { return m.entries(f);} + long count(const K& k) { return (contains(k)) ? 
1 : 0; } + bool contains(const K& k) { return find(k, true_f).has_value();} + + bool Find(const K& k) { return m.Find(Entry::make_key(k), true_f).has_value(); } + bool Insert(const K& key) + { + auto k = Entry::make_key(key); + return !m.Insert(k, [&] {return entries_.make_entry(k, key);}, true_f).has_value(); + } + + bool Remove(const K& k) + { return m.Remove(Entry::make_key(k), true_f).has_value(); } + + iterator find(const K& k) { return m.find(k); } + + std::pair insert(const value_type& entry) { + return m.insert(entries_.make_entry(make_key(entry.first), entry)); } + + iterator erase(iterator pos) { return m.erase(pos); } + size_t erase(const K& k) { return m.erase(k); } + + }; + + template , class KeyEqual = std::equal_to> + using parlay_unordered_set_direct = unordered_set_internal>>; + + template , class KeyEqual = std::equal_to> + using parlay_unordered_set_indirect = unordered_set_internal>>; + + template , class KeyEqual = std::equal_to> + using parlay_unordered_set = std::conditional_t, + parlay_unordered_set_direct, + parlay_unordered_set_indirect>; +} // namespace parlay +#endif // PARLAY_BIGATOMIC_HASH_LIST + diff --git a/include/sketch_interfacing.h b/include/sketch_interfacing.h new file mode 100644 index 0000000..746c948 --- /dev/null +++ b/include/sketch_interfacing.h @@ -0,0 +1,8 @@ +#pragma once +#include "sketch.h" +#include "sketch/sketch_columns.h" +#include "sketch/sketch_concept.h" + + +// using DefaultSketchColumn = FixedSizeSketchColumn; +using DefaultSketchColumn = ResizeableSketchColumn; diff --git a/include/sketchless_euler_tour_tree.h b/include/sketchless_euler_tour_tree.h index 7bc0f61..ce742b2 100644 --- a/include/sketchless_euler_tour_tree.h +++ b/include/sketchless_euler_tour_tree.h @@ -6,6 +6,9 @@ #include #include "types.h" + +#include + class SketchlessEulerTourNode { std::unordered_map edges; @@ -39,12 +42,19 @@ class SketchlessEulerTourNode { friend std::ostream& operator<<(std::ostream& os, const SketchlessEulerTourNode& ett); }; + +template < +// typename Container = std::vector> +typename Container = absl::flat_hash_map> class SketchlessEulerTourTree { - long seed = 0; + // TODO - packing order fixes + size_t seed = 0; + uint32_t tier_num = 0; public: - std::vector ett_nodes; + node_id_t max_num_nodes; + Container ett_nodes; - SketchlessEulerTourTree(node_id_t num_nodes, uint32_t tier_num, int seed); + SketchlessEulerTourTree(node_id_t max_num_nodes, uint32_t tier_num, size_t seed); void link(node_id_t u, node_id_t v); void cut(node_id_t u, node_id_t v); @@ -52,4 +62,51 @@ class SketchlessEulerTourTree { SketchlessSkipListNode* get_root(node_id_t u); bool is_connected(node_id_t u, node_id_t v); std::vector> cc_query(); + + SketchlessEulerTourNode& ett_node(node_id_t u) { + if constexpr (std::is_same_v>) { + assert(u < ett_nodes.size()); + return ett_nodes[u]; + } else { + // if (ett_nodes.find(u) == ett_nodes.end()) { + // std::cout << "ruh oh" << std::endl; + // } + assert(ett_nodes.find(u) != ett_nodes.end()); + return *ett_nodes[u]; + } + } + + void initialize_node(node_id_t u) { + // no-op with vector implementation + if constexpr (!std::is_same_v>) { + ett_nodes[u] = new SketchlessEulerTourNode(this->seed, u, this->tier_num); + } + }; + void uninitialize_node(node_id_t u) { + // no-op with vector implementation + if constexpr (!std::is_same_v>) { + assert(ett_nodes.find(u) != ett_nodes.end()); + delete ett_nodes[u]; + } + }; + + void initialize_all_nodes() { + for (node_id_t i = 0; i < max_num_nodes; ++i) { + initialize_node(i); + } + }; + 
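// The `if constexpr` container dispatch above, in miniature: vector-backed
// storage needs no per-node initialization, while map-of-pointer storage
// allocates on demand. Names are local to the example.
#include <cstdio>
#include <type_traits>
#include <unordered_map>
#include <vector>

struct Node { int payload = 0; };

template <typename Container>
struct NodeStore {
  Container nodes;
  void initialize_node(int u) {
    if constexpr (!std::is_same_v<Container, std::vector<Node>>)
      nodes[u] = new Node();  // map of pointers: allocate on demand
    // vector of values: no-op, the slot already exists
  }
  Node& node(int u) {
    if constexpr (std::is_same_v<Container, std::vector<Node>>)
      return nodes[u];
    else
      return *nodes[u];
  }
};

int main() {
  NodeStore<std::vector<Node>> dense{std::vector<Node>(4)};
  NodeStore<std::unordered_map<int, Node*>> sparse{};
  sparse.initialize_node(2);
  dense.node(1).payload = 5;
  sparse.node(2).payload = 7;
  std::printf("%d %d\n", dense.node(1).payload, sparse.node(2).payload);
  delete sparse.nodes[2];
}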
+  void initialize_all_nodes(node_id_t until) {
+    assert(until <= max_num_nodes);
+    for (node_id_t i = 0; i < until; ++i) {
+      initialize_node(i);
+    }
+  }
+  bool is_initialized(node_id_t u) {
+    // always true with the vector implementation
+    if constexpr (std::is_same_v<Container, std::vector<SketchlessEulerTourNode>>) {
+      return true;
+    } else {
+      return ett_nodes.find(u) != ett_nodes.end();
+    }
+  };
 };
diff --git a/include/skiplist.h b/include/skiplist.h
index deafa22..81ec679 100644
--- a/include/skiplist.h
+++ b/include/skiplist.h
@@ -2,82 +2,353 @@
 #include <set>
 #include "sketch.h"
+#include "sketch/sketch_columns.h"
+#include "sketch_interfacing.h"
+#include <parlay/sequence.h>
+#include <tbb/task_group.h>
+
+// using ColumnEntryDeltas = parlay::sequence<ColumnEntryDelta>::const_view_type;
+using ColumnEntryDeltas = parlay::sequence<ColumnEntryDelta>::view_type;
+
+
+
+#ifndef SKETCH_BUFFER_SIZE
+  #define SKETCH_BUFFER_SIZE 25
+#endif
+
+enum AggUpdateState {
+  NORMAL = 0,
+  // needs to be updated (normal CAS logic)
+  NEEDS_UPDATE = 1,
+  // this node was updated, but its parent needs to be FULLY recomputed,
+  // since the node changed in some non-trackable way (i.e. it was updated atomically)
+  PARENT_IS_STALE = 2,
+  // applied to nodes whose aggregate contribution does not need to be reapplied
+  LEAVE_ALONE = 3
+};
+
+template <typename SketchClass> requires(SketchColumnConcept<SketchClass>)
 class EulerTourNode;
-constexpr int skiplist_buffer_cap = 25;
 extern long skiplist_seed;
 extern double height_factor;
 extern vec_t sketch_len;
 extern vec_t sketch_err;
 
+template <typename SketchClass> requires(SketchColumnConcept<SketchClass>)
 class SkipListNode {
+  friend class EulerTourNode<SketchClass>;
 
-  SkipListNode* left = nullptr;
-  SkipListNode* right = nullptr;
-  SkipListNode* up = nullptr;
-  SkipListNode* down = nullptr;
+  SkipListNode<SketchClass>* left = nullptr;
+  SkipListNode<SketchClass>* right = nullptr;
+  SkipListNode<SketchClass>* up = nullptr;
+  SkipListNode<SketchClass>* down = nullptr;
   // Store the first node to the left on the next level up
-  SkipListNode* parent = nullptr;
+  SkipListNode<SketchClass>* parent = nullptr;
 
-  vec_t update_buffer[skiplist_buffer_cap];
   int buffer_size = 0;
   int buffer_capacity;
+  vec_t update_buffer[SKETCH_BUFFER_SIZE];
+  int8_t needs_update = AggUpdateState::NORMAL;
 
  public:
-  Sketch* sketch_agg = nullptr;
+  EulerTourNode<SketchClass>* node;
+  SketchClass sketch_agg;
   uint32_t size = 1;
+
 
-  EulerTourNode* node;
-
-  SkipListNode(EulerTourNode* node, long seed, bool has_sketch);
+  SkipListNode(EulerTourNode<SketchClass>* node, long seed, bool has_sketch);
   ~SkipListNode();
 
-  static SkipListNode* init_element(EulerTourNode* node, bool is_allowed_caller);
+  static SkipListNode<SketchClass>* init_element(EulerTourNode<SketchClass>* node, bool is_allowed_caller);
   void uninit_element(bool delete_bdry);
   void uninit_list();
   // Returns the closest node on the next level up at or left of the current
-  SkipListNode* get_parent();
+  SkipListNode<SketchClass>* get_parent() const;
   // Returns the top left root node of the skiplist
-  SkipListNode* get_root();
+  SkipListNode<SketchClass>* get_root() const;
   // Returns the bottom left boundary node of the skiplist
-  SkipListNode* get_first();
+  SkipListNode<SketchClass>* get_first() const;
   // Returns the bottom right node of the skiplist
-  SkipListNode* get_last();
+  SkipListNode<SketchClass>* get_last() const;
   // Return the aggregate size at the root of the list
   uint32_t get_list_size();
   // Return the aggregate sketch at the root of the list
-  Sketch* get_list_aggregate();
+  const SketchClass& get_list_aggregate();
   // Update all the aggregate sketches with the input vector from the current node to its root
-  SkipListNode* update_path_agg(vec_t update_idx);
+  SkipListNode<SketchClass>* update_path_agg(vec_t update_idx);
+  // same, but atomically
+  SkipListNode<SketchClass>* update_path_agg_atomic(vec_t update_idx);
   // Add the given sketch to all aggregate sketches from the current node to its root
-  SkipListNode* update_path_agg(Sketch* sketch);
+  SkipListNode<SketchClass>* update_path_agg(const SketchClass &sketch);
+  SkipListNode<SketchClass>* update_path_agg(SketchClass &sketch);
+
+  SkipListNode<SketchClass>* update_path_agg(const ColumnEntryDelta &delta);
+  SkipListNode<SketchClass>* update_path_agg(const ColumnEntryDeltas &deltas);
+
+  SkipListNode<SketchClass>* update_path_agg_atomic(const ColumnEntryDelta &delta);
+  SkipListNode<SketchClass>* update_path_agg_atomic(const ColumnEntryDeltas &deltas);
   // Update just this node's aggregate sketch
   void update_agg(vec_t update_idx);
+  // Same, but atomically
+  void update_agg_atomic(vec_t update_idx);
+  // Just apply the delta
+  void update_agg_entry_delta(const ColumnEntryDelta& delta) {
+    if (!this->sketch_agg.is_initialized()) // Only do something if this node has a sketch
+      return;
+    this->sketch_agg.apply_entry_delta(delta);
+  }
+
+  void update_agg_entry_deltas(const ColumnEntryDeltas &deltas) {
+    if (!this->sketch_agg.is_initialized()) // Only do something if this node has a sketch
+      return;
+    for (const auto& delta : deltas)
+      this->sketch_agg.apply_entry_delta(delta);
+  }
+  // and the atomic versions:
+  void update_agg_atomic_entry_delta(const ColumnEntryDelta &delta) {
+    if (!this->sketch_agg.is_initialized()) // Only do something if this node has a sketch
+      return;
+    this->sketch_agg.atomic_apply_entry_delta(delta);
+  }
+  void update_agg_atomic_entry_deltas(const ColumnEntryDeltas &deltas) {
+    if (!this->sketch_agg.is_initialized()) // Only do something if this node has a sketch
+      return;
+    for (const auto& delta : deltas)
+      this->sketch_agg.atomic_apply_entry_delta(delta);
+  }
 
   // Apply all the sketch updates currently in the update buffer
   void process_updates();
+
+  bool _needs_full_recompute() {
+    if (this->down == nullptr)
+      return false;
+    SkipListNode<SketchClass>* current = this->down;
+    do {
+      if (current->needs_update == AggUpdateState::PARENT_IS_STALE) {
+        return true;
+      }
+      current = current->right;
+    } while (current != nullptr && current != this->down && current->up == nullptr);
+    return false;
+  }
+
+  void _do_full_prefetch() {
+    if (this->down == nullptr)
+      return;
+    SkipListNode<SketchClass>* current = this->down;
+    do {
+      if (current->sketch_agg.is_initialized()) {
+        // prefetch each child's aggregate so the full re-merge hits warm cache lines
+        current->sketch_agg.prefetch();
+      }
+      current = current->right;
+    } while (current != nullptr && current != this->down && current->up == nullptr);
+  }
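+  // Note on "subtracting" with merge() (stated assumption: the sketch columns
+  // used here merge by XOR-ing buckets, so a merge is its own inverse; the
+  // two-line sequence below is illustrative, not real code):
+  //
+  //   parent.sketch_agg.merge(child.sketch_agg);  // adds child's contribution
+  //   parent.sketch_agg.merge(child.sketch_agg);  // cancels it again, x ^ x == 0
+  //
+  // _subtract_stale_children() below leans on this self-inverse property to
+  // remove a stale child aggregate before the child is recomputed and re-merged.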
+  void _subtract_stale_children() {
+    // "subtract" the aggregate of any child that is about to be recomputed
+    // by re-merging it (merge is self-inverse; see the note above)
+    if (this->down == nullptr)
+      return;
+    SkipListNode<SketchClass>* current = this->down;
+    do {
+      assert(current->needs_update != AggUpdateState::PARENT_IS_STALE);
+      if (current->needs_update == AggUpdateState::NEEDS_UPDATE) {
+        if (current->sketch_agg.is_initialized()) {
+          this->sketch_agg.merge(current->sketch_agg);
+        }
+      }
+      else {
+        current->needs_update = AggUpdateState::LEAVE_ALONE;
+      }
+      current = current->right;
+    } while (current != nullptr && current != this->down && current->up == nullptr);
+  }
+
+  void _do_full_reagg() {
+    if (this->down == nullptr)
+      return;
+    this->sketch_agg.clear();
+    SkipListNode<SketchClass>* current = this->down;
+    do {
+      if (current->sketch_agg.is_initialized()) {
+        this->sketch_agg.merge(current->sketch_agg);
+      }
+      current->needs_update = AggUpdateState::NORMAL;
+      current = current->right;
+    } while (current != nullptr && current != this->down && current->up == nullptr);
+  }
+
+  void _full_recompute_aggs_topdown(int fork_levels) {
+    if (!this->sketch_agg.is_initialized())
+      return;
+    if (this->down == nullptr)
+      return;
+    SkipListNode<SketchClass>* current = this->down;
+    this->sketch_agg.clear();
+    if (fork_levels > 0) {
+      tbb::task_group tg;
+      do {
+        if (current->needs_update == AggUpdateState::NEEDS_UPDATE) {
+          tg.run([current, fork_levels]() {
+            current->recompute_aggs_topdown(fork_levels-1);
+          });
+        }
+        current = current->right;
+      } while (current != nullptr && current != this->down && current->up == nullptr);
+      tg.wait();
+      // _do_full_prefetch();
+      _do_full_reagg();
+    }
+    else {
+      do {
+        if (current->needs_update == AggUpdateState::NEEDS_UPDATE) {
+          current->recompute_aggs_topdown(fork_levels - 1);
+        }
+        current = current->right;
+      } while (current != nullptr && current != this->down && current->up == nullptr);
+      // _do_full_prefetch();
+      _do_full_reagg();
+    }
+    this->needs_update = AggUpdateState::NORMAL;
+  }
+
+  void _recursive_recompute_children(int fork_levels) {
+    if (this->down == nullptr)
+      return;
+    SkipListNode<SketchClass>* current = this->down;
+    if (fork_levels > 0) {
+      tbb::task_group tg;
+      do {
+        if (current->needs_update == AggUpdateState::NEEDS_UPDATE) {
+          tg.run([current, fork_levels]() {
+            current->recompute_aggs_topdown(fork_levels-1);
+          });
+        }
+        current = current->right;
+      } while (current != nullptr && current != this->down && current->up == nullptr);
+      tg.wait();
+    }
+    else {
+      do {
+        // recompute any child marked NEEDS_UPDATE
+        if (current->needs_update == AggUpdateState::NEEDS_UPDATE) {
+          current->recompute_aggs_topdown(fork_levels - 1);
+        }
+        current = current->right;
+      } while (current != nullptr && current != this->down && current->up == nullptr);
+    }
+  }
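+  // The fork_levels pattern above, reduced to a standalone sketch
+  // (tbb::task_group is the real TBB API; Node and visit are placeholders):
+  // recursion spawns TBB tasks only for the top fork_levels levels, then
+  // degrades to a serial walk, so task-spawn overhead is not paid on the
+  // wide, cheap levels near the bottom.
+  //
+  //   void visit(Node* n, int fork_levels) {
+  //     if (fork_levels > 0) {
+  //       tbb::task_group tg;
+  //       for (Node* c : n->children())
+  //         tg.run([c, fork_levels] { visit(c, fork_levels - 1); });
+  //       tg.wait();   // children finish before the caller re-aggregates
+  //     } else {
+  //       for (Node* c : n->children())
+  //         visit(c, fork_levels - 1);
+  //     }
+  //   }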
+
+  // Recompute this node's aggregate from its children.
+  void recompute_aggs_topdown(int fork_levels) {
+    assert(this != nullptr);
+    if (!this->sketch_agg.is_initialized())
+      return;
+    // do not recompute for bottom level nodes
+    if (this->down == nullptr)
+      return;
+    // _full_recompute_aggs_topdown(fork_levels);
+    if (_needs_full_recompute()) {
+      // prefetch all the children
+      _full_recompute_aggs_topdown(fork_levels);
+    }
+    else {
+      _subtract_stale_children();
+      _recursive_recompute_children(fork_levels);
+      SkipListNode<SketchClass>* current = this->down;
+      do {
+        if (current->needs_update == AggUpdateState::LEAVE_ALONE) {
+          // contribution already present; nothing to re-merge
+        }
+        else {
+          if (current->sketch_agg.is_initialized()) {
+            this->sketch_agg.merge(current->sketch_agg);
+          }
+        }
+        current->needs_update = AggUpdateState::NORMAL;
+        current = current->right;
+      } while (current != nullptr && current != this->down && current->up == nullptr);
+    }
+    this->needs_update = AggUpdateState::NORMAL;
+  }
+  size_t compute_space_usage() {
+    size_t total = sizeof(SkipListNode<SketchClass>);
+    if (this->sketch_agg.is_initialized())
+      total += sketch_agg.space_usage_bytes();
+    if (this->down != nullptr) {
+      SkipListNode<SketchClass>* current = this->down;
+      do {
+        total += current->compute_space_usage();
+        current = current->right;
+      } while (current != nullptr && current != this->down && current->up == nullptr);
+    }
+    return total;
+  }
+
+  // we have to barrier on all of these finishing
+  SkipListNode<SketchClass>* find_root_with_cas() {
+    SkipListNode<SketchClass>* current = this;
+    while (current->parent != nullptr) {
+      current = current->parent;
+      std::atomic_ref<int8_t> atomic_needs_update(current->needs_update);
+      int8_t expected = static_cast<int8_t>(AggUpdateState::NORMAL);
+      bool cas_succeed = atomic_needs_update.compare_exchange_strong(
+        expected,
+        static_cast<int8_t>(AggUpdateState::NEEDS_UPDATE),
+        std::memory_order_seq_cst
+      );
+      // __sync_bool_compare_and_swap(
+      //   (bool *)&current->needs_update,
+      //   false,
+      //   true
+      // );
+      if (!cas_succeed) {
+        // someone else already set needs_update, so we can stop
+        return nullptr;
+      }
+    }
+    // TODO - don't make this hard-coded
+    return current;
+  }
+
+  void clear_cas_flags() {
+    assert(this != nullptr);
+    this->needs_update = AggUpdateState::NORMAL;
+    if (this->down == nullptr)  // bottom-level nodes have no children to clear
+      return;
+    SkipListNode<SketchClass>* current = this->down;
+    do {
+      if (current->needs_update == AggUpdateState::NEEDS_UPDATE) {
+        current->clear_cas_flags();
+      }
+      current = current->right;
+    } while (current != nullptr && current != this->down && current->up == nullptr);
+  }
 
-  std::set get_component();
+  std::set<SkipListNode<SketchClass>*> get_component();
 
   // Returns the root of a new skiplist formed by joining the lists containing left and right
-  static SkipListNode* join(SkipListNode* left, SkipListNode* right);
-  template <class... T>
-  static SkipListNode* join(SkipListNode* head, T*... tail);
+  static SkipListNode<SketchClass>* join(SkipListNode<SketchClass>* left, SkipListNode<SketchClass>* right);
+
+  template <typename... Tail> requires((std::is_same_v<SkipListNode<SketchClass>*, Tail> && ...))
+  static SkipListNode<SketchClass>* join(SkipListNode<SketchClass>* head, Tail... tail) {
+    return join(head, join(tail...));
+  };
   // Returns the root of the left list after splitting to the left of the given node
-  static SkipListNode* split_left(SkipListNode* node);
+  static SkipListNode<SketchClass>* split_left(SkipListNode<SketchClass>* node);
   // Returns the root of the right list after splitting to the right of the given node
-  static SkipListNode* split_right(SkipListNode* node);
+  static SkipListNode<SketchClass>* split_right(SkipListNode<SketchClass>* node);
 
   bool isvalid();
-  SkipListNode* next();
+  SkipListNode<SketchClass>* next();
   int print_list();
 };
 
-template <class... T>
-SkipListNode* SkipListNode::join(SkipListNode* head, T*... tail) {
-  return join(head, join(tail...));
-}
+// template <typename... Tail> requires((std::is_same_v<SkipListNode<SketchClass>*, Tail> && ...))
+// SkipListNode<SketchClass>* SkipListNode<SketchClass>::join(SkipListNode<SketchClass>* head, Tail... tail) {
+//   return join(head, join(tail...));
+// }
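// A minimal driver sketch for the marking protocol declared above (comment
// only; find_root_with_cas, update_agg_atomic, and recompute_aggs_topdown are
// the members from skiplist.h, while touched_leaves, update_idx, and the fork
// depth are made up):
//
//   std::vector<SkipListNode<DefaultSketchColumn>*> roots;
//   for (auto* leaf : touched_leaves) {
//     leaf->update_agg_atomic(update_idx);          // apply the leaf-level change
//     if (auto* root = leaf->find_root_with_cas())  // nullptr => path already marked
//       roots.push_back(root);
//   }
//   // barrier here: every marking walk must finish before any recompute starts
//   for (auto* root : roots)
//     root->recompute_aggs_topdown(/*fork_levels=*/3);
//
// Exactly one walker per root comes back with a non-null result, so each root
// is recomputed once even when many leaves under it were updated concurrently.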
diff --git a/include/ufo_tree/types.h b/include/ufo_tree/types.h
new file mode 100644
index 0000000..2eb1e0c
--- /dev/null
+++ b/include/ufo_tree/types.h
@@ -0,0 +1,59 @@
+#pragma once
+#include <cstdint>
+#include <parlay/sequence.h>
+
+
+namespace ufo {
+
+typedef uint32_t vertex_t;
+static vertex_t NONE = -1;
+
+struct empty_t {
+};
+static empty_t empty;
+
+typedef uint64_t edge_t;
+
+enum UpdateType {
+  INSERT,
+  DELETE
+};
+
+struct Edge {
+public:
+  vertex_t src;
+  vertex_t dst;
+
+  bool operator==(const Edge& other) const {
+    return src == other.src && dst == other.dst;
+  }
+};
+
+struct Update {
+  UpdateType type;
+  Edge edge;
+};
+
+struct UpdateBatch {
+  UpdateType type;
+  parlay::sequence> edges;
+};
+
+struct UpdateBatchWithWeights {
+  UpdateType type;
+  parlay::sequence> insert_edges;
+  parlay::sequence> delete_edges;
+};
+
+enum QueryType {
+  CONNECTIVITY,
+  PATH,
+  SUBTREE
+};
+
+struct Query {
+  vertex_t u;
+  vertex_t v;
+};
+
+}
diff --git a/include/ufo_tree/ufo_cluster.h b/include/ufo_tree/ufo_cluster.h
new file mode 100644
index 0000000..6a8ee93
--- /dev/null
+++ b/include/ufo_tree/ufo_cluster.h
@@ -0,0 +1,229 @@
+#pragma once
+#include "ufo_tree/types.h"
+#include "ufo_tree/util.h"
+#include <absl/container/flat_hash_map.h>
+#include <map>
+
+/* This constant determines the maximum size of the array of neighbors stored
+inline in each UFOCluster. Any additional neighbors are stored in the hash
+set for efficiency. The minimum value is 3 for queries to function
+correctly. */
+#define UFO_ARRAY_MAX 3
+
+// #define COLLECT_ROOT_CLUSTER_STATS
+#ifdef COLLECT_ROOT_CLUSTER_STATS
+  static std::map<size_t, size_t> root_clusters_histogram;
+#endif
+
+
+namespace ufo {
+
+template <typename v_t, typename e_t>
+class UFOCluster {
+using Cluster = UFOCluster<v_t, e_t>;
+using NeighborSet = absl::flat_hash_map<Cluster*, e_t>;
+public:
+  // Query fields; note that the [[no_unique_address]] fields must be declared first
+  [[no_unique_address]] e_t edge_value1;
+  [[no_unique_address]] e_t edge_value2;
+  [[no_unique_address]] e_t edge_value3;
+  [[no_unique_address]] v_t value;
+  // Parent pointer
+  Cluster* parent = nullptr;
+  /* We tag the last neighbor pointer in the array with information about the degree of the cluster.
+  If it is 1, 2, or 3, that is the degree of the cluster. If it is 4, then the cluster has degree 4
+  or higher and the last neighbor pointer is actually a pointer to the NeighborSet object containing
+  the remaining neighbors of the cluster. */
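+  /* Tagged-pointer mechanics in isolation (an illustrative aside; TAG, UNTAG,
+  and GET_TAG are the macros from ufo_tree/util.h, p is a stand-in pointer).
+  Clusters are at least 8-byte aligned, so the low 3 bits of a Cluster* are
+  always zero and can carry the tag:
+
+    Cluster* tagged = TAG(p, 3);      // pack tag value 3 into the low bits
+    int tag = GET_TAG(tagged);        // tag == 3
+    Cluster* plain = UNTAG(tagged);   // mask the low 3 bits; plain == p
+
+  Tags 0-3 encode the degree directly; tag 4 means the slot actually holds a
+  NeighborSet* (see get_degree() and has_neighbor_set() below). */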
+  Cluster* neighbors[UFO_ARRAY_MAX];
+  int degree = 0;
+  int fanout = 0;
+  // Constructors
+  UFOCluster() : parent(), neighbors(), degree(), fanout(), edge_value1(), edge_value2(), edge_value3(), value() {};
+  UFOCluster(v_t val) : parent(), neighbors(), degree(), fanout(), edge_value1(), edge_value2(), edge_value3(), value(val) {};
+  // Helper functions
+  Cluster* get_root();
+  bool contracts();
+  int get_degree();
+  bool has_neighbor_set();
+  NeighborSet* get_neighbor_set();
+  bool parent_high_fanout();
+  bool contains_neighbor(Cluster* c);
+  void insert_neighbor(Cluster* c);
+  void insert_neighbor_with_value(Cluster* c, e_t value);
+  void remove_neighbor(Cluster* c);
+  void set_edge_value(int index, e_t value);
+  e_t get_edge_value(int index);
+  size_t calculate_size();
+};
+
+template <typename v_t, typename e_t>
+UFOCluster<v_t, e_t>* UFOCluster<v_t, e_t>::get_root() {
+  Cluster* curr = this;
+  while (curr->parent) curr = curr->parent;
+  return curr;
+}
+
+template <typename v_t, typename e_t>
+bool UFOCluster<v_t, e_t>::contracts() {
+  assert(get_degree() <= UFO_ARRAY_MAX);
+  for (auto neighborp : neighbors) {
+    auto neighbor = UNTAG(neighborp);
+    if (neighbor && neighbor->parent == parent) return true;
+  }
+  return false;
+}
+
+template <typename v_t, typename e_t>
+int UFOCluster<v_t, e_t>::get_degree() {
+  int tag = GET_TAG(neighbors[UFO_ARRAY_MAX-1]);
+  if (tag <= 3) [[likely]] return tag;
+  return 2 + get_neighbor_set()->size();
+}
+
+template <typename v_t, typename e_t>
+bool UFOCluster<v_t, e_t>::has_neighbor_set() {
+  int tag = GET_TAG(neighbors[UFO_ARRAY_MAX-1]);
+  if (tag <= 3) [[likely]] return false;
+  return true;
+}
+
+template <typename v_t, typename e_t>
+absl::flat_hash_map<UFOCluster<v_t, e_t>*, e_t>* UFOCluster<v_t, e_t>::get_neighbor_set() {
+  return (NeighborSet*) UNTAG(neighbors[UFO_ARRAY_MAX-1]);
+}
+
+template <typename v_t, typename e_t>
+bool UFOCluster<v_t, e_t>::parent_high_fanout() {
+  assert(parent);
+  int parent_degree = parent->get_degree();
+  if (get_degree() == 1) {
+    auto neighbor = neighbors[0];
+    if (neighbor->parent == parent)
+      if (neighbor->get_degree() - parent_degree > 2) return true;
+  } else {
+    if (get_degree() - parent_degree > 2) return true;
+  }
+  return false;
+}
+
+template <typename v_t, typename e_t>
+bool UFOCluster<v_t, e_t>::contains_neighbor(Cluster* c) {
+  for (auto neighbor : neighbors) if (UNTAG(neighbor) == c) return true;
+  if (has_neighbor_set() && get_neighbor_set()->find(c) != get_neighbor_set()->end()) return true;
+  return false;
+}
+
+template <typename v_t, typename e_t>
+void UFOCluster<v_t, e_t>::insert_neighbor(Cluster* c) {
+  assert(!contains_neighbor(c));
+  // degree++;
+  for (int i = 0; i < UFO_ARRAY_MAX; ++i) {
+    if (UNTAG(neighbors[i]) == nullptr) [[likely]] {
+      int deg = GET_TAG(neighbors[UFO_ARRAY_MAX-1]);
+      neighbors[i] = c;
+      neighbors[UFO_ARRAY_MAX-1] = TAG(UNTAG(neighbors[UFO_ARRAY_MAX-1]), deg+1);
+      return;
+    }
+  }
+  if (!has_neighbor_set()) {
+    auto neighbor_set = new NeighborSet();
+    std::pair<Cluster*, e_t> insert_pair;
+    insert_pair.first = UNTAG(neighbors[UFO_ARRAY_MAX-1]);
+    neighbor_set->insert(insert_pair);
+    neighbors[UFO_ARRAY_MAX-1] = TAG(neighbor_set, 4);
+  }
+  std::pair<Cluster*, e_t> insert_pair;
+  insert_pair.first = c;
+  get_neighbor_set()->insert(insert_pair);
+}
+
+template <typename v_t, typename e_t>
+void UFOCluster<v_t, e_t>::insert_neighbor_with_value(Cluster* c, e_t value) {
+  if constexpr (!std::is_same<e_t, empty_t>::value) {
+    assert(!contains_neighbor(c));
+    // degree++;
+    for (int i = 0; i < UFO_ARRAY_MAX; ++i) {
+      if (UNTAG(neighbors[i]) == nullptr) [[likely]] {
+        int deg = GET_TAG(neighbors[UFO_ARRAY_MAX-1]);
+        neighbors[i] = c;
+        set_edge_value(i, value);
+        neighbors[UFO_ARRAY_MAX-1] = TAG(UNTAG(neighbors[UFO_ARRAY_MAX-1]), deg+1);
+        return;
+      }
+    }
+    if (!has_neighbor_set()) {
+      auto neighbor_set = new NeighborSet();
+      neighbor_set->insert({UNTAG(neighbors[UFO_ARRAY_MAX-1]),
get_edge_value(UFO_ARRAY_MAX-1)}); + neighbors[UFO_ARRAY_MAX-1] = TAG(neighbor_set, 4); + } + get_neighbor_set()->insert({c,value}); + } +} + +template +void UFOCluster::remove_neighbor(Cluster* c) { + assert(contains_neighbor(c)); + // degree--; + for (int i = 0; i < UFO_ARRAY_MAX; ++i) { + if (UNTAG(neighbors[i]) == c) { + neighbors[i] = TAG(nullptr, GET_TAG(neighbors[i])); + if (has_neighbor_set()) [[unlikely]] { // Put an element from the set into the array + auto neighbor_set = get_neighbor_set(); + auto replacement = *neighbor_set->begin(); + neighbors[i] = replacement.first; + if constexpr (!std::is_same::value) + set_edge_value(i, replacement.second); + neighbor_set->erase(replacement.first); + if (neighbor_set->size() == 1) { + auto temp = *neighbor_set->begin(); + delete neighbor_set; + neighbors[UFO_ARRAY_MAX-1] = TAG(temp.first, 3); + if constexpr (!std::is_same::value) + set_edge_value(UFO_ARRAY_MAX-1, temp.second); + } + } else [[likely]] { + for (int j = UFO_ARRAY_MAX-1; j > i; --j) { + if (UNTAG(neighbors[j])) [[unlikely]] { + neighbors[i] = UNTAG(neighbors[j]); + neighbors[j] = TAG(nullptr, GET_TAG(neighbors[j])); + if constexpr (!std::is_same::value) + set_edge_value(i, get_edge_value(j)); + break; + } + } + neighbors[UFO_ARRAY_MAX-1] = TAG(UNTAG(neighbors[UFO_ARRAY_MAX-1]), GET_TAG(neighbors[UFO_ARRAY_MAX-1])-1); + } + return; + } + } + auto neighbor_set = get_neighbor_set(); + neighbor_set->erase(c); + if (neighbor_set->size() == 1) { + auto temp = *neighbor_set->begin(); + delete neighbor_set; + neighbors[UFO_ARRAY_MAX-1] = TAG(temp.first, 3); + if constexpr (!std::is_same::value) + set_edge_value(UFO_ARRAY_MAX-1, temp.second); + } +} + +template +void UFOCluster::set_edge_value(int index, e_t value) { + e_t* address = &edge_value1 + index; + *address = value; +} + +template +e_t UFOCluster::get_edge_value(int index) { + e_t* address = &edge_value1 + index; + return *address; +} + +template +size_t UFOCluster::calculate_size() { + size_t memory = sizeof(UFOCluster); + if (has_neighbor_set()) memory += get_neighbor_set()->bucket_count() * sizeof(std::pair); + return memory; +} + +} diff --git a/include/ufo_tree/ufo_tree.h b/include/ufo_tree/ufo_tree.h new file mode 100644 index 0000000..0aa6acf --- /dev/null +++ b/include/ufo_tree/ufo_tree.h @@ -0,0 +1,806 @@ +#pragma once +#include "ufo_tree/types.h" +#include "ufo_tree/util.h" +#include "ufo_tree/ufo_cluster.h" +#include +#include + + +namespace ufo { + +template +class UFOTree { +using Cluster = UFOCluster; +public: + // UFO tree interface + UFOTree( + vertex_t n, QueryType q = CONNECTIVITY, + std::function f_v = [](v_t x, v_t y) -> v_t {return x;}, + std::function f_e = [](e_t x, e_t y) -> e_t {return x;}); + UFOTree( + vertex_t n, QueryType q, + std::function f_v, std::function f_e, + v_t id_v, e_t id_e, v_t dval_v, e_t dval_e); + UFOTree(int n, QueryType q, std::function f, v_t id, v_t d_val); + ~UFOTree(); + void link(vertex_t u, vertex_t v); + void link(vertex_t u, vertex_t v, e_t value); + void cut(vertex_t u, vertex_t v); + bool connected(vertex_t u, vertex_t v); + e_t path_query(vertex_t u, vertex_t v); + // Testing helpers + size_t space(); + size_t count_nodes(); + size_t get_height(); + bool is_valid(); + void print_tree(); +private: + // Class data and parameters + std::vector leaves; + std::vector> root_clusters; + int max_level; + std::vector> lower_deg[2]; // lower_deg helps to identify clusters who became low degree during a deletion update + QueryType query_type; + std::function f_v; + v_t 
identity_v; + v_t default_v; + std::function f_e; + e_t identity_e; + e_t default_e; + // We preallocate UFO clusters and store unused clusters in free_clusters + std::vector free_clusters; + Cluster* allocate_cluster(); + void free_cluster(Cluster* c); + // Helper functions + void remove_ancestors(Cluster* c, int start_level = 0); + void recluster_tree(); + bool is_high_degree_or_high_fanout(Cluster* cluster, Cluster* child, int level); + void disconnect_siblings(Cluster* c, int level); + void insert_adjacency(Cluster* u, Cluster* v); + void insert_adjacency(Cluster* u, Cluster* v, e_t value); + void remove_adjacency(Cluster* u, Cluster* v); +}; + +template +UFOTree::UFOTree(vertex_t n, QueryType q, + std::function f_v, std::function f_e) + : query_type(q), f_v(f_v), f_e(f_e) { + leaves.resize(n); + root_clusters.resize(max_tree_height(n)); + for (int i = 0; i < n; ++i) + free_clusters.push_back(new Cluster()); +} + +template +UFOTree::UFOTree(vertex_t n, QueryType q, + std::function f_v, std::function f_e, + v_t id_v, e_t id_e, v_t dval_v, e_t dval_e) + : query_type(q), f_v(f_v), f_e(f_e), identity_v(id_v), identity_e(id_e), + default_v(dval_v), default_e(dval_e) { + leaves.resize(n, default_v); + root_clusters.resize(max_tree_height(n)); + for (int i = 0; i < n; ++i) + free_clusters.push_back(new Cluster()); +} + +template +UFOTree::UFOTree(int n, QueryType q, + std::function f, v_t id, v_t d_val) + : query_type(q), f_v(f), identity_v(id), default_v(d_val) { + if constexpr (std::is_same::value) { + f_e = f; + identity_e = id; + default_e = d_val; + } + leaves.resize(n, default_v); + root_clusters.resize(max_tree_height(n)); + for (int i = 0; i < n; ++i) + free_clusters.push_back(new Cluster()); +} + +template +UFOTree::~UFOTree() { + // Clear all memory + std::unordered_set clusters; + for (auto leaf : leaves) { + auto curr = leaf.parent; + while (curr) { + clusters.insert(curr); + curr = curr->parent; + } + } + for (auto cluster : clusters) delete cluster; + for (auto cluster : free_clusters) delete cluster; + #ifdef COLLECT_ROOT_CLUSTER_STATS + std::cout << "Number of root clusters: Frequency" << std::endl; + for (auto entry : root_clusters_histogram) + std::cout << entry.first << "\t" << entry.second << std::endl; + #endif +} + +template +UFOCluster* UFOTree::allocate_cluster() { + if (!free_clusters.empty()) { + auto c = free_clusters.back(); + free_clusters.pop_back(); + return c; + } + return new Cluster(); +} + +template +void UFOTree::free_cluster(UFOCluster* c) { + c->parent = nullptr; + if (c->has_neighbor_set()) [[unlikely]] delete c->get_neighbor_set(); + for (int i = 0; i < UFO_ARRAY_MAX; ++i) + c->neighbors[i] = nullptr; + c->degree = 0; + c->fanout = 0; + free_clusters.push_back(c); +} + +template +size_t UFOTree::space() { + std::unordered_set visited; + size_t memory = sizeof(UFOTree); + for (auto cluster : leaves) { + memory += cluster.calculate_size(); + auto parent = cluster.parent; + while (parent != nullptr && visited.count(parent) == 0) { + memory += parent->calculate_size(); + visited.insert(parent); + parent = parent->parent; + } + } + return memory; +} + +template +size_t UFOTree::count_nodes() { + std::unordered_set visited; + size_t node_count = 0; + for(auto cluster : leaves){ + node_count += 1; + auto parent = cluster.parent; + while(parent != nullptr && visited.count(parent) == 0){ + node_count += 1; + visited.insert(parent); + parent = parent->parent; + } + } + return node_count; +} + +template +size_t UFOTree::get_height() { + size_t max_height = 0; + 
for (vertex_t v = 0; v < leaves.size(); ++v) { + size_t height = 0; + Cluster* curr = &leaves[v]; + while (curr) { + height++; + curr = curr->parent; + } + max_height = std::max(max_height, height); + } + return max_height; +} + +/* Link vertex u and vertex v in the tree. Optionally include an +augmented value for the new edge (u,v). If no augmented value is +provided, the default value is 1. */ +template +void UFOTree::link(vertex_t u, vertex_t v) { + assert(u >= 0 && u < leaves.size() && v >= 0 && v < leaves.size()); + assert(u != v && !connected(u,v)); + max_level = 0; + remove_ancestors(&leaves[u]); + remove_ancestors(&leaves[v]); + insert_adjacency(&leaves[u], &leaves[v]); + recluster_tree(); +} +template +void UFOTree::link(vertex_t u, vertex_t v, e_t value) { + assert(u >= 0 && u < leaves.size() && v >= 0 && v < leaves.size()); + assert(u != v && !connected(u,v)); + max_level = 0; + remove_ancestors(&leaves[u]); + remove_ancestors(&leaves[v]); + insert_adjacency(&leaves[u], &leaves[v], value); + recluster_tree(); +} + +/* Cut vertex u and vertex v in the tree. */ +template +void UFOTree::cut(vertex_t u, vertex_t v) { + assert(u >= 0 && u < leaves.size() && v >= 0 && v < leaves.size()); + assert(leaves[u].contains_neighbor(&leaves[v])); + max_level = 0; + auto curr_u = &leaves[u]; + auto curr_v = &leaves[v]; + while (curr_u != curr_v) { + lower_deg[0].push_back({curr_u, curr_u->get_degree()-1}); + lower_deg[1].push_back({curr_v, curr_v->get_degree()-1}); + curr_u->degree = curr_u->get_degree()-1; + curr_v->degree = curr_v->get_degree()-1; + curr_u = curr_u->parent; + curr_v = curr_v->parent; + } + remove_ancestors(&leaves[u]); + remove_ancestors(&leaves[v]); + for (auto cluster: lower_deg[0]) cluster.first->degree = 0; + for (auto cluster: lower_deg[1]) cluster.first->degree = 0; + lower_deg[0].clear(); + lower_deg[1].clear(); + remove_adjacency(&leaves[u], &leaves[v]); + recluster_tree(); +} + +/* Removes the ancestors of cluster c that are not high degree nor +high fan-out and add them to root_clusters. 
*/ +template +void UFOTree::remove_ancestors(Cluster* c, int start_level) { + int level = start_level; // level is always the level of cluster prev, 0 being the leaves + auto prev = c; + auto curr = c->parent; + bool del = false; + while (curr) { + // Different cases for if curr will or will not be deleted later + if (!is_high_degree_or_high_fanout(curr, prev, level)) [[likely]] { // We will delete curr next round + disconnect_siblings(prev, level); + if (del) [[likely]] { // Possibly delete prev + assert(prev->get_degree() <= UFO_ARRAY_MAX); + for (auto neighborp : prev->neighbors) { + auto neighbor = UNTAG(neighborp); + if (neighbor) neighbor->remove_neighbor(prev); // Remove prev from adjacency + } + auto position = std::find(root_clusters[level].begin(), root_clusters[level].end(), prev); + if (position != root_clusters[level].end()) root_clusters[level].erase(position); + free_cluster(prev); + } else [[unlikely]] { + prev->parent = nullptr; + curr->fanout--; + root_clusters[level].push_back(prev); + } + del = true; + } else [[unlikely]] { // We will not delete curr next round + if (del) [[likely]] { // Possibly delete prev + assert(prev->get_degree() <= UFO_ARRAY_MAX); + for (auto neighborp : prev->neighbors) { + auto neighbor = UNTAG(neighborp); + if (neighbor) neighbor->remove_neighbor(prev); // Remove prev from adjacency + } + auto position = std::find(root_clusters[level].begin(), root_clusters[level].end(), prev); + if (position != root_clusters[level].end()) root_clusters[level].erase(position); + free_cluster(prev); + curr->fanout--; + } else [[unlikely]] if (prev->get_degree() <= 1) { + prev->parent = nullptr; + curr->fanout--; + root_clusters[level].push_back(prev); + } + del = false; + } + // Update pointers + prev = curr; + curr = prev->parent; + level++; + } + // DO LAST DELETIONS + if (del) [[likely]] { // Possibly delete prev + assert(prev->get_degree() <= UFO_ARRAY_MAX); + for (auto neighborp : prev->neighbors) { + auto neighbor = UNTAG(neighborp); + if (neighbor) neighbor->remove_neighbor(prev); // Remove prev from adjacency + } + auto position = std::find(root_clusters[level].begin(), root_clusters[level].end(), prev); + if (position != root_clusters[level].end()) root_clusters[level].erase(position); + free_cluster(prev); + } else [[unlikely]] root_clusters[level].push_back(prev); + if (level > max_level) max_level = level; +} + +template +void UFOTree::recluster_tree() { + for (int level = 0; level <= max_level; level++) { + if (root_clusters[level].empty()) [[unlikely]] continue; + // Update root cluster stats if we are collecting them + #ifdef COLLECT_ROOT_CLUSTER_STATS + if (root_clusters_histogram.find(root_clusters[level].size()) == root_clusters_histogram.end()) + root_clusters_histogram[root_clusters[level].size()] = 1; + else + root_clusters_histogram[root_clusters[level].size()] += 1; + #endif + // Merge deg 3-5 root clusters with all of its deg 1 neighbors + for (auto cluster : root_clusters[level]) { + if (!cluster->parent && cluster->get_degree() > 2) [[unlikely]] { + assert(cluster->get_degree() <= 5); + auto parent = allocate_cluster(); + if constexpr (!std::is_same::value) { + parent->value = identity_v; + } + parent->fanout = 1; + cluster->parent = parent; + root_clusters[level+1].push_back(parent); + assert(UFO_ARRAY_MAX >= 3); + if (!cluster->has_neighbor_set()) [[likely]] { + for (int i = 0; i < UFO_ARRAY_MAX; ++i) { + auto neighbor = UNTAG(cluster->neighbors[i]); + if (neighbor->get_degree() == 1) [[unlikely]] { + auto curr = neighbor->parent; + 
int lev = level+1; + while (curr) { + auto temp = curr; + curr = curr->parent; + auto position = std::find(root_clusters[lev].begin(), root_clusters[lev].end(), temp); + if (position != root_clusters[lev].end()) root_clusters[lev].erase(position); + free_cluster(temp); + lev++; + } + neighbor->parent = cluster->parent; + parent->fanout++; + } else if (neighbor->parent) { // Populate new parent's neighbors + if constexpr (std::is_same::value) { + parent->insert_neighbor(neighbor->parent); + neighbor->parent->insert_neighbor(parent); + } else { + parent->insert_neighbor_with_value(neighbor->parent, cluster->get_edge_value(i)); + neighbor->parent->insert_neighbor_with_value(parent, cluster->get_edge_value(i)); + } + } + } + } else [[unlikely]] { + for (int i = 0; i < UFO_ARRAY_MAX-1; ++i) { + auto neighbor = cluster->neighbors[i]; + if (neighbor->get_degree() == 1) [[unlikely]] { + auto curr = neighbor->parent; + int lev = level+1; + while (curr) { + auto temp = curr; + curr = curr->parent; + auto position = std::find(root_clusters[lev].begin(), root_clusters[lev].end(), temp); + if (position != root_clusters[lev].end()) root_clusters[lev].erase(position); + free_cluster(temp); + lev++; + } + neighbor->parent = cluster->parent; + parent->fanout++; + } else if (neighbor->parent) { // Populate new parent's neighbors + if constexpr (std::is_same::value) { + parent->insert_neighbor(neighbor->parent); + neighbor->parent->insert_neighbor(parent); + } else { + parent->insert_neighbor_with_value(neighbor->parent, cluster->get_edge_value(i)); + neighbor->parent->insert_neighbor_with_value(parent, cluster->get_edge_value(i)); + } + } + } + for (auto neighbor_pair : *cluster->get_neighbor_set()) { + auto neighbor = neighbor_pair.first; + if (neighbor->get_degree() == 1) [[unlikely]] { + auto curr = neighbor->parent; + int lev = level+1; + while (curr) { + auto temp = curr; + curr = curr->parent; + auto position = std::find(root_clusters[lev].begin(), root_clusters[lev].end(), temp); + if (position != root_clusters[lev].end()) root_clusters[lev].erase(position); + free_cluster(temp); + lev++; + } + neighbor->parent = cluster->parent; + parent->fanout++; + } else if (neighbor->parent) { // Populate new parent's neighbors + if constexpr (std::is_same::value) { + parent->insert_neighbor(neighbor->parent); + neighbor->parent->insert_neighbor(parent); + } else { + parent->insert_neighbor_with_value(neighbor->parent, neighbor_pair.second); + neighbor->parent->insert_neighbor_with_value(parent, neighbor_pair.second); + } + } + } + } + } + } + // This loop handles all deg 2 and 1 root clusters + for (auto cluster : root_clusters[level]) { + // Combine deg 2 root clusters with deg 2 root clusters + if (!cluster->parent && cluster->get_degree() == 2) [[unlikely]] { + assert(UFO_ARRAY_MAX >= 2); + for (int i = 0; i < 2; ++i) { + auto neighbor = cluster->neighbors[i]; + if (!neighbor->parent && (neighbor->get_degree() == 2)) [[unlikely]] { + auto parent = allocate_cluster(); + cluster->parent = parent; + neighbor->parent = parent; + parent->fanout = 2; + if constexpr (!std::is_same::value) { // Path query + parent->value = f_e(cluster->value, f_e(neighbor->value, cluster->get_edge_value(i))); + } + root_clusters[level+1].push_back(parent); + for (int i = 0; i < 2; ++i) { // Populate new parent's neighbors + if (cluster->neighbors[i]->parent && cluster->neighbors[i]->parent != parent) { + if constexpr (std::is_same::value) { + parent->insert_neighbor(cluster->neighbors[i]->parent); + 
cluster->neighbors[i]->parent->insert_neighbor(parent); + } else { + parent->insert_neighbor_with_value(cluster->neighbors[i]->parent, cluster->get_edge_value(i)); + cluster->neighbors[i]->parent->insert_neighbor_with_value(parent, cluster->get_edge_value(i)); + } + } + if (neighbor->neighbors[i]->parent && neighbor->neighbors[i]->parent != parent) { + if constexpr (std::is_same::value) { + parent->insert_neighbor(neighbor->neighbors[i]->parent); + neighbor->neighbors[i]->parent->insert_neighbor(parent); + } else { + parent->insert_neighbor_with_value(neighbor->neighbors[i]->parent, neighbor->get_edge_value(i)); + neighbor->neighbors[i]->parent->insert_neighbor_with_value(parent, neighbor->get_edge_value(i)); + } + } + } + break; + } + } + // Combine deg 2 root clusters with deg 1 or 2 non-root clusters + if (!cluster->parent) [[unlikely]] { + assert(UFO_ARRAY_MAX >= 2); + for (int i = 0; i < 2; ++i) { + auto neighbor = cluster->neighbors[i]; + if (neighbor->parent && (neighbor->get_degree() == 1 || neighbor->get_degree() == 2)) [[unlikely]] { + if (neighbor->contracts()) continue; + cluster->parent = neighbor->parent; + neighbor->parent->fanout++; + if constexpr (!std::is_same::value) { // Path query + cluster->parent->value = f_e(cluster->value, f_e(neighbor->value, cluster->get_edge_value(i))); + } + remove_ancestors(cluster->parent, level+1); // Recursive remove ancestor call + auto other_neighbor = cluster->neighbors[!i]; // Popoulate neighbors + // if (other_neighbor->parent && (long) other_neighbor->parent->parent != 1) { + if (other_neighbor->parent) { + if constexpr (std::is_same::value) { + insert_adjacency(cluster->parent, other_neighbor->parent); + } else { + insert_adjacency(cluster->parent, other_neighbor->parent, cluster->get_edge_value(!i)); + } + } + break; + } + } + } + // Always combine deg 1 root clusters with its neighboring cluster + } else if (!cluster->parent && cluster->get_degree() == 1) [[unlikely]] { + auto neighbor = cluster->neighbors[0]; + if (neighbor->parent) { + if (neighbor->get_degree() == 2 && neighbor->contracts()) continue; + cluster->parent = neighbor->parent; + neighbor->parent->fanout++; + remove_ancestors(cluster->parent, level+1); + } else { + auto parent = allocate_cluster(); + cluster->parent = parent; + neighbor->parent = parent; + parent->fanout = 2; + if constexpr (!std::is_same::value) { // Path query + parent->value = identity_v; + } + for (int i = 0; i < 2; ++i) { // Populate new parent's neighbors + if (neighbor->neighbors[i] && neighbor->neighbors[i] != cluster && neighbor->neighbors[i]->parent) { + if constexpr (std::is_same::value) { + parent->insert_neighbor(neighbor->neighbors[i]->parent); + neighbor->neighbors[i]->parent->insert_neighbor(parent); + } else { + parent->insert_neighbor_with_value(neighbor->neighbors[i]->parent, neighbor->get_edge_value(i)); + neighbor->neighbors[i]->parent->insert_neighbor_with_value(parent, neighbor->get_edge_value(i)); + } + } + } + root_clusters[level+1].push_back(parent); + } + } + } + // Add remaining uncombined clusters to the next level + for (auto cluster : root_clusters[level]) { + if (!cluster->parent && cluster->get_degree() > 0) [[unlikely]] { + auto parent = allocate_cluster(); + cluster->parent = parent; + parent->fanout = 1; + if constexpr (!std::is_same::value) { // Path query + parent->value = cluster->value; + } + for (int i = 0; i < 2; ++i) { // Populate new parent's neighbors + if (cluster->neighbors[i] && cluster->neighbors[i]->parent) { + if constexpr (std::is_same::value) { + 
parent->insert_neighbor(cluster->neighbors[i]->parent); + cluster->neighbors[i]->parent->insert_neighbor(parent); + } else { + parent->insert_neighbor_with_value(cluster->neighbors[i]->parent, cluster->get_edge_value(i)); + cluster->neighbors[i]->parent->insert_neighbor_with_value(parent, cluster->get_edge_value(i)); + } + } + } + root_clusters[level+1].push_back(parent); + } + } + // Clear the contents of this level + root_clusters[level].clear(); + if (level == max_level && !root_clusters[max_level+1].empty()) max_level++; + } +} + +template +bool UFOTree::is_high_degree_or_high_fanout(Cluster* cluster, Cluster* child, int level) { + int cluster_degree = cluster->degree > 0 ? cluster->degree : cluster->get_degree(); + if (cluster_degree > 2) [[unlikely]] return true; + if (!child->neighbors[1] && cluster->fanout > 2) [[unlikely]] return true; + int child_degree = child->degree > 0 ? child->degree : child->get_degree(); + if (child_degree - cluster_degree > 2) [[unlikely]] return true; + return false; +} + +/* Helper function which takes a cluster c and the level of that cluster. The function +should find every cluster that shares a parent with c, disconnect it from their parent +and add it as a root cluster to be processed. */ +template +void UFOTree::disconnect_siblings(Cluster* c, int level) { + if (c->get_degree() == 1) { + auto center = c->neighbors[0]; + if (center->parent && c->parent != center->parent) return; + assert(center->get_degree() <= 5); + if (!center->has_neighbor_set()) [[likely]] { + for (auto neighborp : center->neighbors) { + Cluster* neighbor = UNTAG(neighborp); + if (neighbor && neighbor->parent == c->parent && neighbor != c) { + neighbor->parent = nullptr; // Set sibling parent pointer to null + root_clusters[level].push_back(neighbor); // Keep track of root clusters + } + } + } else [[unlikely]] { + for (int i = 0; i < UFO_ARRAY_MAX-1; ++i) { + Cluster* neighbor = center->neighbors[i]; + if (neighbor->parent == c->parent && neighbor != c) { + neighbor->parent = nullptr; // Set sibling parent pointer to null + root_clusters[level].push_back(neighbor); // Keep track of root clusters + } + } + for (auto neighbor_pair : *center->get_neighbor_set()) { + Cluster* neighbor = neighbor_pair.first; + if (neighbor && neighbor->parent == c->parent && neighbor != c) { + neighbor->parent = nullptr; // Set sibling parent pointer to null + root_clusters[level].push_back(neighbor); // Keep track of root clusters + } + } + } + center->parent = nullptr; + root_clusters[level].push_back(center); + } else { + assert(c->get_degree() <= 5); + if (!c->has_neighbor_set()) [[likely]] { + for (auto neighborp : c->neighbors) { + Cluster* neighbor = UNTAG(neighborp); + if (neighbor && neighbor->parent == c->parent) { + neighbor->parent = nullptr; // Set sibling parent pointer to null + root_clusters[level].push_back(neighbor); // Keep track of root clusters + } + } + } else [[unlikely]] { + for (int i = 0; i < UFO_ARRAY_MAX-1; ++i) { + Cluster* neighbor = c->neighbors[i]; + if (neighbor->parent == c->parent) { + neighbor->parent = nullptr; // Set sibling parent pointer to null + root_clusters[level].push_back(neighbor); // Keep track of root clusters + } + } + for (auto neighbor_pair : *c->get_neighbor_set()) { + Cluster* neighbor = neighbor_pair.first; + if (neighbor && neighbor->parent == c->parent) { + neighbor->parent = nullptr; // Set sibling parent pointer to null + root_clusters[level].push_back(neighbor); // Keep track of root clusters + } + } + } + } +} + +template +void 
UFOTree::insert_adjacency(Cluster* u, Cluster* v) { + auto curr_u = u; + auto curr_v = v; + while (curr_u && curr_v && curr_u != curr_v) { + curr_u->insert_neighbor(curr_v); + curr_v->insert_neighbor(curr_u); + curr_u = curr_u->parent; + curr_v = curr_v->parent; + } +} + +template +void UFOTree::insert_adjacency(Cluster* u, Cluster* v, e_t value) { + auto curr_u = u; + auto curr_v = v; + while (curr_u && curr_v && curr_u != curr_v) { + curr_u->insert_neighbor_with_value(curr_v, value); + curr_v->insert_neighbor_with_value(curr_u, value); + curr_u = curr_u->parent; + curr_v = curr_v->parent; + } +} + +template +void UFOTree::remove_adjacency(Cluster* u, Cluster* v) { + auto curr_u = u; + auto curr_v = v; + while (curr_u && curr_v && curr_u != curr_v) { + curr_u->remove_neighbor(curr_v); + curr_v->remove_neighbor(curr_u); + // curr_u->degree = 0; + // curr_v->degree = 0; + curr_u = curr_u->parent; + curr_v = curr_v->parent; + } +} + +/* Return true if and only if there is a path from vertex u to +vertex v in the tree. */ +template +bool UFOTree::connected(vertex_t u, vertex_t v) { + return leaves[u].get_root() == leaves[v].get_root(); +} + +template +e_t UFOTree::path_query(vertex_t u, vertex_t v) { + assert(u < leaves.size() && u >= 0 && v < leaves.size() && v >= 0 && u != v && connected(u, v)); + + e_t path_u1, path_u2, path_v1, path_v2; + path_u1 = path_u2 = path_v1 = path_v2 = identity_e; + Cluster *bdry_u1, *bdry_u2, *bdry_v1, *bdry_v2; + bdry_u1 = bdry_u2 = bdry_v1 = bdry_v2 = nullptr; + if (leaves[u].get_degree() == 2) { + bdry_u1 = leaves[u].neighbors[0]; + bdry_u2 = leaves[u].neighbors[1]; + } + if (leaves[v].get_degree() == 2) { + bdry_v1 = leaves[v].neighbors[0]; + bdry_v2 = leaves[v].neighbors[1]; + } + auto curr_u = &leaves[u]; + auto curr_v = &leaves[v]; + while (curr_u->parent != curr_v->parent) { + // NOTE(ATHARVA): Make this all into one function. + if (curr_u->get_degree() > 2) { + if (curr_u->parent->get_degree() == 2) { + // Superunary to Binary + bdry_u1 = curr_u->parent->neighbors[0]; + bdry_u2 = curr_u->parent->neighbors[1]; + path_u2 = path_u1; + } + } else { + for (int i = 0; i < 2; i++) { + auto neighbor = curr_u->neighbors[i]; + if (neighbor && neighbor->parent == curr_u->parent) { + if (curr_u->get_degree() == 2) { + if (curr_u->parent->get_degree() == 2) { + // Binary to Binary + if (neighbor == bdry_u1) { + path_u1 = f_e(path_u1, f_e(curr_u->get_edge_value(i), neighbor->value)); + bdry_u2 = bdry_u2->parent; + for (int i = 0; i < 2; i++) + if (curr_u->parent->neighbors[i] && curr_u->parent->neighbors[i] != bdry_u2) + bdry_u1 = curr_u->parent->neighbors[i]; + } else { + path_u2 = f_e(path_u2, f_e(curr_u->get_edge_value(i), neighbor->value)); + bdry_u1 = bdry_u1->parent; + for (int i = 0; i < 2; i++) + if (curr_u->parent->neighbors[i] && curr_u->parent->neighbors[i] != bdry_u1) + bdry_u2 = curr_u->parent->neighbors[i]; + } + } else { + // Binary to Unary + path_u1 = (neighbor == bdry_u1) ? 
path_u2 : path_u1; + } + } else { + if (curr_u->parent->get_degree() == 2) { + // Unary to Binary + path_u1 = path_u2 = f_e(path_u1, curr_u->get_edge_value(i)); + bdry_u1 = curr_u->parent->neighbors[0]; + bdry_u2 = curr_u->parent->neighbors[1]; + } else { + // Unary to Unary and Unary to Superunary + path_u1 = f_e(path_u1, f_e(curr_u->get_edge_value(i), neighbor->value)); + } + } + break; + } + } + if (!curr_u->contracts()) { + if (bdry_u1) bdry_u1 = bdry_u1->parent; + if (bdry_u2) bdry_u2 = bdry_u2->parent; + } + } + curr_u = curr_u->parent; + // Same thing for the side of curr_v + if (curr_v->get_degree() > 2) { + if (curr_v->parent->get_degree() == 2) { + // Superunary to Superunary/Binary + bdry_v1 = curr_v->parent->neighbors[0]; + bdry_v2 = curr_v->parent->neighbors[1]; + path_v2 = path_v1; + } + } else { + for (int i = 0; i < 2; i++) { + auto neighbor = curr_v->neighbors[i]; + if (neighbor && neighbor->parent == curr_v->parent) { + if (curr_v->get_degree() == 2) { + if (curr_v->parent->get_degree() == 2) { + // Binary to Binary + if (neighbor == bdry_v1) { + path_v1 = f_e(path_v1, f_e(curr_v->get_edge_value(i), neighbor->value)); + bdry_v2 = bdry_v2->parent; + for (int i = 0; i < 2; i++) + if (curr_v->parent->neighbors[i] && curr_v->parent->neighbors[i] != bdry_v2) + bdry_v1 = curr_v->parent->neighbors[i]; + } else { + path_v2 = f_e(path_v2, f_e(curr_v->get_edge_value(i), neighbor->value)); + bdry_v1 = bdry_v1->parent; + for (int i = 0; i < 2; i++) + if (curr_v->parent->neighbors[i] && curr_v->parent->neighbors[i] != bdry_v1) + bdry_v2 = curr_v->parent->neighbors[i]; + } + } else { + // Binary to Unary + path_v1 = (neighbor == bdry_v1) ? path_v2 : path_v1; + } + } else { + if (curr_v->parent->get_degree() == 2) { + // Unary to Binary + path_v1 = path_v2 = f_e(path_v1, curr_v->get_edge_value(i)); + bdry_v1 = curr_v->parent->neighbors[0]; + bdry_v2 = curr_v->parent->neighbors[1]; + } else { + // Unary to Unary and Unary to Superunary + path_v1 = f_e(path_v1, f_e(curr_v->get_edge_value(i), neighbor->value)); + } + } + break; + } + } + if (!curr_v->contracts()) { + if (bdry_v1) bdry_v1 = bdry_v1->parent; + if (bdry_v2) bdry_v2 = bdry_v2->parent; + } + } + curr_v = curr_v->parent; + } + // Get the correct path sides when the two vertices meet at the LCA + e_t total = identity_e; + if (curr_u->get_degree() == 2) + total = f_e(total, (curr_v == bdry_u1) ? path_u1 : path_u2); + else + total = f_e(total, path_u1); + if (curr_v->get_degree() == 2) + total = f_e(total, (curr_u == bdry_v1) ? 
path_v1 : path_v2); + else + total = f_e(total, path_v1); + // If the LCA contracts them in a star merge, take both edges to the center + if (curr_u->get_degree() == 1 && curr_v->get_degree() == 1 + && curr_u->neighbors[0] != curr_v) [[unlikely]] { + total = f_e(total, curr_u->get_edge_value(0)); + total = f_e(total, curr_v->get_edge_value(0)); + } + // Add the value of the last edge (since they contract one must be deg <= 2) + else [[likely]] { + for (int i = 0; i < 2; i++) { + if (curr_u->neighbors[i] == curr_v) { + total = f_e(total, curr_u->get_edge_value(i)); + break; + } + if (curr_v->neighbors[i] == curr_u) { + total = f_e(total, curr_v->get_edge_value(i)); + break; + } + } + } + return total; +} + +} diff --git a/include/ufo_tree/util.h b/include/ufo_tree/util.h new file mode 100644 index 0000000..d4820fa --- /dev/null +++ b/include/ufo_tree/util.h @@ -0,0 +1,61 @@ +#pragma once +#include +#include +#include +#include +#include "ufo_tree/types.h" + + +namespace ufo { + +template +inline bool CAS(ET *ptr, ET oldv, ET newv) { + if (sizeof(ET) == 1) { + return __sync_bool_compare_and_swap((bool*)ptr, *((bool*)&oldv), *((bool*)&newv)); + } else if (sizeof(ET) == 4) { + return __sync_bool_compare_and_swap((int*)ptr, *((int*)&oldv), *((int*)&newv)); + } else if (sizeof(ET) == 8) { + return __sync_bool_compare_and_swap((long*)ptr, *((long*)&oldv), *((long*)&newv)); + } else { + std::cout << "CAS bad length : " << sizeof(ET) << std::endl; + abort(); + } +} + +template +inline ET AtomicLoad(ET *ptr) { + return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); +} + +template +inline void AtomicStore(ET *ptr, ET val) { + __atomic_store_n(ptr, val, __ATOMIC_SEQ_CST); +} + +template +inline ET AtomicExchange(ET *ptr, ET val) { + return __sync_lock_test_and_set(ptr, val); +} + +#define MAX_VERTEX_T (std::numeric_limits::max()) + +#define VERTICES_TO_EDGE(U, V) (edge_t) U + (((edge_t) V) << 32) +#define EDGE_TYPE_TO_STRUCT(E) {(vertex_t) E, (vertex_t) (E >> 32)} + +static int max_tree_height(vertex_t n) { + return ceil(log2(n) / log2(1.2)); +} + +#define TAG(P,T) (Cluster*)((uintptr_t) P | (uintptr_t) T) +#define UNTAG(P) (Cluster*)((uintptr_t) P & (uintptr_t) ~0x7) +#define GET_TAG(P) (int)((uintptr_t) P & (uintptr_t) 0x7) + +// #define START_TIMER(X) auto X = std::chrono::high_resolution_clock::now() +// #define STOP_TIMER(X, T) T += std::chrono::duration_cast(std::chrono::high_resolution_clock::now()-X).count() +// #define PRINT_TIMER(S, T) std::cout << " " << S << " (ms): " << T/1000000 << std::endl + +#define START_TIMER(X) ; +#define STOP_TIMER(X, T) ; +#define PRINT_TIMER(S, T) ; + +} diff --git a/include/union_find_local.h b/include/union_find_local.h new file mode 100644 index 0000000..8186985 --- /dev/null +++ b/include/union_find_local.h @@ -0,0 +1,71 @@ +#pragma once + +// basically unmodified from parlay's union_find.h +// in its examples (as of commit e1b1f17) + +#include + +// The following supports both "link" (a directed union) and "find". +// They are safe to run concurrently as long as there is no cycle among +// concurrent links. This can be achieved, for example by only linking +// a vertex with lower id into one with higher degree. +// See: "Internally deterministic parallel algorithms can be fast" +// Blelloch, Fineman, Gibbons, and Shun +// for a discussion of link/find. +template +struct union_find_local { + // TODO - think about this more carefully. + // it's not like we're really using the atomics? 
+  // parlay::sequence<std::atomic<vertex>> parents;
+  parlay::sequence<vertex> parents;
+
+  size_t space_usage_bytes() const {
+    return sizeof(union_find_local) + (parents.capacity() * sizeof(vertex));
+  }
+
+  bool is_root(vertex u) {
+    return parents[u] < 0;
+  }
+
+  // initialize n elements all as roots
+  union_find_local(size_t n) : parents(parlay::tabulate(n, [](long) -> vertex { return -1; })) {}
+
+  vertex find(vertex i) {
+    if (is_root(i)) return i;
+    vertex p = parents[i];
+    if (is_root(p)) return p;
+
+    // find root, shortcutting along the way
+    do {
+      vertex gp = parents[p];
+      parents[i] = gp;
+      i = p;
+      p = gp;
+    } while (!is_root(p));
+    return p;
+  }
+
+  // Version of union that is safe for parallelism
+  // when no cycles are created (e.g. only link from a larger
+  // to a smaller vertex).
+  // Does not use ranks.
+  void link(vertex u, vertex v) {
+    // ONLY MODIFICATION
+    // we're going to enforce the minimum as the root
+    if (u < v) {
+      std::swap(u, v);
+    }
+    // u > v now holds;
+    // make v the parent of u
+    parents[u] = v;
+  }
+
+  void reset() {
+    // make everything a root again
+    parlay::parallel_for(0, parents.size(), [&](size_t i) {
+      parents[i] = -1;
+    });
+  }
+};
+
+template struct union_find_local;
\ No newline at end of file
diff --git a/include/util.h b/include/util.h
index 0fbc050..313ce1f 100644
--- a/include/util.h
+++ b/include/util.h
@@ -6,6 +6,7 @@
 extern std::string stream_file;
 extern int batch_size_arg;
 extern double height_factor_arg;
+extern int hybrid_threshold_arg;
 
 //#define START(X) auto X = std::chrono::high_resolution_clock::now()
 //#define STOP(C, X) C += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - X).count()
diff --git a/include/utils/epoch.h b/include/utils/epoch.h
new file mode 100644
index 0000000..917d01a
--- /dev/null
+++ b/include/utils/epoch.h
@@ -0,0 +1,597 @@
+// ***************************
+// Epoch-based memory reclamation
+// Supports:
+//   epoch::with_epoch(F f),
+// which runs f within an epoch, as well as:
+//   epoch::New(args...)
+//   epoch::Retire(T* a) -- delays destruction and free
+//   epoch::Delete(T* a) -- destructs and frees immediately
+// Retire delays destruction and free until no operation that was in a
+// with_epoch at the time it was run is still within the with_epoch.
+//
+// All operations take constant time overhead (beyond the cost of the
+// system malloc and free).
+//
+// Designed to work with C++ threads, or compatible threading
+// libraries. In particular it uses thread_local variables, and no two
+// concurrent processes can share the same instance of the variable.
+//
+// When NDEBUG is not set, the operations check for memory corruption
+// of the bytes immediately before and after the object, and check
+// for double retires/deletes. Also:
+//   epoch::check_ptr(T* a)
+// will check that an object allocated using epoch::New(..) has not
+// been corrupted.
+//
+// Supports undoing retires. This can be useful in a transactional
+// system in which an operation aborts, and any retires done during
+// the operation have to be undone. In particular, Retire returns a
+// pointer to a boolean. Running
+//   epoch::undo_retire(bool* x)
+// will undo the retire. It must be run in the same with_epoch as the
+// retire, otherwise it is too late to undo. If you don't want
+// to undo retires, you can ignore this feature.
+//
+// New, Retire and Delete use a shared pool for the retired lists,
+// which, although not very large, is not cleared until program
+// termination.
A private pool can be created with +// epoch::memory_pool a; +// which then supports a->New(args...), a->Retire(T*) and +// a->Delete(T*). On destruction of "a", all elements of the retired +// lists will be destructed and freed. +// +// Achieves constant times overhead by incrementally taking steps. +// In particular every Retire takes at most a constant number of +// incremental steps towards updating the epoch and clearing the +// retired lists. +// +// Developed as part of parlay project at CMU, initially for flock then +// used for verlib, and parlayhash. +// Current dependence on parlay is just for parlay::my_thread_id() and +// parlay::num_thread_ids() which are from "parlay/thread_specific.h". +// *************************** + +#include +#include +#include +#include +#include +#include +#include +#include +// Needed for parlay::my_thread_id of parlay::num_thread_ids +#include "threads/thread_specific.h" + +#ifndef PARLAY_EPOCH_H_ +#define PARLAY_EPOCH_H_ + +#ifndef NDEBUG +// Checks for corruption of bytes before and after allocated structures, as well as double frees. +// Requires some extra memory to pad the front and back of a structure. +#define EpochMemCheck 1 +#endif +//#define EpochMemCheck 1 + +#define USE_STEPPING 1 +//#define USE_UNDO 1 + +#ifdef USE_PARLAY_ALLOC +#include "parlay/alloc.h" +#endif + +// *************************** +// epoch structure +// *************************** + +namespace epoch { + + namespace internal { + + inline int worker_id() {return parlay::my_thread_id(); } + inline int num_workers() {return parlay::num_thread_ids();} + constexpr int max_num_workers = 1024; + +struct alignas(64) epoch_s { + + // functions to run when epoch is incremented + std::vector> before_epoch_hooks; + std::vector> after_epoch_hooks; + + struct alignas(64) announce_slot { + std::atomic last; + announce_slot() : last(-1l) {} + }; + + std::vector announcements; + std::atomic current_epoch; + epoch_s() : + announcements(std::vector(max_num_workers)), + current_epoch(0), + epoch_state(0) {} + + long get_current() { + return current_epoch.load(); + } + + long get_my_epoch() { + return announcements[worker_id()].last; + } + + void set_my_epoch(long e) { + announcements[worker_id()].last = e; + } + + int announce() { + size_t id = worker_id(); + assert(id < max_num_workers); + while (true) { + long current_e = get_current(); + long tmp = current_e; + // apparently an exchange is faster than a store (write and fence) + announcements[id].last.exchange(tmp, std::memory_order_seq_cst); + if (get_current() == current_e) return id; + } + } + + void unannounce(size_t id) { + announcements[id].last.store(-1l, std::memory_order_release); + } + + // top 16 bits are used for the process id, and the bottom 48 for + // the epoch number + using state = size_t; + std::atomic epoch_state; + + // Attempts to takes num_steps checking the announcement array to + // see that all slots are up-to-date with the current epoch. Once + // they are, the epoch is updated. Designed to deamortize the cost + // of sweeping the announcement array--every thread only does + // constant work. 
+  state update_epoch_steps(state prev_state, int num_steps) {
+    state current_state = epoch_state.load();
+    if (prev_state != current_state)
+      return current_state;
+    size_t i = current_state >> 48;
+    size_t current_e = ((1ul << 48) - 1) & current_state;
+    size_t workers = num_workers();
+    if (i == workers) {
+      for (const auto h : before_epoch_hooks) h();
+      long tmp = current_e;
+      if (current_epoch.load() == current_e &&
+          current_epoch.compare_exchange_strong(tmp, current_e+1)) {
+        for (const auto h : after_epoch_hooks) h();
+      }
+      state new_state = current_e + 1;
+      epoch_state.compare_exchange_strong(current_state, new_state);
+      return epoch_state.load();
+    }
+    size_t j;
+    for (j = i ; j < i + num_steps && j < workers; j++)
+      if ((announcements[j].last != -1l) && announcements[j].last < current_e)
+        return current_state;
+    state new_state = (j << 48 | current_e);
+    if (epoch_state.compare_exchange_strong(current_state, new_state))
+      return new_state;
+    return current_state;
+  }
+
+  // this version does the full sweep
+  void update_epoch() {
+    long current_e = get_current();
+
+    // check if everyone is done with earlier epochs
+    int workers;
+    do {
+      workers = num_workers();
+      if (workers > max_num_workers) {
+        std::cerr << "number of threads: " << workers
+                  << ", greater than max_num_workers: " << max_num_workers << std::endl;
+        abort();
+      }
+      for (int i=0; i < workers; i++)
+        if ((announcements[i].last != -1l) && announcements[i].last < current_e)
+          return;
+    } while (num_workers() != workers); // this is unlikely to loop
+
+    // if so then increment current epoch
+    for (const auto h : before_epoch_hooks) h();
+    if (current_epoch.compare_exchange_strong(current_e, current_e+1)) {
+      for (const auto h : after_epoch_hooks) h();
+    }
+  }
+};
+
+  // Just one epoch structure shared by all
+  extern inline epoch_s& get_epoch() {
+    static epoch_s epoch;
+    return epoch;
+  }
+
+// ***************************
+// type specific memory pools
+// ***************************
+
+template <typename T>
+struct alignas(64) memory_pool {
+private:
+
+  struct list_entry {
+    T* ptr;
+#ifdef USE_UNDO
+    bool keep_;
+    bool keep() {return keep_;}
+    list_entry() : keep_(false) {}
+    list_entry(T* ptr) : ptr(ptr), keep_(false) {}
+#else
+    bool keep() {return false;}
+#endif
+  };
+
+  // each thread keeps one of these
+  struct alignas(256) old_current {
+    std::list<list_entry> old;      // linked list of retired items from previous epoch
+    std::list<list_entry> current;  // linked list of retired items from current epoch
+    std::list<list_entry> reserve;  // linked list of items that could be destructed, but delayed so they can be reused
+    long epoch;         // epoch on last retire, updated on a retire
+    long retire_count;  // number of retires so far, reset on updating the epoch
+    long alloc_count;
+    epoch_s::state e_state;
+    old_current() : e_state(0), epoch(0), retire_count(0), alloc_count(0) {}
+  };
+
+  std::vector<old_current> pools;
+
+  // wrapper used so can pad for the memory checked version
+  struct wrapper {
+#ifdef EpochMemCheck
+    long pad;
+    std::atomic<long> head;
+    T value;
+    std::atomic<long> tail;
+#else
+    T value;
+#endif
+  };
+
+  // values used to check for corruption or double delete
+  static constexpr long default_val = 10;
+  static constexpr long deleted_val = 55;
+
+  // given a pointer to a value in a wrapper, return a pointer to the wrapper.
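+  // A sketch of the wrapper layout under EpochMemCheck (from the struct
+  // above):
+  //
+  //   | pad | head | value : T | tail |
+  //
+  // head and tail hold default_val (10) while the object is live;
+  // free_wrapper sets head to deleted_val (55), so a double free or a
+  // write just past value is caught in check_wrapper_on_destruct below.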
+ wrapper* wrapper_from_value(T* p) { + size_t offset = ((char*) &((wrapper*) p)->value) - ((char*) p); + return (wrapper*) (((char*) p) - offset); + } + + // destructs entries on a list + void clear_list(std::list& lst) { + for (list_entry& x : lst) + if (!x.keep()) { + x.ptr->~T(); + free_wrapper(wrapper_from_value(x.ptr)); + } + lst.clear(); + } + + void advance_epoch(int i, old_current& pid) { +#ifndef USE_UNDO + int delay = 1; +#else + int delay = 2; +#endif + if (pid.epoch + delay < get_epoch().get_current()) { + pid.reserve.splice(pid.reserve.end(), pid.old); + pid.old = std::move(pid.current); + pid.epoch = get_epoch().get_current(); + } + // a heuristic +#ifdef USE_STEPPING + long update_threshold = 10; +#else + long update_threshold = 10 * num_workers(); +#endif + if (++pid.retire_count == update_threshold) { + pid.retire_count = 0; +#ifdef USE_STEPPING + pid.e_state = get_epoch().update_epoch_steps(pid.e_state, 8); +#else + get_epoch().update_epoch(); +#endif + } + } + +#ifdef USE_PARLAY_ALLOC + using Allocator = parlay::type_allocator; +#endif + + void check_wrapper_on_destruct(wrapper* x) { +#ifdef EpochMemCheck + // check nothing is corrupted or double deleted + if (x->head != default_val || x->tail != default_val) { + if (x->head == deleted_val) std::cerr << "double free" << std::endl; + else if (x->head != default_val) std::cerr << "corrupted head" << x->head << std::endl; + if (x->tail != default_val) std::cerr << "corrupted tail: " << x->tail << std::endl; + abort(); + } + x->head = deleted_val; +#endif + } + + void set_wrapper_on_construct(wrapper* x) { +#ifdef EpochMemCheck + x->pad = x->head = x->tail = default_val; +#endif + } + + void free_wrapper(wrapper* x) { + check_wrapper_on_destruct(x); +#ifdef USE_PARLAY_ALLOC + return Allocator::free(x); +#else + return std::free(x); +#endif + } + + wrapper* allocate_wrapper() { + auto &pid = pools[worker_id()]; + if (!pid.reserve.empty()) { + list_entry x = pid.reserve.front(); + pid.reserve.pop_front(); + if (!x.keep()) { + x.ptr->~T(); + wrapper* w = wrapper_from_value(x.ptr); + check_wrapper_on_destruct(w); + set_wrapper_on_construct(w); + return w; + } + } +#ifdef USE_PARLAY_ALLOC + wrapper* w = Allocator::alloc(); +#else + wrapper* w = (wrapper*) std::malloc(sizeof(wrapper)); +#endif + set_wrapper_on_construct(w); + return w; + } + + public: + memory_pool() { + long workers = max_num_workers; + pools = std::vector(workers); + for (int i = 0; i < workers; i++) { + pools[i].retire_count = 0; + } + } + + memory_pool(const memory_pool&) = delete; + ~memory_pool() { clear(); } + + // for backwards compatibility + void acquire(T* p) { } + + template + T* New(Args... args) { + wrapper* x = allocate_wrapper(); + T* newv = &x->value; + new (newv) T(args...); + return newv; + } + + // f is a function that initializes a new object before it is shared + template + T* New_Init(F f, Args... 
args) { + T* x = New(args...); + f(x); + return x; + } + + // retire and return a pointer if want to undo the retire +#ifdef USE_UNDO + bool* Retire(T* p) { +#else + void Retire(T* p) { +#endif + auto i = worker_id(); + auto &pid = pools[i]; + if (pid.reserve.size() > 500) { + list_entry x = pid.reserve.front(); + if (!x.keep()) { + x.ptr->~T(); + free_wrapper(wrapper_from_value(x.ptr)); + } + pid.reserve.pop_front(); + } + advance_epoch(i, pid); + pid.current.push_back(list_entry{p}); +#ifdef USE_UNDO + return &pid.current.back().keep_; +#endif + } + + // destructs and frees the object immediately + void Delete(T* p) { + p->~T(); + free_wrapper(wrapper_from_value(p)); + } + + bool check_ptr(T* ptr, bool silent=false) { +#ifdef EpochMemCheck + if (ptr == nullptr) return true; + wrapper* x = wrapper_from_value(ptr); + if (!silent) { + if (x->pad != default_val) std::cerr << "memory_pool, check: pad word corrupted" << x->pad << std::endl; + if (x->head != default_val) std::cerr << "memory_pool, check: head word corrupted" << x->head << std::endl; + if (x->tail != default_val) std::cerr << "memory_pool, check: tail word corrupted: " << x->tail << std::endl; + } + return (x->pad == default_val && x->head == default_val && x->tail == default_val); +#endif + return true; + } + + // Clears all the lists, to be used on termination, or could be use + // at a quiescent point when noone is reading any retired items. + void clear() { + // for (int i=0; i < num_workers(); i++) + // std::cout << i << ": " << pools[1].old.size() << ", " + // << pools[i].current.size() << ", " + // << pools[i].reserve.size() << std::endl; + get_epoch().update_epoch(); + for (int i=0; i < num_workers(); i++) { + clear_list(pools[i].old); + clear_list(pools[i].current); + clear_list(pools[i].reserve); + } + //Allocator::print_stats(); + } + + void stats() {} +}; + +template +struct alignas(64) retire_pool { +private: + + struct list_entry { + char data[sizeof(T)]; + }; + + // each thread keeps one of these + struct alignas(256) old_current { + std::list old; // linked list of retired items from previous epoch + std::list current; // linked list of retired items from current epoch + long epoch; // epoch on last retire, updated on a retire + long retire_count; // number of retires so far, reset on updating the epoch + epoch_s::state e_state; + old_current() : e_state(0), epoch(0), retire_count(0) {} + }; + + std::vector pools; + + // destructs entries on a list + void clear_list(std::list& lst) { + for (list_entry& x : lst) + ((T*) (&(x.data)))->~T(); + lst.clear(); + } + + void advance_epoch(int i, old_current& pid) { + if (pid.epoch + 1 < get_epoch().get_current()) { + clear_list(pid.old); + pid.old = std::move(pid.current); + pid.epoch = get_epoch().get_current(); + } +#ifdef USE_STEPPING + long update_threshold = 10; +#else + long update_threshold = 10 * num_workers(); +#endif + if (++pid.retire_count == update_threshold) { + pid.retire_count = 0; +#ifdef USE_STEPPING + pid.e_state = get_epoch().update_epoch_steps(pid.e_state, 8); +#else + get_epoch().update_epoch(); +#endif + } + } + + public: + retire_pool() { + long workers = max_num_workers; + pools = std::vector(workers); + for (int i = 0; i < workers; i++) + pools[i].retire_count = 0; + } + + retire_pool(const retire_pool&) = delete; + ~retire_pool() { clear(); } + + void Retire(T* p) { + auto i = worker_id(); + auto &pid = pools[i]; + advance_epoch(i, pid); + list_entry x; + strncpy(x.data, (char*) p, sizeof(T)); + pid.current.push_back(x); + } + + // Clears all 
the lists; to be used on termination, or at a
+  // quiescent point when no one is reading any retired items.
+  void clear() {
+    get_epoch().update_epoch();
+    for (int i=0; i < num_workers(); i++) {
+      clear_list(pools[i].old);
+      clear_list(pools[i].current);
+    }
+  }
+
+  void stats() {}
+};
+
+} // namespace internal
+
+// ***************************
+// The public interface
+// ***************************
+
+  // x should point to the keep_ flag returned by Retire
+  inline void undo_retire(bool* x) { *x = true;}
+
+  template <typename T>
+  using memory_pool = internal::memory_pool<T>;
+
+  template <typename T>
+  extern inline memory_pool<T>& get_default_pool() {
+    static memory_pool<T> pool;
+    return pool;
+  }
+
+  template <typename T>
+  using retire_pool = internal::retire_pool<T>;
+
+  template <typename T>
+  extern inline retire_pool<T>& get_default_retire_pool() {
+    static retire_pool<T> pool;
+    return pool;
+  }
+
+  template <typename T, typename... Args>
+  static T* New(Args... args) {
+    return get_default_pool<T>().New(std::forward<Args>(args)...);}
+
+  template <typename T>
+  static void Delete(T* p) {get_default_pool<T>().Delete(p);}
+
+  template <typename T>
+#ifdef USE_UNDO
+  static bool* Retire(T* p) {return get_default_pool<T>().Retire(p);}
+#else
+  void Retire(T* p) {return get_default_pool<T>().Retire(p);}
+#endif
+
+  template <typename T>
+  static bool check_ptr(T* p, bool silent=false) {
+    return get_default_pool<T>().check_ptr(p, silent);}
+
+  template <typename T>
+  static void clear() {get_default_pool<T>().clear();}
+
+  //template <typename T>
+  //static void stats() {get_default_pool<T>().stats();}
+
+  template <typename Thunk>
+  auto with_epoch(Thunk f) {
+    int id = internal::get_epoch().announce();
+    if constexpr (std::is_void_v<std::invoke_result_t<Thunk>>) {
+      f();
+      internal::get_epoch().unannounce(id);
+    } else {
+      auto v = f();
+      internal::get_epoch().unannounce(id);
+      return v;
+    }
+  }
+
+} // end namespace epoch
+
+#endif //PARLAY_EPOCH_H_
diff --git a/include/utils/lock.h b/include/utils/lock.h
new file mode 100644
index 0000000..1ea36f8
--- /dev/null
+++ b/include/utils/lock.h
@@ -0,0 +1,67 @@
+#include <atomic>
+#include <vector>
+#include <iostream>
+
+#ifndef PARLAYLOCK_H_
+#define PARLAYLOCK_H_
+
+namespace parlay {
+
+// creates 2^16 lock slots.
+// locks.try_lock(i, f) will hash i to the h(i) % 2^16th lock.
+// If that lock is not taken, f is run while holding the lock, the lock
+// is then released, and try_lock returns the boolean result of f.
+// Otherwise it returns false.
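+//
+// A minimal usage sketch (key, table, and value are hypothetical):
+//
+//   bool ok = parlay::get_locks().try_lock((long) key, [&] {
+//     table[key] = value;  // runs only if the hashed slot was free
+//     return true;         // becomes try_lock's result
+//   });
+//   // ok == false means the slot was busy; callers typically retry,
+//   // e.g. by wrapping the attempt in try_loop with a std::optional.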
+struct lock_set {
+private:
+  using lck = std::atomic<bool>;
+  const int bucket_bits = 16;
+  const size_t mask = ((1ul) << bucket_bits) - 1;
+  std::vector<lck> locks;
+
+  static inline uint64_t hash64(uint64_t x) {
+    x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9);
+    x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb);
+    x = x ^ (x >> 31);
+    return x;
+  }
+public:
+  template <typename F>
+  bool try_lock(long i, F f) {
+    bool old = false;
+    bool result = false;
+    lck& x = locks[hash64(i) & mask];
+    if (x.compare_exchange_strong(old, true)) {
+      result = f();
+      x = false;
+    }
+    return result;
+  }
+  lock_set() : locks(std::vector<lck>(1ul << bucket_bits)) {
+    std::fill(locks.begin(), locks.end(), false);
+  }
+};
+
+  extern inline lock_set& get_locks() {
+    static lock_set locks;
+    return locks;
+  }
+
+  template <typename F>
+  auto try_loop(const F& f, int delay = 200, const int max_multiplier = 20) {
+    int multiplier = 1;
+    long cnt = 0;
+    while (true) {
+      if (cnt++ == 10000000000ul/(delay*max_multiplier)) {
+        std::cerr << "probably in an infinite retry loop" << std::endl;
+        abort();
+      }
+      auto r = f();
+      if (r.has_value()) return *r;
+      multiplier = std::min(2*multiplier, max_multiplier);
+      for (volatile int i=0; i < delay * multiplier; i++);
+    }
+  }
+
+}
+
+#endif // PARLAYLOCK_H_
diff --git a/include/utils/threads/portability.h b/include/utils/threads/portability.h
new file mode 100644
index 0000000..88a3718
--- /dev/null
+++ b/include/utils/threads/portability.h
@@ -0,0 +1,112 @@
+
+#ifndef PARLAY_PORTABILITY_H_
+#define PARLAY_PORTABILITY_H_
+
+#if defined(_WIN32)
+#ifndef NOMINMAX
+#define PARLAY_DEFINED_NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#ifdef PARLAY_DEFINED_NOMINMAX
+#undef NOMINMAX
+#endif
+#endif
+
+#include <exception>
+
+#include <iostream>
+
+namespace parlay {
+
+// PARLAY_INLINE: Ask the compiler politely to inline the given function.
+#if defined(__GNUC__)
+#define PARLAY_INLINE inline __attribute__((__always_inline__))
+#elif defined(_MSC_VER)
+#define PARLAY_INLINE __forceinline
+#else
+#define PARLAY_INLINE inline
+#endif
+
+// PARLAY_NOINLINE: Ask the compiler to *not* inline the given function
+#if defined(__GNUC__)
+#define PARLAY_NOINLINE __attribute__((__noinline__))
+#elif defined(_MSC_VER)
+#define PARLAY_NOINLINE __declspec(noinline)
+#else
+#define PARLAY_NOINLINE
+#endif
+
+// PARLAY_COLD: Ask the compiler to place the given function far away from other code
+#if defined(__GNUC__)
+#define PARLAY_COLD __attribute__((__cold__))
+#elif defined(_MSC_VER)
+#define PARLAY_COLD
+#else
+#define PARLAY_COLD
+#endif
+
+
+// PARLAY_PACKED: Ask the compiler to pack a struct into less memory by not padding
+#if defined(__GNUC__)
+#define PARLAY_PACKED __attribute__((packed))
+#else
+#define PARLAY_PACKED
+#endif
+
+// PARLAY_NO_UNIQUE_ADDR: Allow a member object to occupy no space
+#if defined(__has_cpp_attribute)
+#if __has_cpp_attribute(no_unique_address)
+#define PARLAY_NO_UNIQUE_ADDR [[no_unique_address]]
+#else
+#define PARLAY_NO_UNIQUE_ADDR
+#endif
+#else
+#define PARLAY_NO_UNIQUE_ADDR
+#endif
+
+// PARLAY_PREFETCH: Prefetch data into cache
+#if defined(__GNUC__)
+#define PARLAY_PREFETCH(addr, rw, locality) __builtin_prefetch ((addr), (rw), (locality))
+#elif defined(_MSC_VER)
+#define PARLAY_PREFETCH(addr, rw, locality)                                 \
+  PreFetchCacheLine(((locality) ?
PF_TEMPORAL_LEVEL_1 : PF_NON_TEMPORAL_LEVEL_ALL), (addr)) +#else +#define PARLAY_PREFETCH(addr, rw, locality) +#endif + + +#if defined(__cplusplus) && __cplusplus >= 202002L +#define PARLAY_LIKELY [[likely]] +#define PARLAY_UNLIKELY [[unlikely]] +#else +#define PARLAY_LIKELY +#define PARLAY_UNLIKELY +#endif + +// Check for exceptions. The standard suggests __cpp_exceptions. Clang/GCC defined __EXCEPTIONS. +// MSVC disables them with _HAS_EXCEPTIONS=0. Might not cover obscure compilers/STLs. +// +// Exceptions can be explicitly disabled in Parlay with PARLAY_NO_EXCEPTIONS. +#if !defined(PARLAY_NO_EXCEPTIONS) && \ + ((defined(__cpp_exceptions) && __cpp_exceptions != 0) || \ + (defined(__EXCEPTIONS)) || \ + (defined(_HAS_EXCEPTIONS) && _HAS_EXCEPTIONS == 1) || \ + (defined(_MSC_VER) && !defined(_HAS_EXCEPTIONS))) +#define PARLAY_EXCEPTIONS_ENABLED +#endif + +template +[[noreturn]] PARLAY_NOINLINE PARLAY_COLD void throw_exception_or_terminate(Args&&... args) { +#if defined(PARLAY_EXCEPTIONS_ENABLED) + throw Exception{std::forward(args)...}; +#else + std::cerr << Exception{std::forward(args)...}.what() << "\n"; + std::terminate(); +#endif +} + + +} // namespace parlay + +#endif // PARLAY_PORTABILITY_H_ diff --git a/include/utils/threads/thread_id_pool.h b/include/utils/threads/thread_id_pool.h new file mode 100644 index 0000000..71cc396 --- /dev/null +++ b/include/utils/threads/thread_id_pool.h @@ -0,0 +1,160 @@ + +#ifndef PARLAY_INTERNAL_THREAD_ID_POOL_H_ +#define PARLAY_INTERNAL_THREAD_ID_POOL_H_ + +#include +#include + +#include +#include +#include +#include + +namespace parlay { +namespace internal { + +using thread_id_type = unsigned int; + +// A ThreadIdPool hands out and maintains available unique dense IDs for active threads. +// Each thread that requests an ID will get one in the range from [0...get_num_thread_ids()). +// When the pool runs out of available IDs, it will allocate new ones, increasing the result +// of get_num_thread_ids(). Threads that die will return their ID to the pool for re-use by +// a subsequently spawned thread. +// +// There is a global singleton instance of ThreadIdPool given by ThreadIdPool::instance(), +// however this function is private and should not be called by the outside world. The public +// API through which the world can access thread IDs is limited to the free functions: +// +// - get_thread_id() -> size_t: Returns the thread ID of the current thread. Will assign +// one if this thread doesn't have one yet. +// - get_num_thread_ids() -> size_t: Returns the number of unique thread IDs that have +// been handed out. 
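+// For example (an illustrative sketch):
+//
+//   std::thread t([] {
+//     auto id = parlay::internal::get_thread_id();  // dense ID, e.g. 0
+//     assert(id < parlay::internal::get_num_thread_ids());
+//   });
+//   t.join();  // a thread spawned after this may reuse the same ID
+//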
+// +class ThreadIdPool : public std::enable_shared_from_this { + + // Prevent public construction since this class is meant as a global singleton + struct private_constructor { + explicit private_constructor() = default; + }; + + public: + + // Returns a unique thread ID for the current thread in the range [0...get_num_thread_ids()) + friend thread_id_type get_thread_id(); + + // Returns the number of assigned thread IDs in the range [0...get_num_thread_ids()) + friend thread_id_type get_num_thread_ids(); + + + ~ThreadIdPool() noexcept { + size_t num_destroyed = 0; + for (auto current = available_ids.load(std::memory_order_relaxed); current; num_destroyed++) { + auto old = std::exchange(current, current->next); + delete old; + } + assert(num_destroyed == num_thread_ids.load(std::memory_order_relaxed)); + } + + // The constructor must be public since we make_shared it, but we protect it with a private parameter type + explicit ThreadIdPool(private_constructor) noexcept : num_thread_ids(0), available_ids(nullptr) { } + + ThreadIdPool(const ThreadIdPool&) = delete; + ThreadIdPool& operator=(const ThreadIdPool&) = delete; + + private: + + // A ThreadId corresponds to a unique ID number in the range [0...num_thread_ids). When it is + // not in use (the thread that owned it dies), it is returned to the global pool which maintains + // a linked list of available ones. + class ThreadId { + friend class ThreadIdPool; + + explicit ThreadId(const thread_id_type id_) noexcept : id(id_), next(nullptr) { } + + public: + const thread_id_type id; + private: + ThreadId* next; + }; + + // A ThreadIdOwner indicates that a thread is currently in possession of the given ThreadID. + // Each thread has a static thread_local ThreadIdOwner containing the ID that it owns. + // On construction, it acquires an available ThreadID, and on destruction, it releases + // it back to the pool. The ThreadIdOwner stores a shared_ptr to the pool to guarantee + // that the pool does not get destroyed before a detached thread returns its ID. + class ThreadIdOwner { + friend class ThreadIdPool; + + explicit ThreadIdOwner(ThreadIdPool& pool_) + : pool(pool_.shared_from_this()), node(pool->acquire()), id(node->id) { } + + ~ThreadIdOwner() { pool->relinquish(node); } + + private: + const std::shared_ptr pool; + ThreadId* const node; + + public: + const thread_id_type id; + }; + + // Grab a free ID from the available list, or if there are none available, allocate a new one. + ThreadId* acquire() { + if (available_ids.load(std::memory_order_relaxed)) { + // We only take the lock if there are available IDs in the pool. In the common case + // where there are no relinquished IDs available for re-use we don't need the lock. 
+ static std::mutex m_; + std::lock_guard g_{m_}; + + ThreadId* current = available_ids.load(std::memory_order_relaxed); + while (current && !available_ids.compare_exchange_weak(current, current->next, + std::memory_order_acquire, std::memory_order_relaxed)) {} + if (current) { return current; } + } + return new ThreadId(num_thread_ids.fetch_add(1)); + } + + // Given the ID back to the global pool for reuse + void relinquish(ThreadId* p) { + p->next = available_ids.load(std::memory_order_relaxed); + while (!available_ids.compare_exchange_weak(p->next, p, + std::memory_order_release, std::memory_order_relaxed)) {} + } + + static inline const ThreadIdOwner& get_local_thread_id() { + static const thread_local ThreadIdPool::ThreadIdOwner my_id(instance()); + return my_id; + } + + static inline ThreadIdPool& instance() { + // We hold the global thread id pool inside a shared_ptr because it is possible + // for threads to be spawned *before* the ID pool has been initialized, which + // means that they may outlive this static variable. Each ThreadId holds onto + // a copy of the shared_ptr to ensure that the pool stays alive long enough + // for the IDs to relinquish themselves back to the pool. + // + // I think it is still possible to cause a segfault by spawning a new thread + // *after* the static destructors have run... so please do not spawn threads + // inside your static destructors :) + static const std::shared_ptr pool = std::make_shared(private_constructor{}); + return *pool; + } + + std::atomic num_thread_ids; + std::atomic available_ids; +}; + +inline thread_id_type get_thread_id() { + return ThreadIdPool::get_local_thread_id().id; +} + +inline thread_id_type get_num_thread_ids() { + return ThreadIdPool::instance().num_thread_ids.load(); +} + + +} // namespace internal +} // namespace parlay + + +#endif // PARLAY_INTERNAL_THREAD_ID_POOL_H_ diff --git a/include/utils/threads/thread_specific.h b/include/utils/threads/thread_specific.h new file mode 100644 index 0000000..ccea7fe --- /dev/null +++ b/include/utils/threads/thread_specific.h @@ -0,0 +1,463 @@ + +#ifndef PARLAY_THREAD_SPECIFIC_H_ +#define PARLAY_THREAD_SPECIFIC_H_ + +#include +#include + +#include +#include // IWYU pragma: keep +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "thread_id_pool.h" +#include "portability.h" +//#include "range.h" +#include "type_traits.h" + + +namespace parlay { + +using internal::thread_id_type; + +// Returns a unique thread ID for the current running thread +// in the range of [0...num_thread_ids()). Thread IDs are +// guaranteed to be unique for all *live* threads, but they +// are re-used after a thread dies and another is spawned. +inline thread_id_type my_thread_id() { + return internal::get_thread_id(); +} + +// Return the number of thread IDs that have been assigned to +// threads. All thread IDs are in the range [0...num_thread_ids()). +// +// Important note: Thread IDs are assigned lazily when a thread +// first requests one. Therefore num_thread_ids() is *not* +// guaranteed to be as large as the number of live threads if +// those threads have never called my_thread_id(). 
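+// For instance (a sketch): num_thread_ids() may be 0 before any thread has
+// requested an ID; after the main thread first calls my_thread_id() it
+// returns 1, and after k workers each call my_thread_id() it returns k + 1.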
+inline thread_id_type num_thread_ids() { + return internal::get_num_thread_ids(); +} + +namespace internal { + +class ThreadListChunkData { + + public: + + // This is just std::bit_ceil(std::thread::hardware_concurrency()) but we don't assume C++20 + const static inline std::size_t thread_list_chunk_size = []() { + std::size_t size = 4; + while (size < std::thread::hardware_concurrency()) + size *= 2; + return size; + }(); + + // Used by ThreadSpecific which stores a chunked sequence of items that is at least as large + // as the number of active threads. Given a thread ID, items are split into chunks of size: + // + // P, P, 2P, 4P, 8P, ... + // + // where P is the lowest power of two that is at least as large as the number of hardware threads. + static std::size_t compute_chunk_id(thread_id_type id) { + std::size_t k = thread_list_chunk_size; + std::size_t chunk = 0; + while (k <= id) { + chunk++; + k *= 2; + } + return chunk; + } + + static std::size_t compute_chunk_position(thread_id_type id, std::size_t chunk_id) { + if (chunk_id == 0) + return id; + else { + auto high_bit = thread_list_chunk_size << (chunk_id - 1); + assert(id & high_bit); + return id - high_bit; + } + } + + explicit ThreadListChunkData(thread_id_type thread_id_) noexcept : thread_id(thread_id_), + chunk_id(compute_chunk_id(thread_id)), chunk_position(compute_chunk_position(thread_id, chunk_id)) { } + + const thread_id_type thread_id; + const std::size_t chunk_id; + const std::size_t chunk_position; +}; + +extern inline const ThreadListChunkData& get_chunk_data() { + static thread_local const ThreadListChunkData data{get_thread_id()}; + return data; +} + +template +struct Uninitialized { + union { + alignas(64) std::monostate empty; + T value; + }; + + Uninitialized() noexcept { }; + + T& operator*() { return value; } + + T* get() { return std::addressof(value); } + + ~Uninitialized() { value.~T(); } +}; + +} // namespace internal + +// A ThreadSpecific stores a list of objects of type T such that there +// is a unique object for each active thread. The list automatically grows +// when additional threads are spawned and attempt to access it. Threads +// may also traverse the entire list if they need to. +// +// By default, list elements are all value initialized, roughly meaning +// that class types are default constructed, and builtin types are zero +// initialized. For custom initialization, you can pass a constructor +// function which returns the desired value. The constructor function +// can take zero or one arguments. If it takes one argument, it will be +// passed the thread ID that it is constructing for. Note that the +// elements are not guaranteed to be constructed by the thread that +// they belong to, and they may be constructed in advance of any thread +// actually taking ownership of that ID. +// +// A few things to note: +// +// - Thread IDs are always unique for the set of currently live threads, +// but not unique over the course of the entire program. A thread that +// dies will give up its ID to be claimed by a new thread later. +// +// - The list elements are *not* destroyed when the thread that "owns" +// them is destroyed. A new thread that reclaims a previously-used ID +// will find the item at that position in the same state that it was +// left by the previous thread. Elements are only destroyed when the +// entire ThreadSpecific is destroyed. 
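+// For example (a sketch using only this header):
+//
+//   parlay::ThreadSpecific<long> counters;  // slots value-initialized to 0
+//   // in each thread:  (*counters)++;
+//   long total = 0;
+//   counters.for_each([&](long& c) { total += c; });
+//
+// A thread that exits after incrementing leaves its count in place, and a
+// later thread assigned the same ID continues from that value, not from 0.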
+// +// Therefore, threads are responsible for manually cleaning up the +// contents of a ThreadSpecific and/or resetting it to a default value +// for the next thread that might claim the spot if they need to. +// +template +class ThreadSpecific { + + // 25 chunks guarantees enough slots for any machine + // with up to 2^48 bytes of addressable virtual memory, + // assuming that threads are 8MB large. + static constexpr std::size_t n_chunks = 25; + + public: + + using reference = T&; + using value_type = T; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using pointer = T*; + + ThreadSpecific() : constructor([](std::size_t) { return T{}; }) { + initialize(); + } + + template && !std::is_invocable_v, int> = 0> + explicit ThreadSpecific(F&& constructor_) + : constructor([f = std::forward(constructor_)](std::size_t) { return f(); }) { + initialize(); + } + + template, int> = 0> + explicit ThreadSpecific(F&& constructor_) : constructor(std::forward(constructor_)) { + initialize(); + } + + ThreadSpecific(const ThreadSpecific&) = delete; + ThreadSpecific& operator=(const ThreadSpecific&) = delete; + ThreadSpecific(ThreadSpecific&&) = delete; + + ~ThreadSpecific() { + for (internal::Uninitialized* chunk : chunks) { + delete[] chunk; + } + } + + T& operator*() { return get(); } + T* operator->() { return std::addressof(get()); } + + T& get() { + auto chunk_data = internal::get_chunk_data(); + return get_by_index(chunk_data.chunk_id, chunk_data.chunk_position); + } + + const T& operator*() const { return get(); } + T const* operator->() const { return std::addressof(get()); } + + const T& get() const { + auto chunk_data = internal::get_chunk_data(); + return get_by_index(chunk_data.chunk_id, chunk_data.chunk_position); + } + + template + void for_each(F&& f) { + static_assert(std::is_invocable_v); + + auto num_threads = num_thread_ids(); + thread_id_type tid = 0; + internal::Uninitialized* chunk = chunks[0].load(std::memory_order_relaxed); + + for (std::size_t chunk_id = 0; tid < num_threads; chunk = chunks[++chunk_id].load(std::memory_order_acquire)) { + auto chunk_size = get_chunk_size(chunk_id); + if (!chunk) PARLAY_UNLIKELY { + ensure_chunk_exists(chunk_id); + chunk = chunks[chunk_id].load(std::memory_order_relaxed); + } + for (std::size_t i = 0; tid < num_threads && i < chunk_size; i++, tid++) { + f(*chunk[i]); + } + } + } + + // Allow looping over all thread's data + template + class iterator_t { + friend class ThreadSpecific; + + using parent_type = maybe_const_t>; + + iterator_t(std::size_t chunk_id_, std::size_t position_, parent_type* parent_) : + chunk_id(chunk_id_), position(position_), parent(parent_) { } + + public: + using iterator_category = std::random_access_iterator_tag; + using reference = std::add_lvalue_reference_t>; + using value_type = T; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using pointer = std::add_pointer_t>; + + iterator_t() = default; + + /* implicit */ iterator_t(const iterator_t& other) // cppcheck-suppress noExplicitConstructor // NOLINT + : chunk_id(other.chunk_id), position(other.position), parent(other.parent) { } + + reference operator*() const { return parent->get_by_index_nocheck(chunk_id, position); } + + reference operator[](std::size_t p) const { + auto tmp = *this; + tmp += p; + return *tmp; + } + + iterator_t& operator++() { + position++; + if (position == get_chunk_size(chunk_id)) { + if (++chunk_id < n_chunks && parent->chunks[chunk_id].load(std::memory_order_acquire) == nullptr) 
PARLAY_UNLIKELY + parent->ensure_chunk_exists(chunk_id); + position = 0; + } + return *this; + } + + iterator_t operator++(int) { auto tmp = *this; ++(*this); return tmp; } //NOLINT + + iterator_t& operator--() { + if (position == 0) { + position = get_chunk_size(--chunk_id) - 1; + if (parent->chunks[chunk_id].load(std::memory_order_acquire) == nullptr) PARLAY_UNLIKELY + parent->ensure_chunk_exists(chunk_id); + } + else { + position--; + } + return *this; + } + + iterator_t operator--(int) { auto tmp = *this; --(*this); return tmp; } //NOLINT + + iterator_t& operator+=(difference_type diff) { + if (diff < 0) return *this -= (-diff); + assert(diff >= 0); + position += diff; + if (position >= get_chunk_size(chunk_id)) { + do { + position -= get_chunk_size(chunk_id++); + } while (position >= get_chunk_size(chunk_id)); + if (parent->chunks[chunk_id].load(std::memory_order_acquire) == nullptr) PARLAY_UNLIKELY + parent->ensure_chunk_exists(chunk_id); + } + return *this; + } + + iterator_t operator+(difference_type diff) const { + auto result = *this; + result += diff; + return result; + } + + iterator_t& operator-=(difference_type diff) { + if (diff < 0) return *this += (-diff); + assert(diff >= 0); + auto pos = static_cast(position); + pos -= diff; + if (pos < 0) { + do { + pos += static_cast(get_chunk_size(--chunk_id)); + } while (pos < 0); + if (parent->chunks[chunk_id].load(std::memory_order_acquire) == nullptr) PARLAY_UNLIKELY + parent->ensure_chunk_exists(chunk_id); + } + assert(pos >= 0); + position = static_cast(pos); + return *this; + } + + iterator_t operator-(difference_type diff) const { + auto result = *this; + result -= diff; + return result; + } + + difference_type operator-(const iterator_t& other) const { + if (other > *this) return -(other - *this); + assert(other <= *this); + auto result = static_cast(position) - static_cast(other.position); + auto chunk_id_ = other.chunk_id; + while (chunk_id_ < chunk_id) { + result += static_cast(get_chunk_size(chunk_id_++)); + } + return result; + } + + bool operator==(const iterator_t& other) const { + return chunk_id == other.chunk_id && position == other.position; + } + + bool operator!=(const iterator_t& other) const { + return chunk_id != other.chunk_id || position != other.position; + } + + bool operator<(const iterator_t& other) const { + return chunk_id < other.chunk_id || (chunk_id == other.chunk_id && position < other.position); + } + + bool operator<=(const iterator_t& other) const { + return chunk_id < other.chunk_id || (chunk_id == other.chunk_id && position <= other.position); + } + + bool operator>(const iterator_t& other) const { + return chunk_id > other.chunk_id || (chunk_id == other.chunk_id && position > other.position); + } + + bool operator>=(const iterator_t& other) const { + return chunk_id > other.chunk_id || (chunk_id == other.chunk_id && position >= other.position); + } + + friend void swap(iterator_t& left, iterator_t& right) { + std::swap(left.chunk_id, right.chunk_id); + std::swap(left.position, right.position); + std::swap(left.parent, right.parent); + } + + std::size_t chunk_id{n_chunks}; + std::size_t position{0}; + parent_type* parent{nullptr}; + }; + + using iterator = iterator_t; + using const_iterator = iterator_t; + + //static_assert(is_random_access_iterator_v); + //static_assert(is_random_access_iterator_v); + + [[nodiscard]] iterator begin() { + return iterator{0,0,this}; + } + + [[nodiscard]] const_iterator begin() const { + return const_iterator{0,0,this}; + } + + [[nodiscard]] iterator end() { + 
internal::ThreadListChunkData data{num_thread_ids()}; + return iterator{data.chunk_id, data.chunk_position, this}; + } + + [[nodiscard]] const_iterator end() const { + internal::ThreadListChunkData data{num_thread_ids()}; + return const_iterator{data.chunk_id, data.chunk_position, this}; + } + + private: + + void initialize() { + internal::get_chunk_data(); // Force static initialization before any ThreadLocals are constructed + chunks[0].store(new internal::Uninitialized[internal::ThreadListChunkData::thread_list_chunk_size], std::memory_order_relaxed); + std::fill(chunks.begin() + 1, chunks.end(), nullptr); + auto chunk = chunks[0].load(std::memory_order_relaxed); + for (std::size_t i = 0; i < internal::ThreadListChunkData::thread_list_chunk_size; i++) { + new (static_cast(chunk[i].get())) T(constructor(i)); + } + } + + static std::size_t get_chunk_size(std::size_t chunk_id) { + assert(chunk_id < n_chunks); + if (chunk_id == 0) return internal::ThreadListChunkData::thread_list_chunk_size; + else return internal::ThreadListChunkData::thread_list_chunk_size << (chunk_id - 1); + } + + T& get_by_index(std::size_t chunk_id, std::size_t chunk_position) { + if (chunk_id > 0 && chunks[chunk_id].load(std::memory_order_acquire) == nullptr) PARLAY_UNLIKELY + ensure_chunk_exists(chunk_id); + return get_by_index_nocheck(chunk_id, chunk_position); + } + + const T& get_by_index(std::size_t chunk_id, std::size_t chunk_position) const { + if (chunk_id > 0 && chunks[chunk_id].load(std::memory_order_acquire) == nullptr) PARLAY_UNLIKELY + ensure_chunk_exists(chunk_id); + return get_by_index_nocheck(chunk_id, chunk_position); + } + + T& get_by_index_nocheck(std::size_t chunk_id, std::size_t chunk_position) { + assert(chunks[chunk_id].load() != nullptr); + return *(chunks[chunk_id].load(std::memory_order_relaxed)[chunk_position]); + } + + const T& get_by_index_nocheck(std::size_t chunk_id, std::size_t chunk_position) const { + assert(chunks[chunk_id].load() != nullptr); + return *(chunks[chunk_id].load(std::memory_order_relaxed)[chunk_position]); + } + + void ensure_chunk_exists(std::size_t chunk_id) const { + std::lock_guard lock(growing_mutex); + if (chunks[chunk_id].load(std::memory_order_relaxed) == nullptr) { + auto chunk_size = get_chunk_size(chunk_id); + auto chunk = new internal::Uninitialized[chunk_size]; + for (std::size_t i = 0; i < chunk_size; i++) { + new (static_cast(chunk[i].get())) T(constructor(chunk_size + i)); + } + chunks[chunk_id].store(chunk, std::memory_order_release); + } + } + + mutable std::function constructor; + mutable std::mutex growing_mutex; + mutable std::array*>, n_chunks> chunks; +}; + + //static_assert(is_random_access_range_v>); + //static_assert(is_random_access_range_v>); + +} // namespace parlay + + +#endif // PARLAY_THREAD_SPECIFIC_H_ diff --git a/include/utils/threads/type_traits.h b/include/utils/threads/type_traits.h new file mode 100644 index 0000000..a8b68ea --- /dev/null +++ b/include/utils/threads/type_traits.h @@ -0,0 +1,286 @@ +// Useful type traits used mostly internally by Parlay +// +// Many inspired by this video, and the following standards +// proposals: +// - https://www.youtube.com/watch?v=MWBfmmg8-Yo +// - http://open-std.org/JTC1/SC22/WG21/docs/papers/2014/n4034.pdf +// - https://quuxplusone.github.io/blog/code/object-relocation-in-terms-of-move-plus-destroy-draft-7.html +// +// Includes: +// - priority_tag +// - is_trivial_allocator +// - is_trivially_relocatable / is_nothrow_relocatable +// + +#ifndef PARLAY_TYPE_TRAITS_H_ +#define 
PARLAY_TYPE_TRAITS_H_ + +#include + +#include +#include +#include +#include +#include // IWYU pragma: keep + +// IWYU pragma: no_include + +namespace parlay { + +// Provides the member type T +template +struct type_identity { + using type = T; +}; + +// Equal to the type T, i.e., the identity transformation +template +using type_identity_t = typename type_identity::type; + +// Given a pointer-to-member (object or function), returns +// the type of the class in which the member lives +template +struct member_pointer_class; + +template +struct member_pointer_class : public type_identity {}; + +template +using member_pointer_class_t = typename member_pointer_class::type; + +// Provides the member type std::add_const_t if Const is +// true, otherwise provides the member type T +template +using maybe_const = std::conditional, T>; + +// Adds const to the given type if Const is true +template +using maybe_const_t = typename maybe_const::type; + +// Provides the member type std::decay_t if Decay is +// true, otherwise provides the member type T +template +using maybe_decay = std::conditional, T>; + +// Decays the given type if Decay is true +template +using maybe_decay_t = typename maybe_decay::type; + +// Provides the member value true if the given type is an instance of std::optional +template +struct is_optional : std::false_type {}; + +template +struct is_optional> : std::true_type {}; + +// true if the given type is an instance of std::optional +template +inline constexpr bool is_optional_v = is_optional::value; + +template +using is_less_than_comparable = std::conjunction< + std::is_invocable_r, T, U>, + std::is_invocable_r, U, T> + >; + +template +inline constexpr bool is_less_than_comparable_v = is_less_than_comparable::value; + +template +using is_equality_comparable = std::conjunction< + std::is_invocable_r, T, U>, + std::is_invocable_r, U, T>, + std::is_invocable_r, T, U>, + std::is_invocable_r, U, T> + >; + +template +inline constexpr bool is_equality_comparable_v = is_equality_comparable::value; + +// Defines a member value true if the given type BinaryOperator_ can be invoked on types +// T1&& and T2 to yield a result of a type that is convertible to T1. +// +// This requirement corresponds to the needs of a left fold over the operator BinaryOperator_ +// with an identity and result type of T1, where the intermediate elements being reduced over +// are potentially of type T2. +template +struct is_binary_operator_for : public std::false_type {}; + +template +struct is_binary_operator_for >, + std::enable_if_t< std::is_invocable_r_v >, + std::enable_if_t< std::is_invocable_r_v >, + std::enable_if_t< std::is_invocable_r_v >, + std::enable_if_t< std::is_invocable_r_v > +>, std::enable_if_t>> : public std::true_type{}; + +// Handle the case where BinaryOperator_ is a member function pointer +template +struct is_binary_operator_for >, + std::enable_if_t< std::is_invocable_r_v&, T1&&, T1&&> >, + std::enable_if_t< std::is_invocable_r_v&, T1&&, T2> >, + std::enable_if_t< std::is_invocable_r_v&, T2, T2> >, + std::enable_if_t< std::is_invocable_r_v&, T2, T1&&> > +>, std::enable_if_t>> : public std::true_type{}; + +// True if the given type BinaryOperator_ can be invoked on types T1&& and T2 to yield a result +// of a type that is convertible to T1. T2 defaults to T1&& if not specified. 
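+// For instance, under these semantics (a sketch):
+//
+//   static_assert(parlay::is_binary_operator_for_v<std::plus<>, long, int>);
+//   // std::plus<> on any mix of long and int yields a value convertible
+//   // to long, so it can serve as the fold operator
+//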
+// +// This requirement corresponds to the needs of a left fold over the operator BinaryOperator_ +// with an identity and result type of T1, where the intermediate elements being reduced over +// are potentially of type T2. +template +inline constexpr bool is_binary_operator_for_v = is_binary_operator_for::value; + +// Defines the member value true if T is a pair or a tuple of length two +template +struct is_pair : public std::false_type {}; + +template +struct is_pair(std::declval()) ), + decltype( std::get<1>(std::declval()) ), + std::enable_if_t< 2 == std::tuple_size_v> > +>> : public std::true_type {}; + +// True if T is a pair or a tuple of length two +template +inline constexpr bool is_pair_v = is_pair::value; + +/* --------------------- Priority tags. ------------------------- + Priority tags are an easy way to force template resolution to + pick the "best" option in the presence of multiple valid + choices. It works because of the facts that priority_tag + is a subtype of priority_tag, and template resolution + will always pick the most specialised option when faced with + a choice, so it will prefer priority_tag over + priority_tag +*/ + +template +struct priority_tag : priority_tag {}; + +template<> +struct priority_tag<0> {}; + + +/* ----------------- Trivial allocators. --------------------- + Allocator-aware containers and algorithms need to know whether + they can construct/destruct objects directly inside memory given + to them by an allocator, or whether the allocator has custom + behaviour. Since some optimizations require us to circumvent + custom allocator behaviour, we need to detect when an allocator + does not do this. + + Specifically, an allocator-aware algorithm must construct objects + inside memory returned by an allocator by writing + + std::allocator_traits::construct(allocator, p, args); + + if the allocator type defines a method .construct, then this results + in forwarding the construction to that method. Otherwise, this just + results in a call to + + new (p) T(std::forward(args)...) + + If we wish to circumvent calling the constructor, for example, + for a trivially relocatable type in which we would prefer to + copy directly via memcpy, we must ensure that the allocator + does not have a custom .construct method. Otherwise, we can + not optimize, and must continue to use the allocator's own + construct method. + + The same discussion is true for destruction as well. + + See https://www.youtube.com/watch?v=MWBfmmg8-Yo for more info. +*/ + +namespace internal { + +// Detect the existence of the .destroy method of the type Alloc +template +auto trivial_allocator(Alloc& a, T* p, priority_tag<2>) + -> decltype(void(a.destroy(p)), std::false_type()); + +// Detect the existence of the .construct method of the type Alloc +template +auto trivial_allocator(Alloc& a, T* p, priority_tag<1>) + -> decltype(void(a.construct(p, std::declval())), std::false_type()); + +// By default, if no .construct or .destroy methods are found, assume +// that the allocator is trivial +template +auto trivial_allocator(Alloc& a, T* p, priority_tag<0>) + -> std::true_type; + +} // namespace internal + +template +struct is_trivial_allocator + : decltype(internal::trivial_allocator(std::declval(), nullptr, priority_tag<2>())) {}; + +template +inline constexpr bool is_trivial_allocator_v = is_trivial_allocator::value; + +// Manually specialize std::allocator since it is trivial, but +// some (maybe all?) implementations still provide a .construct +// and .destroy method anyway. 
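+// For example, with the std::allocator specialization described above
+// (a sketch):
+//
+//   static_assert(parlay::is_trivial_allocator_v<std::allocator<int>, int>);
+//   // an allocator that defines its own construct()/destroy() yields false
+//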
+template +struct is_trivial_allocator, T> : std::true_type {}; + +/* ----------------- Trivially relocatable. --------------------- + A type T is called trivially relocatable if, given a pointer + p to an object of type T, and a pointer q to unintialized + memory large enough for an object of type T, then + + new (q) T(std::move(*p)); + p->~T(); + + is equivalent to + + std::memcpy(p, q, sizeof(T)); + + Any type that is trivially move constructible and trivially + destructible is therefore trivially relocatable. User-defined + types that are not obviously trivially relocatable can be + annotated as such by specializing the is_trivially_relocatable + type. + + See proposal D1144R0 for copious details: + https://quuxplusone.github.io/blog/code/object-relocation-in-terms-of-move-plus-destroy-draft-7.html +*/ + +template +struct is_trivially_relocatable : + std::bool_constant::value && + std::is_trivially_destructible::value> { }; + +template struct is_nothrow_relocatable : + std::bool_constant::value || + (std::is_nothrow_move_constructible::value && + std::is_nothrow_destructible::value)> { }; + +template +inline constexpr bool is_trivially_relocatable_v = is_trivially_relocatable::value; + +template +inline constexpr bool is_nothrow_relocatable_v = is_nothrow_relocatable::value; + +// The standard allocator is stateless, so it is trivially relocatable, +// but unfortunately it is not detected as such, so we mark it manually. +// This is important because parlay::sequence is only trivially +// relocatable when its allocator is trivially relocatable. + +template +struct is_trivially_relocatable> : std::true_type {}; + +template +struct is_trivially_relocatable> : + std::bool_constant::value && + is_trivially_relocatable::value> {}; + +} // namespace parlay + +#endif //PARLAY_TYPE_TRAITS_H_ diff --git a/results.txt b/results.txt new file mode 100644 index 0000000..1600066 --- /dev/null +++ b/results.txt @@ -0,0 +1,61 @@ +15, 4176 +2137, 851286 +4256, 852158 +6351, 852495 +8683, 852486 +10790, 852650 +12957, 852699 +15099, 852751 +17261, 852858 +19396, 852810 +21520, 852866 +23661, 853033 +25826, 852895 +27956, 852934 +30124, 853005 +32326, 853110 +34454, 853050 +36614, 853059 +38752, 853102 +40908, 853100 +43100, 853189 +45254, 853196 +47371, 853228 +49498, 853214 +51643, 853251 +53837, 853268 +56016, 853197 +58171, 853271 +60327, 853206 +62541, 853285 +64634, 853273 +66760, 853268 +68961, 853244 +71141, 853120 +73343, 853280 +75566, 853282 +77826, 853284 +80161, 853217 +82301, 853225 +84427, 853253 +86560, 853215 +88680, 853286 +123, 856122 +3028, 1692291 +5883, 1692496 +8678, 1692857 +11721, 1693162 +14547, 1693551 +17479, 1693607 +20352, 1693699 +23215, 1693782 +26150, 1693739 +29132, 1693762 +32293, 1693782 +35297, 1693955 +38308, 1693979 +41169, 1693853 +44220, 1693999 +47164, 1694025 +50101, 1694071 +52974, 1694108 diff --git a/scripts/batch_size_experiment.sh b/scripts/batch_size_experiment.sh new file mode 100755 index 0000000..b690cff --- /dev/null +++ b/scripts/batch_size_experiment.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +declare base_dir="$(dirname $(dirname $(realpath $0)))" + +cd ${base_dir}/build +set -e +cmake -DSKETCH_BUFFER_SIZE=25 .. 
+make -j +set +e + +mkdir -p ./../results +mkdir -p ./../results/mpi_speed_results + +# Test run +# mpirun -np 23 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_13_query10_binary 100 0 --gtest_filter=*mpi_mixed_speed_test* + +# KRON-16 Batch Size Sweep +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_query10_binary 1 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_query10_binary 10 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_query10_binary 50 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_query10_binary 100 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_query10_binary 1000 0 --gtest_filter=*mpi_mixed_speed_test* +# KRON-16 fixed-forest +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_ff_query10_binary 1 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_ff_query10_binary 10 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_ff_query10_binary 50 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_ff_query10_binary 100 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_ff_query10_binary 1000 0 --gtest_filter=*mpi_mixed_speed_test* + +# Twitter Batch Size Sweep +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_query10_binary 1 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_query10_binary 10 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_query10_binary 50 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_query10_binary 100 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_query10_binary 1000 0 --gtest_filter=*mpi_mixed_speed_test* +# Twitter fixed-forest +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_ff_query10_binary 1 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_ff_query10_binary 10 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_ff_query10_binary 50 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_ff_query10_binary 100 0 --gtest_filter=*mpi_mixed_speed_test* +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_ff_query10_binary 1000 0 --gtest_filter=*mpi_mixed_speed_test* diff --git a/scripts/density_sweep_experiment.sh b/scripts/density_sweep_experiment.sh new file mode 100644 index 0000000..3eb5be1 --- /dev/null +++ b/scripts/density_sweep_experiment.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +declare base_dir="$(dirname $(dirname $(realpath $0)))" + +cd ${base_dir}/build +set -e +make -j +set +e + +mkdir -p ./../results + +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests 
binary_streams/erdos_0001_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_001_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_01_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_10_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_20_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_30_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_40_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_50_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_60_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_70_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_80_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_90_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/erdos_100_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* diff --git a/scripts/export_results.sh b/scripts/export_results.sh new file mode 100755 index 0000000..213838e --- /dev/null +++ b/scripts/export_results.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +#declare base_dir="$(dirname $(dirname $(realpath $0)))" + +#cd ${base_dir}/results/mpi_speed_results + + +write_out() { + filename=$1.txt + if [ -f $filename ]; then + awk -F ' ' 'BEGIN {ORS=","}NR==1{print $2}' $filename >> $2 + awk -F ' ' 'BEGIN {ORS=","}NR==2{print $2}' $filename >> $3 + + else + echo -n "0," >> $2 + echo -n "0," >> $3 + fi +} + +declare -a streams=( +[0]="kron_13_query10_binary" +[1]="kron_15_query10_binary" +[2]="kron_16_query10_binary" +[3]="kron_17_query10_binary" +[4]="kron_18_query10_binary" +# +[5]="dnc_query10_binary" +[6]="tech_query10_binary" +[7]="enron_query10_binary" +# +[8]="twitter_query10_binary" +[9]="stanford_query10_binary" +[10]="random2N_query10_binary" +[11]="randomNLOGN_query10_binary" +[12]="randomNSQRTN_query10_binary" +[13]="randomDIV_query10_binary" +# Fixed Forest +[14]="kron_13_ff_query10_binary" +[15]="kron_15_ff_query10_binary" +[16]="kron_16_ff_query10_binary" +[17]="kron_17_ff_query10_binary" +[18]="kron_18_ff_query10_binary" +# +[19]="dnc_ff_query10_binary" +[20]="tech_ff_query10_binary" +[21]="enron_ff_query10_binary" +# +[22]="twitter_ff_query10_binary" +[23]="stanford_ff_query10_binary" +[24]="random2N_ff_query10_binary" +[25]="randomNLOGN_ff_query10_binary" +[26]="randomNSQRTN_ff_query10_binary" +[27]="randomDIV_ff_query10_binary" +) + +updates="UPDATES.txt" +queries="QUERIES.txt" +rm $updates +rm $queries + +for i in $(seq 0 13); +do + write_out ${streams[$i]} $updates $queries +done + +echo "" >> $updates +echo "" >> $queries + +updates="UPDATES_FF.txt" +queries="QUERIES_FF.txt" +rm $updates +rm $queries + +for i in $(seq 14 
27); +do + write_out ${streams[$i]} $updates $queries +done + +echo "" >> $updates +echo "" >> $queries + diff --git a/scripts/full_test_experiment.sh b/scripts/full_test_experiment.sh new file mode 100755 index 0000000..7370bfb --- /dev/null +++ b/scripts/full_test_experiment.sh @@ -0,0 +1,105 @@ +#!/bin/bash + +declare base_dir="$(dirname $(dirname $(realpath $0)))" + +cd ${base_dir}/build +set -e +#cmake -DSKETCH_BUFFER_SIZE=25 .. +#make -j +#set +e + +mkdir -p ./../results +mkdir -p ./../results/mpi_speed_results + +run_test() { + cat binary_streams/$1 > /dev/null + mpirun -np $2 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/$1 0 $3 --gtest_filter=*mpi_mixed_speed_test* +} + +declare -a streams=( +[0]="kron_13_query10_binary" +[1]="kron_15_query10_binary" +[2]="kron_16_query10_binary" +[3]="kron_17_query10_binary" +[4]="kron_18_query10_binary" +# +[5]="dnc_query10_binary" +[6]="tech_query10_binary" +[7]="enron_query10_binary" +# +[8]="twitter_query10_binary" +[9]="stanford_query10_binary" +[10]="random2N_query10_binary" +[11]="randomNLOGN_query10_binary" +[12]="randomNSQRTN_query10_binary" +[13]="randomDIV_query10_binary" +# Fixed Forest +[14]="kron_13_ff_query10_binary" +[15]="kron_15_ff_query10_binary" +[16]="kron_16_ff_query10_binary" +[17]="kron_17_ff_query10_binary" +[18]="kron_18_ff_query10_binary" +# +[19]="dnc_ff_query10_binary" +[20]="tech_ff_query10_binary" +[21]="enron_ff_query10_binary" +# +[22]="twitter_ff_query10_binary" +[23]="stanford_ff_query10_binary" +[24]="random2N_ff_query10_binary" +[25]="randomNLOGN_ff_query10_binary" +[26]="randomNSQRTN_ff_query10_binary" +[27]="randomDIV_ff_query10_binary" +) + +declare -a nps=( +[0]=23 +[1]=26 +[2]=28 +[3]=30 +[4]=31 +# +[5]=19 +[6]=26 +[7]=29 +# +[8]=28 +[9]=31 +[10]=32 +[11]=29 +[12]=25 +[13]=29 +# Fixed Forest +[14]=23 +[15]=26 +[16]=28 +[17]=30 +[18]=31 +# +[19]=19 +[20]=26 +[21]=29 +# +[22]=28 +[23]=31 +[24]=32 +[25]=29 +[26]=25 +[27]=29 +) + + +run_test ${streams[$1]} ${nps[$1]} $2 + +# Test run +# mpirun -np 23 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_13_query10_binary 0 0 --gtest_filter=*mpi_mixed_speed_test* + + +exit +# Tests including memory measurement +run_mem_test() { + mpirun -np $1 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/$2 0 0 --gtest_filter=*mpi_mixed_speed_test* & + ./../scripts/mem_record.sh mpi_dynamicCC_tests 2 ./../results/mpi_space_results/batch_size_sweep/$2_$3_mem.txt + wait +} + diff --git a/scripts/gibbs_experiments.sh b/scripts/gibbs_experiments.sh new file mode 100755 index 0000000..4f3c580 --- /dev/null +++ b/scripts/gibbs_experiments.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +declare base_dir="$(dirname $(dirname $(realpath $0)))" + +cd ${base_dir}/build +#set -e +#cmake -DSKETCH_BUFFER_SIZE=25 .. 
+#make -j +#set +e + +mkdir -p ./../results +mkdir -p ./../results/gibbs_speed_results + +run_test() { + cat binary_streams/$1 > /dev/null + ./dynamicCC_tests binary_streams/$1 --gtest_filter=*gibbs_mixed_speed_test* +} + +declare -a streams=( +[0]="kron_13_query10_binary" +[1]="kron_15_query10_binary" +[2]="kron_16_query10_binary" +[3]="kron_17_query10_binary" +[4]="kron_18_query10_binary" +# +[5]="dnc_query10_binary" +[6]="tech_query10_binary" +[7]="enron_query10_binary" +# +[8]="twitter_query10_binary" +[9]="stanford_query10_binary" +[10]="random2N_query10_binary" +[11]="randomNLOGN_query10_binary" +[12]="randomNSQRTN_query10_binary" +[13]="randomDIV_query10_binary" +# Fixed Forest +[14]="kron_13_ff_query10_binary" +[15]="kron_15_ff_query10_binary" +[16]="kron_16_ff_query10_binary" +[17]="kron_17_ff_query10_binary" +[18]="kron_18_ff_query10_binary" +# +[19]="dnc_ff_query10_binary" +[20]="tech_ff_query10_binary" +[21]="enron_ff_query10_binary" +# +[22]="twitter_ff_query10_binary" +[23]="stanford_ff_query10_binary" +[24]="random2N_ff_query10_binary" +[25]="randomNLOGN_ff_query10_binary" +[26]="randomNSQRTN_ff_query10_binary" +[27]="randomDIV_ff_query10_binary" +) + +run_test ${streams[$1]} diff --git a/scripts/mem_record.sh b/scripts/mem_record.sh index 7213534..42530ad 100755 --- a/scripts/mem_record.sh +++ b/scripts/mem_record.sh @@ -19,7 +19,7 @@ while true; do do if [ -e /proc/$pid/smaps_rollup ] then - impact=$((`cat /proc/$pid/smaps_rollup | grep Rss | awk '{print $2}'`)) + impact=$((`cat /proc/$pid/smaps_rollup | grep '^Pss:' | awk '{print $2}'`)) sum=$((impact + sum)) #echo $pid, $impact, $sum fi diff --git a/scripts/mpi_correct_test.sh b/scripts/mpi_correct_test.sh index 464fbd6..20e6328 100755 --- a/scripts/mpi_correct_test.sh +++ b/scripts/mpi_correct_test.sh @@ -9,12 +9,12 @@ set -e make -j set +e -mpirun -np 23 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* -mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* -mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* -mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* -mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* +# mpirun -np 23 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* +# mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* +# mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* +# mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* mpirun -np 19 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/dnc_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* -mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* -mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_stream_binary 
0 0 --gtest_filter=*mpi_correctness_test* +# mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* +# mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_stream_binary 0 0 --gtest_filter=*mpi_correctness_test* diff --git a/scripts/mpi_query_test.sh b/scripts/mpi_query_test.sh index 99eb8c5..e802296 100755 --- a/scripts/mpi_query_test.sh +++ b/scripts/mpi_query_test.sh @@ -10,27 +10,52 @@ set +e mkdir -p ./../results # DEFAULT BATCH SIZE, DEFAULT SKIPLIST HEIGHT FACTOR (1 / log log n) -mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* -mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* -mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* -mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* -mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* + +# mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* + mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_streamified_binary 0 0 --gtest_filter=*mpi_query_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_streamified_binary 0 0 --gtest_filter=*mpi_query_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_streamified_binary 0 0 --gtest_filter=*mpi_query_speed_test* +mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_ff_binary 0 0 --gtest_filter=*mpi_query_speed_test* +mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_ff_binary 0 0 --gtest_filter=*mpi_query_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_ff_binary 0 0 --gtest_filter=*mpi_query_speed_test* + +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/stanford_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +mpirun -np 32 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/random2N_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNLOGN_stream_binary 0 0 
--gtest_filter=*mpi_query_speed_test* +mpirun -np 25 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNSQRTN_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomDIV_stream_binary 0 0 --gtest_filter=*mpi_query_speed_test* + # DEFAULT BATCH SIZE, SKIPLIST HEIGHT FACTOR = 1 -mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* -mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* -mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* -mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* -mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* + +# mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +# mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* + mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_streamified_binary 0 1 --gtest_filter=*mpi_query_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_streamified_binary 0 1 --gtest_filter=*mpi_query_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_streamified_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_ff_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_ff_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_ff_binary 0 1 --gtest_filter=*mpi_query_speed_test* + +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/stanford_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 32 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/random2N_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNLOGN_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 25 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNSQRTN_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* +mpirun -np 29 --bind-to hwthread 
./mpi_dynamicCC_tests binary_streams/randomDIV_stream_binary 0 1 --gtest_filter=*mpi_query_speed_test* diff --git a/scripts/mpi_space_test.sh b/scripts/mpi_space_test.sh index ab98c7e..71a4a0f 100755 --- a/scripts/mpi_space_test.sh +++ b/scripts/mpi_space_test.sh @@ -4,44 +4,96 @@ declare base_dir="$(dirname $(dirname $(realpath $0)))" cd ${base_dir}/build set -e -make -j -set +e +#cmake -DSKETCH_BUFFER_SIZE=25 .. +#make -j +#set +e mkdir -p ./../results +mkdir -p ./../results/mpi_speed_results mkdir -p ./../results/mpi_space_results -run_mem_test() { - mpirun -np $1 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/$2 0 0 --gtest_filter=*mpi_update_speed_test* & - ./../scripts/mem_record.sh mpi_dynamicCC_tests 2 ./../results/mpi_space_results/$2_mem.txt +# Tests including memory measurement +run_test() { + cat binary_streams/$1 > /dev/null + mpirun -np $2 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/$1 0 $3 --gtest_filter=*mpi_mixed_speed_test* & + ./../scripts/mem_record.sh mpi_dynamicCC_tests 2 ./../results/mpi_space_results/$1_$3_mem.txt wait } -run_mem_test_no_reduced_height() { - mpirun -np $1 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/$2 0 1 --gtest_filter=*mpi_update_speed_test* & - ./../scripts/mem_record.sh mpi_dynamicCC_tests 2 ./../results/mpi_space_results/$2_no_reduced_height_mem.txt - wait -} +declare -a streams=( +[0]="kron_13_query10_binary" +[1]="kron_15_query10_binary" +[2]="kron_16_query10_binary" +[3]="kron_17_query10_binary" +[4]="kron_18_query10_binary" +# +[5]="dnc_query10_binary" +[6]="tech_query10_binary" +[7]="enron_query10_binary" +# +[8]="twitter_query10_binary" +[9]="stanford_query10_binary" +[10]="random2N_query10_binary" +[11]="randomNLOGN_query10_binary" +[12]="randomNSQRTN_query10_binary" +[13]="randomDIV_query10_binary" +# Fixed Forest +[14]="kron_13_ff_query10_binary" +[15]="kron_15_ff_query10_binary" +[16]="kron_16_ff_query10_binary" +[17]="kron_17_ff_query10_binary" +[18]="kron_18_ff_query10_binary" +# +[19]="dnc_ff_query10_binary" +[20]="tech_ff_query10_binary" +[21]="enron_ff_query10_binary" +# +[22]="twitter_ff_query10_binary" +[23]="stanford_ff_query10_binary" +[24]="random2N_ff_query10_binary" +[25]="randomNLOGN_ff_query10_binary" +[26]="randomNSQRTN_ff_query10_binary" +[27]="randomDIV_ff_query10_binary" +) + +declare -a nps=( +[0]=23 +[1]=26 +[2]=28 +[3]=30 +[4]=31 +# +[5]=19 +[6]=26 +[7]=29 +# +[8]=28 +[9]=31 +[10]=32 +[11]=29 +[12]=25 +[13]=29 +# Fixed Forest +[14]=23 +[15]=26 +[16]=28 +[17]=30 +[18]=31 +# +[19]=19 +[20]=26 +[21]=29 +# +[22]=28 +[23]=31 +[24]=32 +[25]=29 +[26]=25 +[27]=29 +) + +for i in $(seq 0 27); +do + run_test ${streams[$i]} ${nps[$i]} 0 +done -run_mem_test "23" "kron_13_stream_binary" -run_mem_test "26" "kron_15_stream_binary" -run_mem_test "28" "kron_16_stream_binary" -run_mem_test "30" "kron_17_stream_binary" -run_mem_test "31" "kron_18_stream_binary" -run_mem_test "19" "dnc_stream_binary" -run_mem_test "26" "tech_stream_binary" -run_mem_test "29" "enron_stream_binary" -run_mem_test "19" "dnc_streamified_binary" -run_mem_test "26" "tech_streamified_binary" -run_mem_test "29" "enron_streamified_binary" - -run_mem_test_no_reduced_height "23" "kron_13_stream_binary" -run_mem_test_no_reduced_height "26" "kron_15_stream_binary" -run_mem_test_no_reduced_height "28" "kron_16_stream_binary" -run_mem_test_no_reduced_height "30" "kron_17_stream_binary" -run_mem_test_no_reduced_height "31" "kron_18_stream_binary" -run_mem_test_no_reduced_height "19" "dnc_stream_binary" -run_mem_test_no_reduced_height 
"26" "tech_stream_binary" -run_mem_test_no_reduced_height "29" "enron_stream_binary" -run_mem_test_no_reduced_height "19" "dnc_streamified_binary" -run_mem_test_no_reduced_height "26" "tech_streamified_binary" -run_mem_test_no_reduced_height "29" "enron_streamified_binary" diff --git a/scripts/mpi_update_test.sh b/scripts/mpi_update_test.sh index edbc2ac..ae70a59 100755 --- a/scripts/mpi_update_test.sh +++ b/scripts/mpi_update_test.sh @@ -9,31 +9,56 @@ set +e mkdir -p ./../results -# DEFAULT BATCH SIZE (100), DEFAULT SKIPLIST HEIGHT FACTOR (1 / log log n) -mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# DEFAULT BATCH SIZE (100), DEFAULT SKIPLIST HEIGHT FACTOR (1 / log log n) ============================================================== + +# mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* + mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_streamified_binary 0 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_streamified_binary 0 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_streamified_binary 0 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_ff_binary 0 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_ff_binary 0 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_ff_binary 0 0 --gtest_filter=*mpi_update_speed_test* + +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/stanford_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 32 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/random2N_stream_binary 0 0 
--gtest_filter=*mpi_update_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNLOGN_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 25 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNSQRTN_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomDIV_stream_binary 0 0 --gtest_filter=*mpi_update_speed_test* + + +# BATCH SIZE = 1, DEFAULT SKIPLIST HEIGHT FACTOR (1 / log log n) ======================================================================= + +# mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +# mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* -# BATCH SIZE = 1, DEFAULT SKIPLIST HEIGHT FACTOR (1 / log log n) -mpirun -np 23 ./mpi_dynamicCC_tests binary_streams/kron_13_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_15_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_16_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 30 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_17_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* -mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/kron_18_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_streamified_binary 1 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_streamified_binary 1 0 --gtest_filter=*mpi_update_speed_test* mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_streamified_binary 1 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 19 ./mpi_dynamicCC_tests binary_streams/dnc_ff_binary 1 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 26 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/tech_ff_binary 1 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/enron_ff_binary 1 0 --gtest_filter=*mpi_update_speed_test* + +mpirun -np 28 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/twitter_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 31 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/stanford_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 32 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/random2N_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* 
+mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNLOGN_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 25 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomNSQRTN_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* +mpirun -np 29 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/randomDIV_stream_binary 1 0 --gtest_filter=*mpi_update_speed_test* diff --git a/scripts/sketch_buffer_experiment.sh b/scripts/sketch_buffer_experiment.sh new file mode 100755 index 0000000..86a98e9 --- /dev/null +++ b/scripts/sketch_buffer_experiment.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +declare base_dir="$(dirname $(dirname $(realpath $0)))" +echo "Testing with buffer size $1" + +cd ${base_dir}/build +#set -e +#cmake -DSKETCH_BUFFER_SIZE=$1 .. +#make -j +#set +e + +mkdir -p ./../results +mkdir -p ./../results/mpi_speed_results + +run_test() { + cat binary_streams/$1 > /dev/null + mpirun -np $2 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/$1 0 0 --gtest_filter=*mpi_mixed_speed_test* +} + +run_test kron_16_query10_binary 28 +run_test kron_16_ff_query10_binary 28 +run_test twitter_query10_binary 28 +run_test twitter_ff_query10_binary 28 + diff --git a/scripts/sketch_buffer_space.sh b/scripts/sketch_buffer_space.sh new file mode 100755 index 0000000..55e4978 --- /dev/null +++ b/scripts/sketch_buffer_space.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +declare base_dir="$(dirname $(dirname $(realpath $0)))" +echo "Testing with buffer size $1" + +cd ${base_dir}/build +set -e +cmake -DSKETCH_BUFFER_SIZE=$1 .. +make -j +set +e + +mkdir -p ./../results +mkdir -p ./../results/mpi_space_results + +run_mem_test() { + mpirun -np $1 --bind-to hwthread ./mpi_dynamicCC_tests binary_streams/$2 0 0 --gtest_filter=*mpi_mixed_speed_test* & + ./../scripts/mem_record.sh mpi_dynamicCC_tests 2 ./../results/mpi_space_results/batch_size_sweep/$2_$3_mem.txt + wait +} + +# Test run +# run_mem_test "23" "kron_13_query10_binary" $1 + +# KRON-16 Batch Size Sweep +run_mem_test "28" "kron_16_query10_binary" $1 +# KRON-16 fixed-forest +run_mem_test "28" "kron_16_ff_query10_binary" $1 + +# Twitter Batch Size Sweep +run_mem_test "28" "twitter_query10_binary" $1 +# Twitter fixed-forest +run_mem_test "28" "twitter_ff_query10_binary" $1 diff --git a/src/batch_tiers.cpp b/src/batch_tiers.cpp new file mode 100644 index 0000000..ea2dc7b --- /dev/null +++ b/src/batch_tiers.cpp @@ -0,0 +1,761 @@ +#include "../include/batch_tiers.h" +#include "util.h" +#include +#include +#include + +// // #define CANARY(X) do {if (update.edge.src == 1784 && update.edge.dst == 4420) { std::cout << __FILE__ << ":" << __LINE__ << " says " << X << std::endl;}} while (false) +// #define CANARY(X) ; +// // #define ENDPOINT_CANARY(X, src, dst) do {if ((src == 7781 || dst == 7781)) {std::cout << __FILE__ << ":" << __LINE__ << " says " << X << " " << src << " " << dst << std::endl;}} while (false) +// #define ENDPOINT_CANARY(X, src, dst) ; + +// long lct_time = 0; +// long ett_time = 0; +// long ett_find_root = 0; +// long ett_get_agg = 0; +// long sketch_query = 0; +// long sketch_time = 0; +// long refresh_time = 0; +// long parallel_isolated_check = 0; +// long tiers_grown = 0; +// long normal_refreshes = 0; + + +// template requires(SketchColumnConcept) +// bool Batch::is_connected(node_id_t a, node_id_t b) { +// return this->link_cut_tree.find_root(a) == this->link_cut_tree.find_root(b); +// } + +// template requires(SketchColumnConcept) +// thread_local parlay::sequence BatchTiers::_deltas_buffer = 
parlay::sequence(); + +template requires(SketchColumnConcept) +BatchTiers::BatchTiers(node_id_t num_nodes, uint64_t seed) : num_nodes(num_nodes), seed(seed), link_cut_tree(num_nodes), query_ett(num_nodes, 0, seed) , _already_checked_components(2048, true), _unique_update_ids(2048), _component_reps_dsu(0) { + // TODO - use the batch_size parameter? + _component_reps_dsu = union_find_local(maximum_batch_size * 2); + // Algorithm parameters + uint32_t num_tiers = log2(num_nodes)/(log2(3)-1); + // Initialize all the ETTs + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + // int seed = dist(rng); + std::cout << "SEED: " << seed << std::endl; + rng.seed(seed); + dist(rng); // To give 1:1 correspondence with MPI seeds + for (uint32_t i = 0; i < num_tiers; i++) { + int tier_seed = dist(rng); + ett.emplace_back(num_nodes, i, tier_seed); + } + + // Initialize the root nodes matrix + _root_nodes.resize(num_tiers); + for (auto& tier_roots : _root_nodes) { + tier_roots.resize(maximum_batch_size * 2); + } + // and _updated_components + _updated_components.resize(num_tiers); + // { + // auto tmp = parlay::parlay_unordered_map_direct(2 * maximum_batch_size, true); + // std::swap(this->_already_checked_components, tmp); + // } + // { + // auto tmp2 = parlay::parlay_unordered_map_direct(2 * maximum_batch_size, true); + // std::swap(this->_unique_update_ids, tmp2); + // } +} + +template + requires(SketchColumnConcept) +BatchTiers::BatchTiers( + node_id_t num_nodes, uint32_t num_tiers, int batch_size, size_t seed) : num_nodes(num_nodes), seed(seed), link_cut_tree(num_nodes), query_ett(num_nodes, 0, seed), _already_checked_components(num_nodes, true), _unique_update_ids(2048), _component_reps_dsu(0) { + // TODO - use the batch_size parameter? + _component_reps_dsu = union_find_local(maximum_batch_size * 2); + + // Initialize all the ETTs + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + // int seed = dist(rng); + std::cout << "SEED: " << seed << std::endl; + rng.seed(seed); + dist(rng); // To give 1:1 correspondence with MPI seeds + for (uint32_t i = 0; i < num_tiers; i++) { + int tier_seed = dist(rng); + ett.emplace_back(num_nodes, i, tier_seed); + } + + // Initialize the root nodes matrix + _root_nodes.resize(num_tiers); + for (auto& tier_roots : _root_nodes) { + tier_roots.resize(maximum_batch_size * 2); + } + // and _updated_components + _updated_components.resize(num_tiers); +} + +template requires(SketchColumnConcept) +BatchTiers::~BatchTiers() {} + + +// TODO - check correctness on doing links/cuts out of order. lowkey it should be fine +// from a correctness pov +template requires(SketchColumnConcept) +void BatchTiers::update_batch(const parlay::sequence &updates) { + + size_t num_updates = updates.size(); + size_t num_tiers = ett.size(); + assert(num_updates <= maximum_batch_size); + _already_checked_components.clear(); + // std::cout << "Processing batch of size " << num_updates << " on " << num_tiers << " tiers." << std::endl; + + // treat all update endpoints as coming from independent components + _component_reps_dsu.reset(); + + // 0) Step 0: Process any necessary tree cut operations on every tier. 
+    // we won't immediately do the sketch updates in this case, and will rely on the next parallel branch for that
+    tbb::parallel_for(
+        tbb::blocked_range<size_t>(0, ett.size()),
+        [&](const tbb::blocked_range<size_t>& r) {
+            for (size_t i = r.begin(); i != r.end(); ++i) {
+                for (const auto& update : updates) {
+                    if (update.type == DELETE && ett[i].has_edge(update.edge.src, update.edge.dst)) {
+                        ett[i].cut(update.edge.src, update.edge.dst);
+                    }
+                }
+            }
+        },
+        tbb::static_partitioner{}
+    );
+    // note: we could just put this in the above region or use par_do
+    // and process on the LCT:
+    for (const auto& update : updates) {
+        if (update.type == DELETE && is_tree_edge(update.edge.src, update.edge.dst)) {
+            link_cut_tree.cut(update.edge.src, update.edge.dst);
+            query_ett.cut(update.edge.src, update.edge.dst);
+            transaction_log.push_back(update);
+        }
+    }
+    // 1) Step 1: Process all sketch aggregates in true batch parallel.
+    // _process_sketch_aggs_only(updates);
+    // _process_sketch_aggs_tier_sequential(updates);
+    _process_sketch_aggs_with_cas(updates);
+
+    // 2) Step 2: Check for isolated components.
+    uint32_t first_isolated_tier = _search_for_isolated_components(updates);
+    // std::cout << "First isolated tier: " << first_isolated_tier << std::endl;
+    if (first_isolated_tier == UINT32_MAX) {
+        // no isolated components found, so we can return early
+        return;
+    }
+    // the first isolated tier has had no link/cut modifications to it, so its roots array is a valid check
+
+    _unique_update_ids.clear();
+    _unique_update_ids.resize(num_updates * 2);
+    std::atomic<size_t> num_unique_components = 0;
+    // construct _unique_update_ids such that it contains just ONE idx for every unique
+    // component at the first isolated tier
+    parlay::parlay_unordered_map_direct<SkipListNode<SketchClass>*, int32_t> component_to_unique_id(2048, true);
+    tbb::parallel_for(
+        tbb::blocked_range<size_t>(0, num_updates),
+        [&](const tbb::blocked_range<size_t>& r) {
+            for (size_t update_idx = r.begin(); update_idx != r.end(); ++update_idx) {
+                for (bool src_or_dst : {true, false}) {
+                    node_id_t vertex = src_or_dst ? updates[update_idx].edge.src : updates[update_idx].edge.dst;
+                    SkipListNode<SketchClass>* root = root_node(first_isolated_tier, update_idx, src_or_dst);
+                    // assign a unique id to this component if it doesn't have one already
+                    std::optional<int32_t> existing_id = component_to_unique_id.Insert(root, vertex);
+                    if (!existing_id.has_value()) {
+                        size_t idx = num_unique_components.fetch_add(1);
+                        _unique_update_ids[idx] = vertex;
+                    }
+                }
+            }
+        });
+    _unique_update_ids.resize(num_unique_components.load());
+
+    // 3) proceed tier-serially:
+    // * at the first isolated tier, collect all components that are isolated.
+    // * each isolated component will give a new edge (a,b).
+    // * if a path already exists between a and b in the final tier/LCT, then cut the maximum weight
+    //   edge on the path, starting from the tier where it first appears (call it tier M) and going until the final one.
+    //
+    //   (NOTE that tier M has to have a higher index than the first isolated tier, because we know that
+    //   the first isolated tier has a forest in which the endpoints a and b were not connected;
+    //   if there were a lower index tier, that would have violated the subset invariant.)
+    //
+    // * if a path does not exist, then we link the two endpoints in all tiers ABOVE the first isolated tier.
+    //   Note that this can cause NEW isolated components to appear in tiers above.
+    //
+    //
+    // * once we do this for every isolated component at the first isolated tier, check the next tier
+    //   to see if it has any isolated components. If it does, repeat (3) at the next tier.
+    //
+    //
+    // SHORTCUTS: we can also tell if a component is maximized by checking for an empty sketch. This
+    // means we can avoid doing further isolation checks.
+    // for (uint32_t)
+    // TODO - is_empty check optimization
+    // return;
+    for (uint32_t tier = first_isolated_tier; tier < ett.size()-1; tier++) {
+        _updated_components[tier].clear();
+    }
+    for (uint32_t tier = first_isolated_tier; tier < ett.size()-1; tier++) {
+        bool components_maximized = _fix_isolations_at_tier(updates, tier);
+        if (components_maximized) {
+            // if all components were maximized, we can skip the remaining tiers:
+            // we know that at this point there are no isolations at higher tiers,
+            // because all potential isolated components must be a union of the modified components
+            // found at this tier. so we can just return
+            // std::cout << "All components maximized at tier " << tier << ", skipping further checks" << std::endl;
+            return;
+        }
+    }
+};
+
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+std::vector<std::set<node_id_t>> BatchTiers<SketchClass>::get_cc() {
+    this->flush_buffer();
+    std::vector<std::set<node_id_t>> cc;
+    std::set<EulerTourNode<SketchClass>*> visited;
+    int top = ett.size()-1;
+    for (uint32_t i = 0; i < ett[top].ett_nodes.size(); i++) {
+        // TODO - this is simply incorrect with a hash map impl of ett_nodes
+        if (visited.find(&ett[top].ett_node(i)) == visited.end()) {
+            std::set<EulerTourNode<SketchClass>*> pointer_component = ett[top].ett_node(i).get_component();
+            std::set<node_id_t> component;
+            for (auto pointer : pointer_component) {
+                component.insert(pointer->vertex);
+                visited.insert(pointer);
+            }
+            cc.push_back(component);
+        }
+    }
+    return cc;
+}
+
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+bool BatchTiers<SketchClass>::is_connected(node_id_t a, node_id_t b) {
+    this->flush_buffer();
+    // TODO - use a sketchless ETT
+    // return this->link_cut_tree.find_root(a) == this->link_cut_tree.find_root(b);
+    return query_ett.is_connected(a, b);
+}
+
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+void BatchTiers<SketchClass>::_process_sketch_aggs_only(const parlay::sequence<GraphUpdate> &updates) {
+    size_t num_updates = updates.size();
+    size_t num_tiers = ett.size();
+    assert(num_updates <= maximum_batch_size);
+    // 1) STEP 1: Speculative non-tree edge update processing
+    // (plus cleaning up and doing the updates for the tree edge deletions)
+    // in parallel, across every tier and update,
+    // update the ETT aggregates,
+    // then reduce to find the maximum
+    // TODO - make sure tree edge deletions aren't being processed twice.
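+    // NOTE: root_node(tier, idx, is_src) is not defined in this diff; given that the
+    // constructors size _root_nodes as [num_tiers][2 * maximum_batch_size], a plausible
+    // layout (an assumption, not confirmed by this code) would be:
+    //
+    //     SkipListNode<SketchClass>*& root_node(size_t tier, size_t idx, bool is_src) {
+    //         return _root_nodes[tier][2 * idx + (is_src ? 0 : 1)];
+    //     }
+    //
+    // i.e. every update owns two cached root slots per tier, one for each endpoint.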
+ // parlay::parallel_for(0, num_tiers*num_updates, [&](size_t i) { + // size_t tier = i / num_updates; + // size_t update_idx = i % num_updates; + // GraphUpdate update = updates[update_idx]; + // vec_t edge_id = concat_pairing_fn(update.edge.src, update.edge.dst); + // SkipListNode<> *src_parent = ett[tier].update_sketch_atomic(update.edge.src, edge_id); + // SkipListNode<> *dst_parent = ett[tier].update_sketch_atomic(update.edge.dst, edge_id); + + // root_node(tier, update_idx, true) = src_parent; + // root_node(tier, update_idx, false) = dst_parent; + // }, granularity); + + // step 1 memory optimization: + // enforce greater locality by first doing edges in + // lower, higher sorted order (only do the srcs) + // then in higher, lower (invert, then do dsts) + auto src_sorted_update_idxs = parlay::tabulate(num_updates, [&](size_t i) { + return i; + }); + parlay::sort_inplace(src_sorted_update_idxs, [&](size_t i, size_t j) { + return updates[i].edge.src < updates[j].edge.src; + }); + auto dst_sorted_update_idxs = parlay::tabulate(num_updates, [&](size_t i) { + return i; + }); + parlay::sort_inplace(dst_sorted_update_idxs, [&](size_t i, size_t j) { + return updates[i].edge.dst < updates[j].edge.dst; + }); + + // bool conservative=true; + // do src updates: + // parlay::blocked_for(0, num_updates * num_tiers, granularity, [&](size_t block_idx, size_t start, size_t end) { + // for (size_t i = start; i < end; i++) { + tbb::parallel_for( + tbb::blocked_range(0, num_updates * num_tiers, granularity), + [&](const tbb::blocked_range &r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + size_t tier = i / num_updates; + size_t update_idx = src_sorted_update_idxs[i % num_updates]; + GraphUpdate update = updates[update_idx]; + vec_t edge_id = concat_pairing_fn(update.edge.src, update.edge.dst); + ColumnEntryDelta delta = ett[tier].generate_entry_delta(update.edge.src, edge_id); + SkipListNode *src_parent = ett[tier].update_sketch_atomic(update.edge.src, delta); + root_node(tier, update_idx, true) = src_parent; + } + }); + // }, tbb::static_partitioner{}); + // }, conservative); + // now dst updates: + // parlay::blocked_for(0, num_updates * num_tiers, granularity, [&](size_t block_idx, size_t start, size_t end) { + // for (size_t i = start; i < end; i++) { + tbb::parallel_for( + tbb::blocked_range(0, num_updates * num_tiers, granularity), + [&](const tbb::blocked_range &r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + size_t tier = i / num_updates; + size_t update_idx = dst_sorted_update_idxs[i % num_updates]; + GraphUpdate update = updates[update_idx]; + vec_t edge_id = concat_pairing_fn(update.edge.src, update.edge.dst); + ColumnEntryDelta delta = ett[tier].generate_entry_delta(update.edge.dst, edge_id); + SkipListNode *dst_parent = ett[tier].update_sketch_atomic(update.edge.dst, delta); + root_node(tier, update_idx, false) = dst_parent; + // }, conservative);} + } + }); + // tbb::static_partitioner{}); + // }, conservative); +} + +template requires(SketchColumnConcept) +void BatchTiers::_process_sketch_aggs_with_cas(const parlay::sequence &updates) { + size_t num_updates = updates.size(); + size_t num_tiers = ett.size(); + assert(num_updates <= maximum_batch_size); + auto src_sorted_update_idxs = parlay::tabulate(num_updates, [&](size_t i) { + return i; + }); + parlay::sort_inplace(src_sorted_update_idxs, [&](size_t i, size_t j) { + return updates[i].edge.src < updates[j].edge.src; + }); + auto dst_sorted_update_idxs = parlay::tabulate(num_updates, [&](size_t i) { + return i; + }); + 
parlay::sort_inplace(dst_sorted_update_idxs, [&](size_t i, size_t j) { + return updates[i].edge.dst < updates[j].edge.dst; + }); + parlay::sequence*> temp_roots; + // in src order: + tbb::parallel_for( + tbb::blocked_range(0, num_updates * num_tiers, granularity), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + size_t tier = i / num_updates; + size_t update_idx = src_sorted_update_idxs[i % num_updates]; + GraphUpdate update = updates[update_idx]; + const ColumnEntryDelta delta = ett[tier].generate_entry_delta(update.edge.src, concat_pairing_fn(update.edge.src, update.edge.dst)); + SkipListNode* src_parent = ett[tier].ett_node( + update.edge.src) + .update_sketch_atomic_to_level(delta, 1); // 3 levels up + SkipListNode* root = src_parent->find_root_with_cas(); + root_node(tier, update_idx, true) = root; + } + }); + // in dst order: + tbb::parallel_for( + tbb::blocked_range(0, num_updates * num_tiers, granularity), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + size_t tier = i / num_updates; + size_t update_idx = dst_sorted_update_idxs[i % num_updates]; + GraphUpdate update = updates[update_idx]; + + const ColumnEntryDelta delta = ett[tier].generate_entry_delta(update.edge.dst, concat_pairing_fn(update.edge.src, update.edge.dst)); + SkipListNode* dst_parent = ett[tier].ett_node( + update.edge.dst) + .update_sketch_atomic_to_level(delta, 1); // 3 levels up + SkipListNode* root = dst_parent->find_root_with_cas(); + root_node(tier, update_idx, false) = root; + } + }); + // TODO - this is gonna be unperformant, but I'd say worth it for simplicity in testing + // update root_node matrix + // in src order: + tbb::parallel_for( + tbb::blocked_range(0, num_updates * num_tiers, granularity), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + size_t tier = i / num_updates; + size_t update_idx = src_sorted_update_idxs[i % num_updates]; + GraphUpdate update = updates[update_idx]; + if (root_node(tier, update_idx, true) != nullptr) { + root_node(tier, update_idx, true)->recompute_aggs_topdown(2); + } + else { + SkipListNode* root = ett[tier].get_root(update.edge.src); + root_node(tier, update_idx, true) = root; + } + } + }); + // in dst order: + tbb::parallel_for( + tbb::blocked_range(0, num_updates * num_tiers, granularity), + [&](const tbb::blocked_range& r) { + for (size_t i = r.begin(); i != r.end(); ++i) { + size_t tier = i / num_updates; + size_t update_idx = dst_sorted_update_idxs[i % num_updates]; + GraphUpdate update = updates[update_idx]; + SkipListNode* root = ett[tier].get_root(update.edge.dst); + if (root_node(tier, update_idx, false) != nullptr) { + root_node(tier, update_idx, false)->recompute_aggs_topdown(2); + } + else { + SkipListNode* root = ett[tier].get_root(update.edge.dst); + root_node(tier, update_idx, false) = root; + } + } + }); +} + +template requires(SketchColumnConcept) +void BatchTiers::_process_sketch_aggs_tier_sequential(const parlay::sequence &updates) { + size_t num_updates = updates.size(); + size_t num_tiers = ett.size(); + assert(num_updates <= maximum_batch_size); + auto src_sorted_update_idxs = parlay::tabulate(num_updates, [&](size_t i) { + return i; + }); + // parlay::sort_inplace(src_sorted_update_idxs, [&](size_t i, size_t j) { + // return updates[i].edge.src < updates[j].edge.src; + // }); + auto dst_sorted_update_idxs = parlay::tabulate(num_updates, [&](size_t i) { + return i; + }); + // parlay::sort_inplace(dst_sorted_update_idxs, 
[&](size_t i, size_t j) { + // return updates[i].edge.dst < updates[j].edge.dst; + // }); + + // bool conservative=false; + // bool conservative=true; + tbb::parallel_for( + tbb::blocked_range(0, num_tiers, 1), + [&](const tbb::blocked_range &r) { + for (size_t tier = r.begin(); tier != r.end(); ++tier) { + for (size_t i = 0; i < num_updates; i++) { + size_t update_idx = src_sorted_update_idxs[i]; + // size_t update_idx = i; + GraphUpdate update = updates[update_idx]; + vec_t edge_id = concat_pairing_fn(update.edge.src, update.edge.dst); + // SkipListNode *src_parent = ett[tier].update_sketch(update.edge.src, edge_id); + const ColumnEntryDelta delta = ett[tier].generate_entry_delta(update.edge.src, edge_id); + SkipListNode *src_parent = ett[tier].update_sketch(update.edge.src, delta); + // SkipListNode *src_parent = ett[tier].update_sketch_atomic(update.edge.src, delta); + + root_node(tier, update_idx, true) = src_parent; + } + for (size_t i = 0; i < num_updates; i++) { + root_node(tier, i, true)->process_updates(); + } + for (size_t i = 0; i < num_updates; i++) { + size_t update_idx = dst_sorted_update_idxs[i]; + // size_t update_idx = i; + GraphUpdate update = updates[update_idx]; + vec_t edge_id = concat_pairing_fn(update.edge.src, update.edge.dst); + // SkipListNode *dst_parent = ett[tier].update_sketch(update.edge.dst, edge_id); + const ColumnEntryDelta delta = ett[tier].generate_entry_delta(update.edge.dst, edge_id); + SkipListNode *dst_parent = ett[tier].update_sketch(update.edge.dst, delta); + root_node(tier, update_idx, false) = dst_parent; + } + for (size_t i = 0; i < num_updates; i++) { + root_node(tier, i, false)->process_updates(); + } + } + }, + tbb::static_partitioner{} + ); + // 0, conservative); + // tbb::parallel_for( + // tbb::blocked_range(0, num_tiers, 1), + // [&](const tbb::blocked_range &r) { + // for (size_t tier = r.begin(); tier != r.end(); ++tier) { + // // for (size_t tier = 0; tier < num_tiers; tier++) { + // // source loop: + // parlay::sequence _deltas_buffer; + // size_t i = 0; + // while (i < num_updates) { + // _deltas_buffer.clear(); + // size_t j = i; + // while (j < num_updates && updates[src_sorted_update_idxs[j]].edge.src == updates[src_sorted_update_idxs[i]].edge.src) { + // GraphUpdate update = updates[src_sorted_update_idxs[j]]; + // vec_t edge_id = concat_pairing_fn( + // update.edge.src, + // update.edge.dst); + // auto delta = ett[tier].generate_entry_delta( + // update.edge.src, + // edge_id); + // _deltas_buffer.push_back(delta); + + // j++; + // } + // SkipListNode *src_parent = this->ett[tier].update_sketch( + // updates[src_sorted_update_idxs[i]].edge.src, + // _deltas_buffer.head(_deltas_buffer.size())); + // for (size_t k = i; k < j; k++) { + // size_t update_idx = src_sorted_update_idxs[k]; + // root_node(tier, update_idx, true) = src_parent; + // } + // i = j; + // } + // // dest loop: + // i = 0; + // while (i < num_updates) { + // _deltas_buffer.clear(); + // size_t j = i; + // while (j < num_updates && updates[dst_sorted_update_idxs[j]].edge.dst == updates[dst_sorted_update_idxs[i]].edge.dst) { + // GraphUpdate update = updates[dst_sorted_update_idxs[j]]; + // vec_t edge_id = concat_pairing_fn( + // update.edge.src, + // update.edge.dst); + // auto delta = ett[tier].generate_entry_delta( + // update.edge.dst, + // edge_id); + // _deltas_buffer.push_back(delta); + // j++; + // } + // SkipListNode *dst_parent = this->ett[tier].update_sketch( + // updates[dst_sorted_update_idxs[i]].edge.dst, + // 
_deltas_buffer.head(_deltas_buffer.size())); + // for (size_t k = i; k < j; k++) { + // size_t update_idx = dst_sorted_update_idxs[k]; + // root_node(tier, update_idx, false) = dst_parent; + // } + // i = j; + // } + // parlay::parallel_for(0, num_updates, [&](size_t k) { + // root_node(tier, k, true)->process_updates(); + // root_node(tier, k, false)->process_updates(); + // }); + // // for (size_t k = 0; k < num_updates; k++) { + // // root_node(tier, k, true)->process_updates(); + // // root_node(tier, k, false)->process_updates(); + // // } + // } + // }, + // tbb::static_partitioner{} + // ); +} + +template requires(SketchColumnConcept) +uint32_t BatchTiers::_search_for_isolated_components(const parlay::sequence &updates) { + size_t num_updates = updates.size(); + size_t num_tiers = ett.size(); + assert(num_updates <= maximum_batch_size); + // we can use parlay::find, as long as we are using "tier-major" order + auto isolation_tabulate = parlay::delayed_tabulate( + (num_tiers - 1) * num_updates, + [&](size_t i) { + size_t tier = i / num_updates; + size_t update_idx = i % num_updates; + for (bool src_or_dst : {true, false}) { + SkipListNode *root = root_node(tier, update_idx, src_or_dst); + SkipListNode *next_root = root_node(tier + 1, update_idx, src_or_dst); + uint32_t tier_size = root->size; + uint32_t next_size = next_root->size; + if (tier_size == next_size) { + // This means that the component is isolated + if (root->sketch_agg.sample().result == GOOD) { + // this means that the component is isolated + // std::cout << "isolation found at tier " << tier << " for update idx " << update_idx << std::endl; + return true; + } + } + } + return false; + }); + auto first_isolated_iter = parlay::find(isolation_tabulate, true); + if (first_isolated_iter == isolation_tabulate.end()) { + // no isolated components! + return UINT32_MAX; + } + uint32_t first_isolated_idx = first_isolated_iter - isolation_tabulate.begin(); + // note - i dont think we care about the isolation idx + uint32_t first_isolated_tier = first_isolated_idx / num_updates; + return first_isolated_tier; +} + +template requires(SketchColumnConcept) +bool BatchTiers::_fix_isolations_at_tier(const parlay::sequence &updates, uint32_t tier) { + size_t num_updates = updates.size(); + // size_t num_tiers = ett.size(); + + // needs to be atomically updated. + bool components_maximized = true; + for (size_t i= 0 ; i < _unique_update_ids.size(); i++) { + node_id_t vertex = _unique_update_ids[i]; + _updated_components[tier].push_back(vertex); + } + // for each update, we only need to grab ROOTS + // for (size_t i=0; i < num_updates * 2; i++) { + // for (size_t i = 0; i < num_updates * 2; i++) { + // // only if you are STILL a root. + // // AND your sketch is non-empty + // likely_if (!_component_reps_dsu.is_root(i)) { + // // return; + // continue; + // } + // bool src_or_dst = static_cast(i % 2); + // size_t update_idx = i / 2; + // _updated_components[tier].push_back( + // src_or_dst ? updates[update_idx].edge.src : updates[update_idx].edge.dst); + // }; + // now, _updated_components contains all components that need to be + // including ones that may have been inherited from doing links/cuts below. 
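+    // Recap of the isolation test this loop relies on (see _search_for_isolated_components):
+    // a component is isolated at this tier when its root has the same size at tier and tier+1
+    // (no tree edge above merges it into anything larger) while its sketch aggregate still
+    // samples a GOOD edge, i.e. some non-tree edge leaves the component. As a hypothetical
+    // example, if component {1,2} has size 2 at both tiers and its sketch samples edge (2,5),
+    // then (2,5) must be promoted into all higher tiers (or displace a heavier tree edge there).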
+    // for (size_t i = 0; i < _updated_components[tier].size(); i++) {
+    parlay::sequence<SkipListNode<SketchClass>*> temp_roots;
+    std::atomic<size_t> num_temp_roots = 0;
+    temp_roots.resize(_updated_components[tier].size());
+
+    tbb::parallel_for(
+        tbb::blocked_range<size_t>(0, _updated_components[tier].size()),
+        [&](const tbb::blocked_range<size_t>& r) {
+            for (size_t i = r.begin(); i != r.end(); ++i) {
+                node_id_t vertex_in_component = _updated_components[tier][i];
+                // TODO - we can do some work to avoid checking the same component (maybe?)
+                // in case a component was previously merged already
+                // SkipListNode<SketchClass>* component_root = ett[tier].get_root(vertex_in_component);
+                SkipListNode<SketchClass>* component_root = ett[tier].ett_node(vertex_in_component).get_allowed_caller()->find_root_with_cas();
+                if (component_root == nullptr) {
+                    continue;
+                }
+                size_t idx = num_temp_roots.fetch_add(1);
+                temp_roots[idx] = component_root;
+                // component_root->clear_cas_flags();
+                SkipListNode<SketchClass>* next_tier_root = ett[tier + 1].get_root(vertex_in_component);
+
+                // TODO - this is no longer necessary, because we are using the DSU to keep the smallest
+                // possible set of _updated_components entries.
+                // actually, we'll keep it for now anyway:
+                // the current DSU filter is just being used as a simple filter,
+                // since we aren't doing any changes to it past the first isolated tier.
+                if (_already_checked_components.find((size_t)(component_root)) != _already_checked_components.end()) {
+                    // std::cout << "yerr" << std::endl;
+                    // return;
+                    continue;
+                }
+                // _already_checked_components.insert_or_assign((size_t)component_root, tier);
+                // _already_checked_components[(size_t)component_root] = tier;
+                _already_checked_components.Insert((size_t)component_root, tier);
+                SketchClass& ett_agg = component_root->sketch_agg;
+                // TODO - do we want to sample before? I don't think so. but we can at least
+                // do the empty check with a special new primitive
+                SketchSample query_result = ett_agg.sample();
+                if (query_result.result != ZERO) {
+                    if (components_maximized) {
+                        // bool f = false;
+                        // bool t = true;
+                        __sync_bool_compare_and_swap((bool*)&components_maximized, true, false);
+                    }
+                }
+                {
+                    if (component_root->size == next_tier_root->size) {
+                        if (query_result.result == GOOD) {
+                            std::lock_guard guard(this->lct_and_query_ett_lock);
+                            // ... and see if a path exists between the endpoints in the LCT
+                            edge_id_t edge = query_result.idx;
+                            node_id_t a = (node_id_t)edge;
+                            node_id_t b = (node_id_t)(edge >> 32);
+
+                            // check if a path exists between the endpoints
+                            // auto a_root = link_cut_tree.find_root(a);
+                            // auto b_root = link_cut_tree.find_root(b);
+                            // TODO - ETT
+
+                            // if it does, then we either need to cut it, or ignore this update
+                            // if (a_root == b_root) {
+                            if (link_cut_tree.connected(a, b)) {
+                                // a path exists, so we need to cut the maximum weight edge on the path
+                                // THIS REALLY CAN'T BE PARALLELIZED at the moment
+                                std::pair<Edge, uint32_t> max_edge = link_cut_tree.path_query(a, b);
+                                node_id_t c = max_edge.first.src;
+                                node_id_t d = max_edge.first.dst;
+                                // node_id_t c = (node_id_t)max_edge.first;
+                                // node_id_t d = (node_id_t)(max_edge.first >> 32);
+                                uint32_t first_appeared_tier = max_edge.second;
+                                // if the first appeared tier is equal to tier+1, then we should check if this
+                                // was a link we had just discovered. If so, we neither cut it, nor include this link.
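+                                // A hypothetical walk-through of the branch below: the sampled
+                                // edge is (a,b), the LCT already has an a-b path, and path_query
+                                // returned that path's maximum-weight edge (c,d), which first
+                                // appeared at tier M = first_appeared_tier. If M == tier + 1,
+                                // (c,d) was linked by this very pass, so swapping it for (a,b)
+                                // gains nothing and we do neither. Otherwise (c,d) is evicted
+                                // starting from the tier where it first appeared, and (a,b) is
+                                // linked into every tier above this one, preserving the subset
+                                // invariant between tiers.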
+                                if (first_appeared_tier == tier + 1) {
+                                    // YOU KNOW that these couldn't have been connected in the tier above,
+                                    // because otherwise the components could not have been the same size
+                                    // (which is necessary for the isolation condition)
+                                    //
+                                    // so: DO NOTHING
+                                } else {
+                                    // likewise, if it's a higher tier, definitely perform the cut
+                                    _pending_cuts.push_back({{c, d}, first_appeared_tier});
+                                    link_cut_tree.cut(c, d);
+                                    query_ett.cut(c, d);
+                                    transaction_log.push_back({{c, d}, DELETE});
+
+                                    // and push the link we just found
+                                    _pending_links.push_back({a, b});
+                                    link_cut_tree.link(a, b, tier + 1);
+                                    query_ett.link(a, b);
+                                    transaction_log.push_back({{a, b}, INSERT});
+                                    // and update the dsu
+                                }
+                            } else {
+                                // if there was no competing link between the endpoints in the LCT,
+                                // then we just link them.
+                                _pending_links.push_back({a, b});
+                                link_cut_tree.link(a, b, tier + 1);
+                                query_ett.link(a, b);
+                                transaction_log.push_back({{a, b}, INSERT});
+                            }
+                        }
+                    }
+                }
+            }
+        });
+    // clear cas flags:
+    tbb::parallel_for(
+        tbb::blocked_range<size_t>(0, num_temp_roots),
+        [&](const tbb::blocked_range<size_t>& r) {
+            for (size_t i = r.begin(); i != r.end(); ++i) {
+                temp_roots[i]->clear_cas_flags();
+            }
+        });
+
+    // at this point, we know exactly what cuts and links we need to do at higher tiers.
+    // for each tier, we'll perform the cuts and links, and then add any entries to
+    // _updated_components[tier] that we need to.
+    // parlay::parallel_for(tier + 1, ett.size(), [&](size_t t) {
+    tbb::parallel_for(
+        tbb::blocked_range<size_t>(tier + 1, ett.size(), 1),
+        [&](const tbb::blocked_range<size_t> &r) {
+            for (size_t t = r.begin(); t != r.end(); ++t) {
+                // for (size_t t = tier + 1; t < ett.size(); t
+                for (auto &cut : _pending_cuts) {
+                    // do not perform the cut if the edge has not yet appeared
+                    if (cut.second < t)
+                        continue;
+                    // cut the edge in the current tier
+                    ett[t].cut(cut.first.src, cut.first.dst);
+                }
+                for (const Edge &link : _pending_links) {
+                    ett[t].link(link.src, link.dst);
+                }
+            }
+        },
+        tbb::static_partitioner{});
+    // });
+
+    // at this point, all induced links and cuts have been performed, and we have a log
+    // of components that need to be checked for isolation in the next tier.
+ _pending_links.clear(); + _pending_cuts.clear(); + _already_checked_components.clear(); + + return components_maximized; + +} + +template class BatchTiers; \ No newline at end of file diff --git a/src/euler_tour_tree.cpp b/src/euler_tour_tree.cpp index a0ef06d..a79cbed 100644 --- a/src/euler_tour_tree.cpp +++ b/src/euler_tour_tree.cpp @@ -2,49 +2,101 @@ #include -EulerTourTree::EulerTourTree(node_id_t num_nodes, uint32_t tier_num, int seed) { - // Initialize all the ETT node - ett_nodes.reserve(num_nodes); - for (node_id_t i = 0; i < num_nodes; ++i) { - ett_nodes.emplace_back(seed, i, tier_num); +template + requires(SketchColumnConcept) +EulerTourTree::EulerTourTree(node_id_t num_nodes, uint32_t tier_num, int seed) : temp_sketch(0, seed), seed(seed), max_num_nodes(num_nodes), tier_num(tier_num) { + if constexpr (std::is_same_v>>) { + ett_nodes.reserve(num_nodes); + for (node_id_t i = 0; i < num_nodes; ++i) { + ett_nodes.emplace_back(seed, i, tier_num); + } } - // Initialize the temp_sketch - this->temp_sketch = new Sketch(sketch_len, seed, 1, sketch_err); + this->temp_sketch = SketchClass( + SketchClass::suggest_capacity(sketch_len), seed); } -void EulerTourTree::link(node_id_t u, node_id_t v) { - ett_nodes[u].link(ett_nodes[v], temp_sketch); +template requires(SketchColumnConcept) +void EulerTourTree::link(node_id_t u, node_id_t v) { + ett_node(u).link(ett_node(v), temp_sketch); } -void EulerTourTree::cut(node_id_t u, node_id_t v) { - ett_nodes[u].cut(ett_nodes[v], temp_sketch); +template requires(SketchColumnConcept) +void EulerTourTree::cut(node_id_t u, node_id_t v) { + ett_node(u).cut(ett_node(v), temp_sketch); } -bool EulerTourTree::has_edge(node_id_t u, node_id_t v) { - return ett_nodes[u].has_edge_to(&ett_nodes[v]); +template requires(SketchColumnConcept) +bool EulerTourTree::has_edge(node_id_t u, node_id_t v) { + return ett_node(u).has_edge_to(&ett_node(v)); } -SkipListNode* EulerTourTree::update_sketch(node_id_t u, vec_t update_idx) { - return ett_nodes[u].update_sketch(update_idx); +template requires(SketchColumnConcept) +SkipListNode* EulerTourTree::update_sketch(node_id_t u, vec_t update_idx) { + return ett_node(u).update_sketch(update_idx); } -std::pair EulerTourTree::update_sketches(node_id_t u, node_id_t v, vec_t update_idx) { +template requires(SketchColumnConcept) +SkipListNode* EulerTourTree::update_sketch(node_id_t u, const ColumnEntryDelta &delta) { + return ett_node(u).update_sketch(delta); +} + +template requires(SketchColumnConcept) +SkipListNode* EulerTourTree::update_sketch(node_id_t u, const ColumnEntryDeltas &deltas) { + if (deltas.size() >= 8) { + // std::cout << "Using temp sketch for batch of size " << deltas.size() << std::endl; + this->temp_sketch.zero_contents(); + for (const auto& delta : deltas) { + this->temp_sketch.apply_entry_delta(delta); + } + return ett_node(u).update_sketch((const SketchClass&)temp_sketch); + } + else { + return ett_node(u).update_sketch(deltas); + } +} + +template requires(SketchColumnConcept) +SkipListNode* EulerTourTree::update_sketch(node_id_t u, const SketchClass &sketch) { + return ett_node(u).update_sketch(sketch); +} + + +template requires(SketchColumnConcept) +SkipListNode* EulerTourTree::update_sketch_atomic(node_id_t u, vec_t update_idx) { + return ett_node(u).update_sketch_atomic(update_idx); +} + +template requires(SketchColumnConcept) +SkipListNode* EulerTourTree::update_sketch_atomic(node_id_t u, const ColumnEntryDelta &delta) { + return ett_node(u).update_sketch_atomic(delta); +} + +template 
requires(SketchColumnConcept) +SkipListNode* EulerTourTree::update_sketch_atomic(node_id_t u, const ColumnEntryDeltas &deltas) { + return ett_node(u).update_sketch_atomic(deltas); +} + +template requires(SketchColumnConcept) +std::pair *, SkipListNode *> +EulerTourTree::update_sketches(node_id_t u, node_id_t v, + vec_t update_idx) { // Update the paths in lockstep, stopping at the first common node - SkipListNode* curr1 = ett_nodes[u].allowed_caller; - SkipListNode* curr2 = ett_nodes[v].allowed_caller; - SkipListNode *prev1, *prev2; + SkipListNode* curr1 = ett_node(u).allowed_caller; + SkipListNode* curr2 = ett_node(v).allowed_caller; + SkipListNode *prev1, *prev2; + ColumnEntryDelta delta = generate_entry_delta(u, update_idx); while (curr1 || curr2) { if (curr1 == curr2) { - SkipListNode* root = curr1->get_root(); + SkipListNode<>* root = curr1->get_root(); return {root, root}; } if (curr1) { - curr1->update_agg(update_idx); + curr1->update_agg_entry_delta(delta); prev1 = curr1; curr1 = prev1->get_parent(); } if (curr2) { - curr2->update_agg(update_idx); + curr2->update_agg_entry_delta(delta); prev2 = curr2; curr2 = prev2->get_parent(); } @@ -52,106 +104,211 @@ std::pair EulerTourTree::update_sketches(node_id_t return {prev1, prev2}; } -SkipListNode* EulerTourTree::get_root(node_id_t u) { - return ett_nodes[u].get_root(); +template requires(SketchColumnConcept) +SkipListNode* EulerTourTree::get_root(node_id_t u) { + return ett_node(u).get_root(); } -Sketch* EulerTourTree::get_aggregate(node_id_t u) { - return ett_nodes[u].get_aggregate(); +template requires(SketchColumnConcept) +const SketchClass& EulerTourTree::get_aggregate(node_id_t u) { + return ett_node(u).get_aggregate(); } -uint32_t EulerTourTree::get_size(node_id_t u) { - return ett_nodes[u].get_size(); +template requires(SketchColumnConcept) +uint32_t EulerTourTree::get_size(node_id_t u) { + return ett_node(u).get_size(); } -EulerTourNode::EulerTourNode(long seed, node_id_t vertex, uint32_t tier) : seed(seed), vertex(vertex), tier(tier) { +template requires(SketchColumnConcept) +EulerTourNode::EulerTourNode(long seed, node_id_t vertex, uint32_t tier) : seed(seed), vertex(vertex), tier(tier) { // Initialize sentinel - this->make_edge(nullptr, nullptr); + this->make_edge(nullptr); } -EulerTourNode::EulerTourNode(long seed) : seed(seed) { +template requires(SketchColumnConcept) +EulerTourNode::EulerTourNode(long seed) : seed(seed) { // Initialize sentinel - this->make_edge(nullptr, nullptr); + this->make_edge(nullptr); } -EulerTourNode::~EulerTourNode() { +template requires(SketchColumnConcept) +EulerTourNode::~EulerTourNode() { // Final boundary nodes are a memory leak // Need to somehow delete all the skiplist nodes at the end // for (auto edge : edges) // edge.second->uninit_element(false); } -SkipListNode* EulerTourNode::make_edge(EulerTourNode* other, Sketch* temp_sketch) { +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::make_edge(EulerTourNode* other, SketchClass &temp_sketch) { assert(!other || this->tier == other->tier); //Constructing a new SkipListNode with pointer to this ETT object - SkipListNode* node; + SkipListNode* node; if (allowed_caller == nullptr) { - node = SkipListNode::init_element(this, true); + node = SkipListNode::init_element(this, true); allowed_caller = node; - if (temp_sketch != nullptr) { + if (temp_sketch.is_initialized()) { node->update_path_agg(temp_sketch); - temp_sketch->zero_contents(); + // note: this is really poorly written, + // but we KNOW that a move was not performed 
here. + // because in this branch, node is instantiated with a sketch + temp_sketch.zero_contents(); } } else { - node = SkipListNode::init_element(this, false); + node = SkipListNode::init_element(this, false); } //Add the new SkipListNode to the edge list return this->edges.emplace(std::make_pair(other, node)).first->second; //Returns the new node pointer or the one that already existed if it did } -void EulerTourNode::delete_edge(EulerTourNode* other, Sketch* temp_sketch) { +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::make_edge(EulerTourNode* other) { assert(!other || this->tier == other->tier); - SkipListNode* node_to_delete = this->edges[other]; + //Constructing a new SkipListNode with pointer to this ETT object + SkipListNode* node; + if (allowed_caller == nullptr) { + node = SkipListNode::init_element(this, true); + allowed_caller = node; + } else { + node = SkipListNode::init_element(this, false); + } + //Add the new SkipListNode to the edge list + return this->edges.emplace(std::make_pair(other, node)).first->second; + //Returns the new node pointer or the one that already existed if it did +} + +template requires(SketchColumnConcept) +void EulerTourNode::delete_edge(EulerTourNode* other, SketchClass& temp_sketch) { + assert(!other || this->tier == other->tier); + SkipListNode* node_to_delete = this->edges[other]; this->edges.erase(other); if (node_to_delete == allowed_caller) { if (this->edges.empty()) { allowed_caller = nullptr; node_to_delete->process_updates(); // std::cout << node_to_delete << std::endl; - temp_sketch->merge(*node_to_delete->sketch_agg); - node_to_delete->sketch_agg = nullptr; + // temp_sketch = std::move(node_to_delete->sketch_agg); + temp_sketch.merge(std::move(node_to_delete->sketch_agg)); + // node_to_delete->sketch_agg = nullptr; + node_to_delete->sketch_agg = SketchClass(0, seed); // We just gave the sketch to new allowed caller } else { allowed_caller = this->edges.begin()->second; node_to_delete->process_updates(); allowed_caller->update_path_agg(node_to_delete->sketch_agg); - node_to_delete->sketch_agg = nullptr; // We just gave the sketch to new allowed caller + node_to_delete->sketch_agg = SketchClass(0, seed); // We just gave the sketch to new allowed caller } } node_to_delete->uninit_element(true); } -SkipListNode* EulerTourNode::update_sketch(vec_t update_idx) { +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch(vec_t update_idx) { assert(allowed_caller); return this->allowed_caller->update_path_agg(update_idx); } -SkipListNode* EulerTourNode::get_root() { +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch(const ColumnEntryDelta &delta) { + assert(allowed_caller); + return this->allowed_caller->update_path_agg(delta); +} + +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch(const ColumnEntryDeltas &deltas) { + assert(allowed_caller); + return this->allowed_caller->update_path_agg(deltas); +} + +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch(const SketchClass &sketch) { + assert(allowed_caller); + return this->allowed_caller->update_path_agg(sketch); +} + +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch_atomic(vec_t update_idx) { + assert(allowed_caller); + return this->allowed_caller->update_path_agg_atomic(update_idx); +} +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch_atomic(const ColumnEntryDelta &delta) { + 
assert(allowed_caller); + return this->allowed_caller->update_path_agg_atomic(delta); +} +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch_atomic(const ColumnEntryDeltas &deltas) { + assert(allowed_caller); + return this->allowed_caller->update_path_agg_atomic(deltas); +} + +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch_noagg_atomic(const ColumnEntryDelta &delta) { + assert(allowed_caller); + this->allowed_caller->update_agg_entry_delta(delta); + return this->allowed_caller; +} +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::update_sketch_atomic_to_level(const ColumnEntryDelta &delta, uint32_t level) { + assert(allowed_caller); + // return this->allowed_caller->update_agg_atomic_to_level(level); + + SkipListNode* curr = this->allowed_caller; + SkipListNode* prev = nullptr; + while (curr != nullptr && level > 0) { + curr->update_agg_atomic_entry_delta(delta); + prev = curr; + curr = curr->get_parent(); + level--; + } + if (prev) { + std::atomic_ref atomic_needs_update(prev->needs_update); + atomic_needs_update.store( + AggUpdateState::PARENT_IS_STALE, + std::memory_order_relaxed + ); + } + return prev; +} + +template requires(SketchColumnConcept) +void EulerTourNode::recompute_aggregates_parallel() { + assert(allowed_caller); +} + + +template requires(SketchColumnConcept) +SkipListNode* EulerTourNode::get_root() const { return this->allowed_caller->get_root(); } //Get the aggregate sketch at the root of the ETT for this node -Sketch* EulerTourNode::get_aggregate() { +template requires(SketchColumnConcept) +const SketchClass& EulerTourNode::get_aggregate() { assert(allowed_caller); return this->allowed_caller->get_list_aggregate(); } -uint32_t EulerTourNode::get_size() { +template requires(SketchColumnConcept) +uint32_t EulerTourNode::get_size() { return this->allowed_caller->get_list_size(); } -bool EulerTourNode::has_edge_to(EulerTourNode* other) { +template requires(SketchColumnConcept) +bool EulerTourNode::has_edge_to(EulerTourNode* other) { return !(this->edges.find(other) == this->edges.end()); } -std::set EulerTourNode::get_component() { +template requires(SketchColumnConcept) +std::set*> EulerTourNode::get_component() { return this->allowed_caller->get_component(); } -bool EulerTourNode::link(EulerTourNode& other, Sketch* temp_sketch) { +template requires(SketchColumnConcept) +bool EulerTourNode::link(EulerTourNode& other, SketchClass& temp_sketch) { assert(this->tier == other.tier); - SkipListNode* this_sentinel = this->edges.begin()->second->get_last(); - SkipListNode* other_sentinel = other.edges.begin()->second->get_last(); + SkipListNode* this_sentinel = this->edges.begin()->second->get_last(); + SkipListNode* other_sentinel = other.edges.begin()->second->get_last(); // There should always be a sentinel assert(this_sentinel == this_sentinel->node->edges.at(nullptr)); @@ -171,19 +328,19 @@ bool EulerTourNode::link(EulerTourNode& other, Sketch* temp_sketch) { // ^ ^ // '--------------------'--- might be null - SkipListNode* aux_this_right = this->edges.begin()->second; - SkipListNode* aux_this_left = SkipListNode::split_left(aux_this_right); + SkipListNode* aux_this_right = this->edges.begin()->second; + SkipListNode* aux_this_left = SkipListNode::split_left(aux_this_right); // Unlink and destroy other_sentinel - SkipListNode* aux_other = SkipListNode::split_left(other_sentinel); + SkipListNode* aux_other = SkipListNode::split_left(other_sentinel); 
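  // A nullptr here appears to mean other's tour consisted of its sentinel
  // alone (i.e. `other` was an isolated vertex), which is why both of its
  // fragments are left empty in the branch below.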
other_sentinel->node->delete_edge(nullptr, temp_sketch); - SkipListNode* aux_other_left, *aux_other_right; + SkipListNode* aux_other_left, *aux_other_right; if (aux_other == nullptr) { aux_other_right = aux_other_left = nullptr; } else { aux_other_right = other.edges.begin()->second; - aux_other_left = SkipListNode::split_left(aux_other_right); + aux_other_left = SkipListNode::split_left(aux_other_right); } // reroot other tree @@ -191,45 +348,60 @@ bool EulerTourNode::link(EulerTourNode& other, Sketch* temp_sketch) { // R LR L R LR L // N N - SkipListNode* aux_edge_left = this->make_edge(&other, temp_sketch); - SkipListNode* aux_edge_right = other.make_edge(this, temp_sketch); + SkipListNode* aux_edge_left = this->make_edge(&other, temp_sketch); + SkipListNode* aux_edge_right = other.make_edge(this, temp_sketch); - SkipListNode::join(aux_this_left, aux_edge_left, aux_other_right, + SkipListNode::join(aux_this_left, aux_edge_left, aux_other_right, aux_other_left, aux_edge_right, aux_this_right); return true; } -bool EulerTourNode::cut(EulerTourNode& other, Sketch* temp_sketch) { +template requires(SketchColumnConcept) +bool EulerTourNode::cut(EulerTourNode& other, SketchClass& temp_sketch) { assert(this->tier == other.tier); if (this->edges.find(&other) == this->edges.end()) { assert(other.edges.find(this) == other.edges.end()); return false; } - SkipListNode* e1 = this->edges[&other]; - SkipListNode* e2 = other.edges[this]; + SkipListNode* e1 = this->edges[&other]; + SkipListNode* e2 = other.edges[this]; - SkipListNode* frag1r = SkipListNode::split_right(e1); + SkipListNode* frag1r = SkipListNode::split_right(e1); bool order_is_e1e2 = e2->get_last() != e1; - SkipListNode* frag1l = SkipListNode::split_left(e1); + SkipListNode* frag1l = SkipListNode::split_left(e1); this->delete_edge(&other, temp_sketch); - SkipListNode* frag2r = SkipListNode::split_right(e2); - SkipListNode* frag2l = SkipListNode::split_left(e2); + SkipListNode* frag2r = SkipListNode::split_right(e2); + SkipListNode* frag2l = SkipListNode::split_left(e2); other.delete_edge(this, temp_sketch); if (order_is_e1e2) { // e1 is to the left of e2 // e2 should be made into a sentinel - SkipListNode* sentinel = other.make_edge(nullptr, temp_sketch); - SkipListNode::join(frag2l, sentinel); - SkipListNode::join(frag1l, frag2r); + SkipListNode* sentinel = other.make_edge(nullptr, temp_sketch); + SkipListNode::join(frag2l, sentinel); + SkipListNode::join(frag1l, frag2r); } else { // e2 is to the left of e1 // e1 should be made into a sentinel - SkipListNode* sentinel = this->make_edge(nullptr, temp_sketch); - SkipListNode::join(frag2r, sentinel); - SkipListNode::join(frag2l, frag1r); + SkipListNode* sentinel = this->make_edge(nullptr, temp_sketch); + SkipListNode::join(frag2r, sentinel); + SkipListNode::join(frag2l, frag1r); } return true; } + + +template class EulerTourNode; + +// using VectorContainer = std::vector>; +// using HashmapContainer = absl::flat_hash_map*>; +template class EulerTourTree; +template class EulerTourTree; + + +using ETTWithHashmap = EulerTourTree*>>; +using ETTWithVector = EulerTourTree>>; + +// template std::ostream& operator<<(std::ostream&, const EulerTourNode&); \ No newline at end of file diff --git a/src/graph_tiers.cpp b/src/graph_tiers.cpp index 4272bd9..09790ad 100644 --- a/src/graph_tiers.cpp +++ b/src/graph_tiers.cpp @@ -21,7 +21,8 @@ long tiers_grown = 0; long normal_refreshes = 0; -GraphTiers::GraphTiers(node_id_t num_nodes) : link_cut_tree(num_nodes) { +template requires(SketchColumnConcept) 
+GraphTiers<SketchClass>::GraphTiers(node_id_t num_nodes, uint64_t seed) : link_cut_tree(num_nodes) {
 	// Algorithm parameters
 	uint32_t num_tiers = log2(num_nodes)/(log2(3)-1);
@@ -29,7 +30,7 @@
 	std::random_device dev;
 	std::mt19937 rng(dev());
 	std::uniform_int_distribution dist(0,MAX_INT);
-	int seed = dist(rng);
+	// int seed = dist(rng);
 	std::cout << "SEED: " << seed << std::endl;
 	rng.seed(seed);
 	dist(rng); // To give 1:1 correspondence with MPI seeds
@@ -38,49 +39,60 @@
 		ett.emplace_back(num_nodes, i, tier_seed);
 	}
-	root_nodes.reserve(num_tiers*2);
+	root_nodes.resize(num_tiers*2);
 }

-GraphTiers::~GraphTiers() {}
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+GraphTiers<SketchClass>::~GraphTiers() {}

-void GraphTiers::update(GraphUpdate update) {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+void GraphTiers<SketchClass>::update(GraphUpdate update) {
 	edge_id_t edge = VERTICES_TO_EDGE(update.edge.src, update.edge.dst);
 	// Update the sketches of both endpoints of the edge in all tiers
 	if (update.type == DELETE && link_cut_tree.has_edge(update.edge.src, update.edge.dst)) {
 		link_cut_tree.cut(update.edge.src, update.edge.dst);
 	}
 	START(su);
-	#pragma omp parallel for
+	std::atomic<bool> did_cut(false);
+	// #pragma omp parallel for
 	for (uint32_t i = 0; i < ett.size(); i++) {
 		if (update.type == DELETE && ett[i].has_edge(update.edge.src, update.edge.dst)) {
+			did_cut = true;
 			ett[i].cut(update.edge.src, update.edge.dst);
 			ENDPOINT_CANARY("Cutting Tier " << i << " ETT With", update.edge.src, update.edge.dst);
 		}
+		// maintain roots of u,v endpoints
 		root_nodes[2*i] = ett[i].update_sketch(update.edge.src, (vec_t)edge);
 		root_nodes[2*i+1] = ett[i].update_sketch(update.edge.dst, (vec_t)edge);
 		ENDPOINT_CANARY("Updating Sketch With", update.edge.src, update.edge.dst);
+	}
 	STOP(sketch_time, su);

 	// Refresh the data structure
 	START(ref);
-	refresh(update);
+	this->refresh(update, did_cut);
 	STOP(refresh_time, ref);
 }

-void GraphTiers::refresh(GraphUpdate update) {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+void GraphTiers<SketchClass>::refresh(GraphUpdate update, bool did_cut) {
 	// In parallel check if all tiers are not isolated
 	START(iso);
 	std::atomic<bool> isolated(false);
-	//#pragma omp parallel for
+	// #pragma omp parallel for
 	for (uint32_t tier = 0; tier < ett.size()-1; tier++) {
 		// Check if the tree containing first endpoint is isolated
 		uint32_t tier_size1 = root_nodes[2*tier]->size;
 		uint32_t next_size1 = root_nodes[2*(tier+1)]->size;
+		// NOTE - We know that we are a subset of the next tier's component
+		// by maintenance of invariants.
+		// Thus, if the sizes are equal, we are not a proper subset
+		// but are a subset. This means we are violating the isolation condition.
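		// A compact restatement of that check (hypothetical shorthand, not
		// part of the patch): by the subset invariant, equal sizes imply
		// identical components, i.e.
		//   possibly_isolated(tier, v)  :=  size(tier, v) == size(tier + 1, v)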
 		if (tier_size1 == next_size1) {
 			root_nodes[2*tier]->process_updates();
-			Sketch* ett_agg1 = root_nodes[2*tier]->sketch_agg;
-			ett_agg1->reset_sample_state();
-			SketchSample query_result1 = ett_agg1->sample();
+			SketchClass &ett_agg1 = root_nodes[2*tier]->sketch_agg;
+			ett_agg1.reset_sample_state();
+			SketchSample<> query_result1 = ett_agg1.sample();
 			if (query_result1.result == GOOD) {
 				isolated = true;
 				continue;
@@ -91,9 +103,9 @@
 		uint32_t next_size2 = root_nodes[2*(tier+1)+1]->size;
 		if (tier_size2 == next_size2) {
 			root_nodes[2*tier+1]->process_updates();
-			Sketch* ett_agg2 = root_nodes[2*tier+1]->sketch_agg;
-			ett_agg2->reset_sample_state();
-			SketchSample query_result2 = ett_agg2->sample();
+			SketchClass &ett_agg2 = root_nodes[2*tier+1]->sketch_agg;
+			ett_agg2.reset_sample_state();
+			SketchSample<> query_result2 = ett_agg2.sample();
 			if (query_result2.result == GOOD) {
 				isolated = true;
 				continue;
@@ -101,11 +113,12 @@
 		}
 	}
 	STOP(parallel_isolated_check, iso);
+	if (isolated || did_cut) normal_refreshes++;
 	if (!isolated) return;
-	normal_refreshes++;

 	// For each tier for each endpoint of the edge
 	for (uint32_t tier = 0; tier < ett.size()-1; tier++) {
+		bool both_components_maximized = true;
 		for (node_id_t v : {update.edge.src, update.edge.dst}) {
 			// Check if the tree containing this endpoint is isolated
 			START(size);
@@ -117,14 +130,18 @@
 				continue;
 			START(agg);
-			SkipListNode* root = ett[tier].get_root(v);
+			SkipListNode<SketchClass>* root = ett[tier].get_root(v);
 			root->process_updates();
-			Sketch* ett_agg = root->sketch_agg;
+			SketchClass &ett_agg = root->sketch_agg;
 			STOP(ett_get_agg, agg);
 			START(sq);
-			ett_agg->reset_sample_state();
-			SketchSample query_result = ett_agg->sample();
+			ett_agg.reset_sample_state();
+			SketchSample<> query_result = ett_agg.sample();
 			STOP(sketch_query, sq);
+
+			if (query_result.result != ZERO) {
+				both_components_maximized = false;
+			}

 			// Check for new edge to eliminate isolation
 			if (query_result.result != GOOD)
@@ -150,7 +167,7 @@
 			// Remove the maximum tier edge on all paths where it exists
 			START(ett1);
-			#pragma omp parallel for
+			// #pragma omp parallel for
 			for (uint32_t i = max.second; i < ett.size(); i++) {
 				ett[i].cut(c,d);
 				ENDPOINT_CANARY("Cutting Tier " << i << " ETT With", c, d);
@@ -163,7 +180,7 @@
 			// Join the ETTs for the endpoints of the edge on all tiers above the current
 			START(ett2);
-			#pragma omp parallel for
+			// #pragma omp parallel for
 			for (uint32_t i = tier+1; i < ett.size(); i++) {
 				ett[i].link(a,b);
 				ENDPOINT_CANARY("Linking Tier " << i << " ETT With", a, b);
@@ -173,16 +190,20 @@
 			link_cut_tree.link(a,b, tier+1);
 			STOP(lct_time, lct4);
 		}
+		// if (both_components_maximized) {
+		// 	break;
+		// }
 	}
 }

-std::vector<std::set<node_id_t>> GraphTiers::get_cc() {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+std::vector<std::set<node_id_t>> GraphTiers<SketchClass>::get_cc() {
 	std::vector<std::set<node_id_t>> cc;
-	std::set<EulerTourNode*> visited;
+	std::set<EulerTourNode<SketchClass>*> visited;
 	int top = ett.size()-1;
 	for (uint32_t i = 0; i < ett[top].ett_nodes.size(); i++) {
-		if (visited.find(&ett[top].ett_nodes[i]) == visited.end()) {
-			std::set<EulerTourNode*> pointer_component = ett[top].ett_nodes[i].get_component();
+		if (visited.find(&ett[top].ett_node(i)) == visited.end()) {
+			std::set<EulerTourNode<SketchClass>*> pointer_component = ett[top].ett_node(i).get_component();
 			std::set<node_id_t> component;
 			for (auto pointer :
pointer_component) { component.insert(pointer->vertex); @@ -194,6 +215,9 @@ std::vector> GraphTiers::get_cc() { return cc; } -bool GraphTiers::is_connected(node_id_t a, node_id_t b) { +template requires(SketchColumnConcept) +bool GraphTiers::is_connected(node_id_t a, node_id_t b) { return this->link_cut_tree.find_root(a) == this->link_cut_tree.find_root(b); } + +template class GraphTiers; \ No newline at end of file diff --git a/src/input_node.cpp b/src/input_node.cpp index 0b3430a..291232c 100644 --- a/src/input_node.cpp +++ b/src/input_node.cpp @@ -3,6 +3,7 @@ long normal_refreshes = 0; long dt_operation_time = 0; +long num_updates = 0; InputNode::InputNode(node_id_t num_nodes, uint32_t num_tiers, int batch_size, int seed) : num_nodes(num_nodes), num_tiers(num_tiers), link_cut_tree(num_nodes), query_ett(num_nodes, 0, seed) { @@ -24,6 +25,7 @@ InputNode::~InputNode() { } void InputNode::update(GraphUpdate update) { + num_updates++; UpdateMessage update_message; update_message.update = update; update_buffer[buffer_size++] = update_message; @@ -34,10 +36,16 @@ void InputNode::update(GraphUpdate update) { void InputNode::process_updates() { if (buffer_size == 1) return; + // BUFFER PRE-PROCESSING ! + // for every update; if we know it's isolated (adds new connectivity) info, + // swap it to the front of the buffer + + uint32_t num_updates = buffer_size-1; // If less than 1/10 of the last updates are isolated use sliding window bool prev_strat = using_sliding_window; - using_sliding_window = false;//(isolation_count max_edge = link_cut_tree.path_query(update.edge.src, update.edge.dst); + split_revert_buffer[i] = max_edge.second; + // probably where most structural (spanning forest) deletes happen? + // potentially - revisit link_cut_tree.cut(update.edge.src, update.edge.dst); query_ett.cut(update.edge.src, update.edge.dst); + // transaction_log.add(update.edge, DELETE); + transaction_log.push_back(update); } } // Attempt to do the entire batch parallel with greedy refresh @@ -70,6 +83,9 @@ void InputNode::process_updates() { unlikely_if (split_revert_buffer[update_idx-1] != MAX_INT) { link_cut_tree.link(update.edge.src, update.edge.dst, split_revert_buffer[update_idx-1]); query_ett.link(update.edge.src, update.edge.dst); + // transaction_log.add(update.edge, generate_entry_dINSERT); + // // TODO - not actually sure if update is an insert type + transaction_log.push_back(GraphUpdate{update.edge, INSERT}); } } // Update the isolation history @@ -85,9 +101,11 @@ void InputNode::process_updates() { for (int update_idx = minimum_isolated_update; update_idx < end_update_idx; update_idx++) { GraphUpdate update = update_buffer[update_idx].update; START(dt_operation_timer1); - unlikely_if (update.type == DELETE && link_cut_tree.has_edge(update.edge.src, update.edge.dst)) { + unlikely_if (update.type == DELETE && query_ett.has_edge(update.edge.src, update.edge.dst)) { link_cut_tree.cut(update.edge.src, update.edge.dst); query_ett.cut(update.edge.src, update.edge.dst); + // transaction_log.add(update.edge, DELETE); + transaction_log.push_back(update); } STOP(dt_operation_time, dt_operation_timer1); uint32_t start_tier = 0; @@ -102,21 +120,27 @@ void InputNode::process_updates() { MPI_Send(&refresh_message, sizeof(RefreshMessage), MPI_BYTE, start_tier+1, 0, MPI_COMM_WORLD); for (uint32_t tier = start_tier; tier < num_tiers; tier++) { int rank = tier + 1; + // bool break_early = true; if (tier != 0) for (auto endpoint : {0,1}) { std::ignore = endpoint; // Receive a broadcast to see if the current 
tier/endpoint is isolated or not EttUpdateMessage update_message; bcast(&update_message, sizeof(UpdateMessage), rank); - if (update_message.type == NOT_ISOLATED) + if (update_message.type == NOT_ISOLATED) { continue; + } + // else { + // break_early = false; + // } this_update_isolated = true; // Process a LCT query message first LctResponseMessage response_message; - response_message.connected = link_cut_tree.find_root(update_message.endpoint1) == link_cut_tree.find_root(update_message.endpoint2); + // response_message.connected = link_cut_tree.find_root(update_message.endpoint1) == link_cut_tree.find_root(update_message.endpoint2); + response_message.connected = query_ett.is_connected(update_message.endpoint1, update_message.endpoint2); if (response_message.connected) { - std::pair max = link_cut_tree.path_aggregate(update_message.endpoint1, update_message.endpoint2); - response_message.cycle_edge = max.first; + std::pair max = link_cut_tree.path_query(update_message.endpoint1, update_message.endpoint2); + response_message.cycle_edge = VERTICES_TO_EDGE(max.first.src, max.first.dst); response_message.weight = max.second; } MPI_Send(&response_message, sizeof(LctResponseMessage), MPI_BYTE, rank, 0, MPI_COMM_WORLD); @@ -130,14 +154,21 @@ void InputNode::process_updates() { if (update_message.type == LINK) { link_cut_tree.link(update_message.endpoint1, update_message.endpoint2, update_message.start_tier); query_ett.link(update_message.endpoint1, update_message.endpoint2); + // transaction_log.add(update_message, INSERT); + transaction_log.push_back( + GraphUpdate{Edge{update_message.endpoint1, update_message.endpoint2}, INSERT}); break; } else if (update_message.type == CUT) { link_cut_tree.cut(update_message.endpoint1, update_message.endpoint2); query_ett.cut(update_message.endpoint1, update_message.endpoint2); + // transaction_log.add(update_message, DELETE); + transaction_log.push_back( + GraphUpdate{Edge{update_message.endpoint1, update_message.endpoint2}, DELETE}); } STOP(dt_operation_time, dt_operation_timer2); } } + // if (break_early) break; } isolation_count -= (int)isolation_history_queue.front(); isolation_history_queue.pop(); @@ -180,4 +211,5 @@ void InputNode::end() { std::cout << "======================= INPUT NODE ======================" << std::endl; std::cout << "Dynamic tree operations time (ms): " << dt_operation_time/1000 << std::endl; std::cout << "Normal refreshes: " << normal_refreshes << std::endl; + std::cout << "Number of updates: " << num_updates << std::endl; } diff --git a/src/link_cut_tree.cpp b/src/link_cut_tree.cpp index a7e3dde..19def89 100644 --- a/src/link_cut_tree.cpp +++ b/src/link_cut_tree.cpp @@ -228,14 +228,25 @@ LinkCutNode* LinkCutNode::splay() { return this; } -LinkCutTree::LinkCutTree(node_id_t num_nodes) : nodes(num_nodes) {} -LinkCutNode* LinkCutTree::join(LinkCutNode* v, LinkCutNode* w) { +template +LinkCutTree::LinkCutTree(node_id_t num_nodes) : max_nodes(num_nodes) { + if constexpr (std::is_same_v>) { + nodes = Container(num_nodes); + nodes.reserve(num_nodes); + for (node_id_t i = 0; i < num_nodes; ++i) + nodes.emplace_back(); + } + initialize_all_nodes(); +} + +template +LinkCutNode* LinkCutTree::join(LinkCutNode* v, LinkCutNode* w) { assert(v != nullptr && w != nullptr && v->get_parent() == nullptr && w->get_parent() == nullptr); LinkCutNode* tail = v->get_tail(); LinkCutNode* head = w->get_head(); - node_id_t tail_id = tail-&(this->nodes[0]); - node_id_t head_id = head-&(this->nodes[0]); + node_id_t tail_id = tail - this->get_node_ptr(0); + 
node_id_t head_id = head - this->get_node_ptr(0); edge_id_t edge = (tail_id < head_id) ? (((edge_id_t)tail_id << 32) + head_id) : (((edge_id_t)head_id << 32) + tail_id); tail->make_preferred_edge(edge); head->make_preferred_edge(edge); @@ -248,15 +259,16 @@ LinkCutNode* LinkCutTree::join(LinkCutNode* v, LinkCutNode* w) { return tail; } -std::pair LinkCutTree::split(LinkCutNode* v) { +template +std::pair LinkCutTree::split(LinkCutNode* v) { assert(v != nullptr); v->splay(); LinkCutNode* r = v->get_right(); LinkCutNode* w = nullptr; if (r != nullptr) { w = r->recompute_head(); - node_id_t v_id = v-&(this->nodes[0]); - node_id_t w_id = w-&(this->nodes[0]); + node_id_t v_id = v - this->get_node_ptr(0); + node_id_t w_id = w - this->get_node_ptr(0); edge_id_t edge = (v_id < w_id) ? (((edge_id_t)v_id << 32) + w_id) : (((edge_id_t)w_id << 32) + v_id); v->unmake_preferred_edge(edge); w->unmake_preferred_edge(edge); @@ -273,14 +285,16 @@ std::pair LinkCutTree::split(LinkCutNode* v) { return paths; } -LinkCutNode* LinkCutTree::splice(LinkCutNode* p) { +template +LinkCutNode* LinkCutTree::splice(LinkCutNode* p) { LinkCutNode* v = p->get_head()->get_dparent(); std::pair paths = this->split(v); p->get_head()->set_dparent(nullptr); return this->join(paths.first, p); } -LinkCutNode* LinkCutTree::expose(LinkCutNode* v) { +template +LinkCutNode* LinkCutTree::expose(LinkCutNode* v) { std::pair paths = this->split(v); LinkCutNode* p = paths.first; while(p->get_head()->get_dparent() != nullptr) { @@ -289,7 +303,8 @@ LinkCutNode* LinkCutTree::expose(LinkCutNode* v) { return p; } -LinkCutNode* LinkCutTree::evert(LinkCutNode* v) { +template +LinkCutNode* LinkCutTree::evert(LinkCutNode* v) { LinkCutNode* p = this->expose(v); p->reverse(); p->recompute_head(); @@ -297,10 +312,11 @@ LinkCutNode* LinkCutTree::evert(LinkCutNode* v) { return p; } -void LinkCutTree::link(node_id_t v, node_id_t w, uint32_t weight) { +template +void LinkCutTree::link(node_id_t v, node_id_t w, uint32_t weight) { assert(find_root(v) != find_root(w)); - LinkCutNode* v_node = &this->nodes[v]; - LinkCutNode* w_node = &this->nodes[w]; + LinkCutNode* v_node = this->get_node_ptr(v); + LinkCutNode* w_node = this->get_node_ptr(w); edge_id_t edge = (v < w) ? (((edge_id_t)v << 32) + w) : (((edge_id_t)w << 32) + v); v_node->insert_edge(edge, weight); w_node->insert_edge(edge, weight); @@ -311,10 +327,11 @@ void LinkCutTree::link(node_id_t v, node_id_t w, uint32_t weight) { this->join(p_v, p_w); } -void LinkCutTree::cut(node_id_t v, node_id_t w) { +template +void LinkCutTree::cut(node_id_t v, node_id_t w) { assert(find_root(v) == find_root(w)); - LinkCutNode* v_node = &this->nodes[v]; - LinkCutNode* w_node = &this->nodes[w]; + LinkCutNode* v_node = this->get_node_ptr(v); + LinkCutNode* w_node = this->get_node_ptr(w); edge_id_t edge = (v < w) ? 
(((edge_id_t)v << 32) + w) : (((edge_id_t)w << 32) + v); v_node->remove_edge(edge); w_node->remove_edge(edge); @@ -323,36 +340,42 @@ void LinkCutTree::cut(node_id_t v, node_id_t w) { w_node->set_dparent(nullptr); } -void* LinkCutTree::find_root(node_id_t v) { - return this->expose(&this->nodes[v])->get_head(); +template +void* LinkCutTree::find_root(node_id_t v) { + return this->expose(this->get_node_ptr(v))->get_head(); } -std::pair LinkCutTree::path_aggregate(node_id_t v, node_id_t w) { +template +std::pair LinkCutTree::path_aggregate(node_id_t v, node_id_t w) { assert(find_root(v) == find_root(w)); - LinkCutNode* v_node = &this->nodes[v]; - LinkCutNode* w_node = &this->nodes[w]; + LinkCutNode* v_node = this->get_node_ptr(v); + LinkCutNode* w_node = this->get_node_ptr(w); this->evert(v_node); LinkCutNode* p = this->expose(w_node); return p->get_max_edge(); } -bool LinkCutTree::has_edge(node_id_t v1, node_id_t v2) { +template +bool LinkCutTree::has_edge(node_id_t v1, node_id_t v2) { edge_id_t e = VERTICES_TO_EDGE(v1, v2); - return nodes[v1].has_edge(e); + return this->node(v1).has_edge(e); } -uint32_t LinkCutTree::get_edge_weight(node_id_t v1, node_id_t v2) { +template +uint32_t LinkCutTree::get_edge_weight(node_id_t v1, node_id_t v2) { edge_id_t e = VERTICES_TO_EDGE(v1, v2); - return nodes[v1].get_edge_weight(e); + return this->node(v1).get_edge_weight(e); } -std::vector> LinkCutTree::get_cc() { +template +std::vector> LinkCutTree::get_cc() { std::map> cc_map; std::map visited; - for (uint32_t i = 0; i < nodes.size(); i++) { - if (visited.find(&nodes[i]) == visited.end()) { + for (uint32_t i = 0; i < max_nodes; i++) { + if (!is_initialized(i)) continue; + if (visited.find(this->get_node_ptr(i)) == visited.end()) { std::set node_component; - LinkCutNode* curr = &nodes[i]; + LinkCutNode* curr = this->get_node_ptr(i); while ((curr->get_parent() && visited.find(curr->get_parent()) == visited.end()) || (curr->get_head()->get_dparent() && visited.find(curr->get_head()->get_dparent()) == visited.end())) { node_component.insert(curr); @@ -367,9 +390,9 @@ std::vector> LinkCutTree::get_cc() { std::set component; cc_map.insert({root, component}); } - for (auto node : node_component) { - cc_map[root].insert(node-&nodes[0]); - visited.insert({node, root}); + for (auto n : node_component) { + cc_map[root].insert(n - this->get_node_ptr(0)); + visited.insert({n, root}); } } } @@ -379,3 +402,6 @@ std::vector> LinkCutTree::get_cc() { } return cc; } + + +template class LinkCutTree<>; \ No newline at end of file diff --git a/src/sketchless_euler_tour_tree.cpp b/src/sketchless_euler_tour_tree.cpp index f6efd78..5d291e2 100644 --- a/src/sketchless_euler_tour_tree.cpp +++ b/src/sketchless_euler_tour_tree.cpp @@ -2,32 +2,37 @@ #include - -SketchlessEulerTourTree::SketchlessEulerTourTree(node_id_t num_nodes, uint32_t tier_num, int seed) { - // Initialize all the ETT node - ett_nodes.reserve(num_nodes); - for (node_id_t i = 0; i < num_nodes; ++i) { - ett_nodes.emplace_back(seed, i, tier_num); - } +template +SketchlessEulerTourTree::SketchlessEulerTourTree(node_id_t num_nodes, uint32_t tier_num, size_t seed) : seed(seed), tier_num(tier_num), max_num_nodes(num_nodes) { + if constexpr (std::is_same_v>) { + ett_nodes.reserve(num_nodes); + for (node_id_t i = 0; i < num_nodes; ++i) { + ett_nodes.emplace_back(seed, i, tier_num); + } + } } -void SketchlessEulerTourTree::link(node_id_t u, node_id_t v) { - ett_nodes[u].link(ett_nodes[v]); +template +void SketchlessEulerTourTree::link(node_id_t u, node_id_t v) { + 
ett_node(u).link(ett_node(v)); } - -void SketchlessEulerTourTree::cut(node_id_t u, node_id_t v) { - ett_nodes[u].cut(ett_nodes[v]); +template +void SketchlessEulerTourTree::cut(node_id_t u, node_id_t v) { + ett_node(u).cut(ett_node(v)); } -bool SketchlessEulerTourTree::has_edge(node_id_t u, node_id_t v) { - return ett_nodes[u].has_edge_to(&ett_nodes[v]); +template +bool SketchlessEulerTourTree::has_edge(node_id_t u, node_id_t v) { + return ett_node(u).has_edge_to(&ett_node(v)); } -SketchlessSkipListNode* SketchlessEulerTourTree::get_root(node_id_t u) { - return ett_nodes[u].get_root(); +template +SketchlessSkipListNode* SketchlessEulerTourTree::get_root(node_id_t u) { + return ett_node(u).get_root(); } -bool SketchlessEulerTourTree::is_connected(node_id_t u, node_id_t v) { +template +bool SketchlessEulerTourTree::is_connected(node_id_t u, node_id_t v) { return get_root(u) == get_root(v); } @@ -168,12 +173,14 @@ bool SketchlessEulerTourNode::cut(SketchlessEulerTourNode& other) { return true; } -std::vector> SketchlessEulerTourTree::cc_query() { +template +std::vector> SketchlessEulerTourTree::cc_query() { std::vector> cc; std::set visited; - for (uint32_t i = 0; i < ett_nodes.size(); i++) { - if (visited.find(&ett_nodes[i]) == visited.end()) { - std::set pointer_component = ett_nodes[i].get_component(); + // TODO - reimplement this. + for (uint32_t i = 0; i < max_num_nodes; i++) { + if (visited.find(&ett_node(i)) == visited.end()) { + std::set pointer_component = ett_node(i).get_component(); std::set component; for (auto pointer : pointer_component) { component.insert(pointer->vertex); @@ -184,3 +191,8 @@ std::vector> SketchlessEulerTourTree::cc_query() { } return cc; } + + +template class SketchlessEulerTourTree<>; +// template class SketchlessEulerTourTree>; +// template class SketchlessEulerTourTree>; diff --git a/src/skiplist.cpp b/src/skiplist.cpp index 53b4ec0..e5a5b0f 100644 --- a/src/skiplist.cpp +++ b/src/skiplist.cpp @@ -10,16 +10,26 @@ long skiplist_seed = time(NULL); vec_t sketch_len; vec_t sketch_err; -SkipListNode::SkipListNode(EulerTourNode* node, long seed, bool has_sketch) : node(node) { - if (has_sketch) sketch_agg = new Sketch(sketch_len, seed, 1, sketch_err); +template requires(SketchColumnConcept) +SkipListNode::SkipListNode(EulerTourNode* node, long seed, bool has_sketch) : node(node), sketch_agg(0, seed) { + // if (has_sketch) sketch_agg = new Sketch(sketch_len, seed, 1, sketch_err); + // TODO - FIGURE OUT HOW TO DO SEEDING PROPERLY + // if (has_sketch) + // sketch_agg = new SketchClass( + // SketchClass::suggest_capacity(sketch_len), seed); + if (has_sketch) + this->sketch_agg = SketchClass(SketchClass::suggest_capacity(sketch_len), seed); + return; } -SkipListNode::~SkipListNode() { - if (sketch_agg) delete sketch_agg; +template requires(SketchColumnConcept) +SkipListNode::~SkipListNode() { + // if (sketch_agg) delete sketch_agg; } -void SkipListNode::uninit_element(bool delete_bdry) { - SkipListNode* list_curr = this; +template requires(SketchColumnConcept) +void SkipListNode::uninit_element(bool delete_bdry) { + SkipListNode* list_curr = this; SkipListNode* list_prev; SkipListNode* bdry_curr = this->left; SkipListNode* bdry_prev; @@ -37,7 +47,8 @@ void SkipListNode::uninit_element(bool delete_bdry) { } } -SkipListNode* SkipListNode::init_element(EulerTourNode* node, bool is_allowed_caller) { +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::init_element(EulerTourNode* node, bool is_allowed_caller) { long seed = node->get_seed(); // NOTE: WE 
SHOULD MAKE IT SO DIFFERENT SKIPLIST NODES FOR THE SAME ELEMENT CAN BE DIFFERENT HEIGHTS
 	uint64_t element_height = height_factor*__builtin_ctzll(XXH3_64bits_withSeed(&node->vertex, sizeof(node_id_t), skiplist_seed))+1;
@@ -77,7 +88,8 @@
 	return root->get_last();
 }

-SkipListNode* SkipListNode::get_parent() {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+SkipListNode<SketchClass>* SkipListNode<SketchClass>::get_parent() const {
 	// SkipListNode* curr = this;
 	// while (curr && !curr->up) {
 	// 	curr = curr->left;
@@ -86,17 +98,19 @@
 	return parent;
 }

-SkipListNode* SkipListNode::get_root() {
-	SkipListNode* prev = nullptr;
-	SkipListNode* curr = this;
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+SkipListNode<SketchClass>* SkipListNode<SketchClass>::get_root() const {
+	const SkipListNode<SketchClass>* prev = nullptr;
+	const SkipListNode<SketchClass>* curr = this;
 	while (curr) {
 		prev = curr;
 		curr = prev->get_parent();
 	}
-	return prev;
+	return (SkipListNode<SketchClass>*) prev;
 }

-SkipListNode* SkipListNode::get_first() {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+SkipListNode<SketchClass>* SkipListNode<SketchClass>::get_first() const {
 	// Go to the root first and then down to the first element, because if we start at some lower level
 	// we may have to travel right a lot more on that level, takes log time instead of linear time
 	SkipListNode* prev = nullptr;
@@ -108,7 +122,8 @@
 	return prev;
 }

-SkipListNode* SkipListNode::get_last() {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+SkipListNode<SketchClass>* SkipListNode<SketchClass>::get_last() const {
 	// Go to the root first and then down to the last element, because if we start at some lower level
 	// we may have to travel left a lot more on that level, takes log time instead of linear time
 	SkipListNode* prev = nullptr;
@@ -120,32 +135,45 @@
 	return prev;
 }

-uint32_t SkipListNode::get_list_size() {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+uint32_t SkipListNode<SketchClass>::get_list_size() {
 	return this->get_root()->size;
 }

-Sketch* SkipListNode::get_list_aggregate() {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+const SketchClass& SkipListNode<SketchClass>::get_list_aggregate() {
 	return this->get_root()->sketch_agg;
 }

-void SkipListNode::update_agg(vec_t update_idx) {
-	if (!this->sketch_agg) // Only do something if this node has a sketch
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+void SkipListNode<SketchClass>::update_agg(vec_t update_idx) {
+	if (!this->sketch_agg.is_initialized()) // Only do something if this node has a sketch
 		return;
 	this->update_buffer[this->buffer_size] = update_idx;
 	this->buffer_size++;
-	if (this->buffer_size == skiplist_buffer_cap)
+	if (this->buffer_size == SKETCH_BUFFER_SIZE)
 		this->process_updates();
 }

-void SkipListNode::process_updates() {
-	if (!this->sketch_agg) // Only do something if this node has a sketch
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+void SkipListNode<SketchClass>::update_agg_atomic(vec_t update_idx) {
+	if (!this->sketch_agg.is_initialized()) // Only do something if this node has a sketch
+		return;
+	// TODO - do we need to do batching here too?
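	// Unlike update_agg above, this path applies the index to the sketch
	// immediately instead of staging it in update_buffer; batching here would
	// need a thread-safe cursor, since buffer_size++ is not safe under
	// concurrent callers (presumably what the TODO above is weighing).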
+ this->sketch_agg.atomic_update(update_idx); +} + +template requires(SketchColumnConcept) +void SkipListNode::process_updates() { + if (!this->sketch_agg.is_initialized()) // Only do something if this node has a sketch return; for (int i = 0; i < buffer_size; ++i) - this->sketch_agg->update(update_buffer[i]); + this->sketch_agg.update(update_buffer[i]); this->buffer_size = 0; } -SkipListNode* SkipListNode::update_path_agg(vec_t update_idx) { +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg(vec_t update_idx) { SkipListNode* curr = this; SkipListNode* prev; while (curr) { @@ -156,22 +184,115 @@ SkipListNode* SkipListNode::update_path_agg(vec_t update_idx) { return prev; } -SkipListNode* SkipListNode::update_path_agg(Sketch* sketch) { +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg_atomic(vec_t update_idx) { + SkipListNode* curr = this; + SkipListNode* prev; + while (curr) { + curr->update_agg_atomic(update_idx); + prev = curr; + curr = prev->get_parent(); + } + return prev; +} +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg(const ColumnEntryDelta &delta) { + SkipListNode* curr = this; + SkipListNode* prev; + while (curr) { + // __builtin_prefetch(curr->get_parent()); + curr->update_agg_entry_delta(delta); + prev = curr; + curr = prev->get_parent(); + } + return prev; +} +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg_atomic(const ColumnEntryDelta &delta) { + SkipListNode* curr = this; + SkipListNode* prev; + while (curr) { + // __builtin_prefetch(curr->get_parent()); + curr->update_agg_atomic_entry_delta(delta); + prev = curr; + curr = prev->get_parent(); + } + return prev; +} + +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg(const ColumnEntryDeltas &deltas) { SkipListNode* curr = this; SkipListNode* prev; while (curr) { - if (!curr->sketch_agg) - curr->sketch_agg = sketch; - else - curr->sketch_agg->merge(*sketch); + curr->update_agg_entry_deltas(deltas); prev = curr; curr = prev->get_parent(); } return prev; } -std::set SkipListNode::get_component() { - std::set nodes; +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg_atomic(const ColumnEntryDeltas &deltas) { + SkipListNode* curr = this; + SkipListNode* prev; + while (curr) { + curr->update_agg_atomic_entry_deltas(deltas); + prev = curr; + curr = prev->get_parent(); + } + return prev; +} + + +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg(SketchClass &sketch) { + // returns the last node that was updated + SkipListNode* curr = this; + SkipListNode* prev; + if (!this->sketch_agg.is_initialized()) { + this->sketch_agg = std::move(sketch); + prev = curr; + curr = prev->get_parent(); + while (curr) + { + curr->sketch_agg.merge(this->sketch_agg); + prev = curr; + curr = prev->get_parent(); + } + // this->sketch_agg.zero_contents(); + } else { + while (curr) { + curr->sketch_agg.merge(sketch); + prev = curr; + curr = prev->get_parent(); + } + } + return prev; +} + +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::update_path_agg(const SketchClass &sketch) { + // returns the last node that was updated + SkipListNode* curr = this; + SkipListNode* prev; + if (!this->sketch_agg.is_initialized()) { + assert(false); + // NOTE - SHOULD NOT USE IN THIS CASE + // TODO - make this code less confusing if possible. 
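	// For context (inferred from the two overloads): the non-const overload
	// above can adopt an uninitialized aggregate via std::move(sketch), while
	// this const overload cannot, so reaching here uninitialized is treated
	// as a logic error rather than silently copying.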
+ } else { + while (curr) { + curr->sketch_agg.merge(sketch); + prev = curr; + curr = prev->get_parent(); + } + } + return prev; +} + +template requires(SketchColumnConcept) +std::set*> SkipListNode::get_component() { + std::set*> nodes; SkipListNode* curr = this->get_first()->right; //Skip over the boundary node while (curr) { nodes.insert(curr->node); @@ -180,7 +301,8 @@ std::set SkipListNode::get_component() { return nodes; } -void SkipListNode::uninit_list() { +template requires(SketchColumnConcept) +void SkipListNode::uninit_list() { SkipListNode* curr = this->get_first(); SkipListNode* prev; while (curr) { @@ -191,13 +313,14 @@ void SkipListNode::uninit_list() { prev->uninit_element(false); } -SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { assert(left || right); if (!left) return right->get_root(); if (!right) return left->get_root(); - long seed = left->sketch_agg ? left->sketch_agg->get_seed() - : left->get_parent()->sketch_agg->get_seed(); + long seed = left->sketch_agg.is_initialized() ? left->sketch_agg.get_seed() + : left->get_parent()->sketch_agg.get_seed(); SkipListNode* l_curr = left->get_last(); SkipListNode* r_curr = right->get_first(); // this is the bottom boundary node @@ -211,8 +334,8 @@ SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { l_curr->right = r_curr->right; // skip over boundary node if (r_curr->right) r_curr->right->left = l_curr; // skip over boundary node, but to the left r_curr->process_updates(); - if (l_curr->sketch_agg && r_curr->sketch_agg) // Only if that skiplist node has a sketch - l_curr->sketch_agg->merge(*r_curr->sketch_agg); + if (l_curr->sketch_agg.is_initialized() && r_curr->sketch_agg.is_initialized()) // Only if that skiplist node has a sketch + l_curr->sketch_agg.merge(r_curr->sketch_agg); l_curr->size += r_curr->size-1; if (r_prev) delete r_prev; // Delete old boundary nodes @@ -224,7 +347,7 @@ SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { // If left list was taller add the root agg in right to the rest in left while (l_curr) { - l_curr->sketch_agg->merge(*r_prev->sketch_agg); + l_curr->sketch_agg.merge(r_prev->sketch_agg); l_curr->size += r_prev->size-1; l_prev = l_curr; l_curr = l_prev->get_parent(); @@ -233,10 +356,12 @@ SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { // If right list was taller add new boundary nodes to left list if (r_curr) { // Cache the left root to initialize the new boundary nodes - Sketch* l_root_agg = new Sketch(sketch_len, seed, 1, sketch_err); + // Sketch* l_root_agg = new Sketch(sketch_len, seed, 1, sketch_err); + SketchClass l_root_agg = SketchClass( + SketchClass::suggest_capacity(sketch_len), seed); l_prev->process_updates(); - l_root_agg->merge(*l_prev->sketch_agg); - l_root_agg->merge(*r_prev->sketch_agg); + l_root_agg.merge(l_prev->sketch_agg); + l_root_agg.merge(r_prev->sketch_agg); uint32_t l_root_size = l_prev->size - (r_prev->size-1); while (r_curr) { l_curr = new SkipListNode(nullptr, seed, true); @@ -246,10 +371,10 @@ SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { l_curr->right = r_curr->right; if (r_curr->right) r_curr->right->left = l_curr; - l_curr->sketch_agg->merge(*l_root_agg); + l_curr->sketch_agg.merge(l_root_agg); l_curr->size = l_root_size; r_curr->process_updates(); - l_curr->sketch_agg->merge(*r_curr->sketch_agg); + 
l_curr->sketch_agg.merge(r_curr->sketch_agg); l_curr->size += r_curr->size-1; if (r_prev) delete r_prev; // Delete old boundary nodes @@ -257,7 +382,7 @@ SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { r_prev = r_curr; r_curr = r_prev->up; } - delete l_root_agg; + // delete l_root_agg; } delete r_prev; // Update parent pointers in right list @@ -273,7 +398,8 @@ SkipListNode* SkipListNode::join(SkipListNode* left, SkipListNode* right) { return l_prev; } -SkipListNode* SkipListNode::split_left(SkipListNode* node) { +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::split_left(SkipListNode* node) { assert(node && node->left && !node->down); // If just splitting off the boundary nodes do nothing instead if (!node->left->left) { @@ -292,19 +418,19 @@ SkipListNode* SkipListNode::split_left(SkipListNode* node) { r_curr->left = bdry; bdry->right = r_curr; l_curr->right = nullptr; - if (l_curr->sketch_agg && bdry->sketch_agg) // Only if its not the bottom sketchless node - l_curr->sketch_agg->merge(*bdry->sketch_agg); // XOR addition same as subtraction + if (l_curr->sketch_agg.is_initialized() && bdry->sketch_agg.is_initialized()) // Only if its not the bottom sketchless node + l_curr->sketch_agg.merge(bdry->sketch_agg); // XOR addition same as subtraction l_curr->size -= bdry->size-1; // Get next l_curr, r_curr, and bdry l_curr = l_curr->get_parent(); new_bdry = new SkipListNode(nullptr, seed, true); - if (bdry->sketch_agg) // Only if its not the bottom sketchless node - new_bdry->sketch_agg->merge(*bdry->sketch_agg); + if (bdry->sketch_agg.is_initialized()) // Only if its not the bottom sketchless node + new_bdry->sketch_agg.merge(bdry->sketch_agg); new_bdry->size = bdry->size; while (r_curr && !r_curr->up) { r_curr->process_updates(); - if (r_curr->sketch_agg) // Only if that skiplist node has a sketch - new_bdry->sketch_agg->merge(*r_curr->sketch_agg); + if (r_curr->sketch_agg.is_initialized()) // Only if that skiplist node has a sketch + new_bdry->sketch_agg.merge(r_curr->sketch_agg); new_bdry->size += r_curr->size; r_curr->parent = new_bdry; r_curr = r_curr->right; @@ -318,7 +444,7 @@ SkipListNode* SkipListNode::split_left(SkipListNode* node) { // Subtract the final right agg from the rest of the aggs on left path SkipListNode* l_prev = nullptr; while (l_curr) { - l_curr->sketch_agg->merge(*bdry->sketch_agg); // XOR addition same as subtraction + l_curr->sketch_agg.merge(bdry->sketch_agg); // XOR addition same as subtraction l_curr->size -= bdry->size-1; l_prev = l_curr; l_curr = l_curr->get_parent(); @@ -336,7 +462,8 @@ SkipListNode* SkipListNode::split_left(SkipListNode* node) { return l_prev; } -SkipListNode* SkipListNode::split_right(SkipListNode* node) { +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::split_right(SkipListNode* node) { assert(node); SkipListNode* right = node->right; if (!right) return nullptr; @@ -344,6 +471,10 @@ SkipListNode* SkipListNode::split_right(SkipListNode* node) { return right->get_root(); } -SkipListNode* SkipListNode::next() { +template requires(SketchColumnConcept) +SkipListNode* SkipListNode::next() { return this->right; } + + +template class SkipListNode; \ No newline at end of file diff --git a/src/tier_node.cpp b/src/tier_node.cpp index 805d25d..0830e28 100644 --- a/src/tier_node.cpp +++ b/src/tier_node.cpp @@ -31,7 +31,8 @@ void TierNode::main() { // Receive a batch of updates and check if it is the end of stream bcast(update_buffer, sizeof(UpdateMessage)*(batch_size+1), 0); if 
(update_buffer[0].end) { - // std::cout << "============= TIER " << tier_num << " NODE =============" << std::endl; + std::cout << "============= TIER " << tier_num << " NODE =============" << std::endl + << "Number of components: " << ett.num_components() << std::endl; // std::cout << "Greedy batch time (ms): " << greedy_batch_time/1000 << std::endl; // std::cout << "\tSketch update time (ms): " << sketch_update_time/1000 << std::endl; // std::cout << "\tSketch query time (ms): " << sketch_query_time/1000 << std::endl; @@ -48,6 +49,9 @@ void TierNode::main() { for (uint32_t i = 0; i < num_updates; i++) { // Perform the sketch updating or root finding GraphUpdate update = update_buffer[i+1].update; + // TODO - do this in a different way? + initialize_node(update.edge.src); + initialize_node(update.edge.dst); edge_id_t edge = VERTICES_TO_EDGE(update.edge.src, update.edge.dst); split_revert_buffer[i] = false; unlikely_if (update.type == DELETE && ett.has_edge(update.edge.src, update.edge.dst)) { @@ -58,11 +62,11 @@ void TierNode::main() { auto roots = ett.update_sketches(update.edge.src, update.edge.dst, (vec_t)edge); ENDPOINT_CANARY("Updating Sketch With", update.edge.src, update.edge.dst); roots.first->process_updates(); - roots.first->sketch_agg->reset_sample_state(); - query_result_buffer[2*i] = roots.first->sketch_agg->sample().result; + roots.first->sketch_agg.reset_sample_state(); + query_result_buffer[2*i] = roots.first->sketch_agg.sample().result; roots.second->process_updates(); - roots.second->sketch_agg->reset_sample_state(); - query_result_buffer[2*i+1] = roots.second->sketch_agg->sample().result; + roots.second->sketch_agg.reset_sample_state(); + query_result_buffer[2*i+1] = roots.second->sketch_agg.sample().result; // Prepare greedy batch size messages GreedyRefreshMessage this_sizes; @@ -152,11 +156,11 @@ void TierNode::main() { e2.v = refresh_message.endpoints.second.v; for (RefreshEndpoint* e : {&e1, &e2}) { e->prev_tier_size = ett.get_size(e->v); - SkipListNode* root = ett.get_root(e->v); + SkipListNode* root = ett.get_root(e->v); root->process_updates(); - Sketch* ett_agg = root->sketch_agg; - ett_agg->reset_sample_state(); - e->sketch_query_result = ett_agg->sample(); + DefaultSketchColumn &ett_agg = root->sketch_agg; + ett_agg.reset_sample_state(); + e->sketch_query_result = ett_agg.sample(); } RefreshMessage next_refresh_message; next_refresh_message.endpoints = {e1, e2}; @@ -169,6 +173,8 @@ void TierNode::main() { for (int endpoint : {0,1}) { std::ignore = endpoint; // Receive a broadcast to see if the endpoint at the current tier is isolated or not + // OR to see if the component is maximized. 
+					// if the component is maximized, further broadcasts are not needed
 					EttUpdateMessage update_message;
 					bcast(&update_message, sizeof(EttUpdateMessage), rank);
 					if (update_message.type == NOT_ISOLATED) continue;
diff --git a/test/euler_tour_tree_test.cpp b/test/euler_tour_tree_test.cpp
index 28f6273..7ec8e1b 100644
--- a/test/euler_tour_tree_test.cpp
+++ b/test/euler_tour_tree_test.cpp
@@ -6,7 +6,10 @@

 #include
-bool EulerTourNode::isvalid() const {
+#include "sketch_interfacing.h"
+
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+bool EulerTourNode<SketchClass>::isvalid() const {
 	bool invalid = false;
 	// validate allowed_caller is null iff edges is empty
 	EXPECT_EQ(allowed_caller == nullptr, this->edges.empty()) << (invalid = true, "");
@@ -45,7 +48,8 @@
 	return true;
 }

-std::ostream& operator<<(std::ostream& os, const EulerTourNode& ett) {
+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
+std::ostream& operator<<(std::ostream& os, const EulerTourNode<SketchClass>& ett) {
 	os << "EulerTourNode " << &ett << std::endl;
 	for (const auto& [k, v] : ett.edges) {
 		os << "to EulerTourNode " << k << " is " << &v << std::endl;
@@ -55,8 +59,9 @@
 	return os;
 }

+template <class SketchClass> requires(SketchColumnConcept<SketchClass>)
 std::ostream& operator<<(std::ostream& os,
-	const std::vector<EulerTourNode>& nodes) {
+	const std::vector<EulerTourNode<SketchClass>>& nodes) {
 	for (const auto& node : nodes) {
 		os << node;
 	}
@@ -74,6 +79,8 @@
 	srand(seed);
 	std::cout << "Seeding stress test with " << seed << std::endl;
 	EulerTourTree ett(nodecount, 0, seed);
+	// ensure that all nodes are initialized:
+	ett.initialize_all_nodes();

 	for (int i = 0; i < n; i++) {
 		int a = rand() % nodecount, b = rand() % nodecount;
@@ -84,11 +91,18 @@
 		}
 		if (i % n/100 == 0) {
-			ASSERT_TRUE(std::all_of(ett.ett_nodes.begin(), ett.ett_nodes.end(),
-				[](auto& node){return node.isvalid();}))
-				<< "Stress test validation failed, final state:"
-				<< std::endl
-				<< ett.ett_nodes;
+			// TODO - bring back these test cases
+			for (int j=0; j < nodecount; j++) {
+				ASSERT_TRUE(ett.ett_node(j).isvalid());
+				// << "Stress test validation failed at iteration "
+				// << i << ", node " << j << ", final state:"
+				// << std::endl << ett.ett_nodes;
+			}
+			// ASSERT_TRUE(std::all_of(ett.ett_nodes.begin(), ett.ett_nodes.end(),
+			// 	[](auto& node){return node.isvalid();}))
+			// 	<< "Stress test validation failed, final state:"
+			// 	<< std::endl
+			// 	<< ett.ett_nodes;
 		}
 	}
 }
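A note on the design choice above: the old std::all_of assertion iterated ett_nodes directly, which presumably no longer works once the node container is templated (vector vs. hashmap); indexing through the ett_node(j) accessor keeps the validation container-agnostic, at the cost of the richer failure message that is now commented out.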
failed, final state:" + // << std::endl + // << ett.ett_nodes; } - std::unordered_set sentinels; + std::unordered_set*> sentinels; for (int i = 0; i < nodecount; i++) { - SkipListNode *sentinel = ett.ett_nodes[i].edges.begin()->second->get_last(); + SkipListNode *sentinel = ett.ett_node(i).edges.begin()->second->get_last(); sentinels.insert(sentinel); } // Walk up from an occurrence of each node to the root of its auxiliary tre - std::unordered_map aggs; - std::unordered_map sizes; + std::unordered_map*, DefaultSketchColumn*> aggs; + std::unordered_map*, uint32_t> sizes; for (int i = 0; i < nodecount; i++) { - SkipListNode* sentinel = ett.ett_nodes[i].edges.begin()->second->get_last(); + SkipListNode* sentinel = ett.ett_node(i).edges.begin()->second->get_last(); if (aggs.find(sentinel) == aggs.end()) { - Sketch* agg = new Sketch(sketch_len, seed, 1, sketch_err); + // DefaultSketchColumn* agg = new Sketch(sketch_len, seed, 1, sketch_err); + DefaultSketchColumn *agg = new DefaultSketchColumn( + DefaultSketchColumn::suggest_capacity(sketch_len), seed); aggs.insert({sentinel, agg}); - SkipListNode* sentinel_root = sentinel->get_root(); + SkipListNode* sentinel_root = sentinel->get_root(); + sentinel_root->process_updates(); - aggs[sentinel]->merge(*sentinel->get_list_aggregate()); + aggs[sentinel]->merge(sentinel->get_list_aggregate()); sizes[sentinel] = sentinel->get_list_size(); } } - std::unordered_map naive_aggs; - std::unordered_map naive_sizes; + std::unordered_map*, DefaultSketchColumn*> naive_aggs; + std::unordered_map*, uint32_t> naive_sizes; // Naively compute aggregates for each connected component for (int i = 0; i < nodecount; i++) { - SkipListNode* sentinel = ett.ett_nodes[i].edges.begin()->second->get_last(); + SkipListNode* sentinel = ett.ett_node(i).edges.begin()->second->get_last(); sentinel->process_updates(); if (naive_aggs.find(sentinel) != naive_aggs.end()) { - naive_aggs[sentinel]->merge(*ett.ett_nodes[i].allowed_caller->sketch_agg); + naive_aggs[sentinel]->merge(ett.ett_node(i).allowed_caller->sketch_agg); naive_sizes[sentinel] += 1; } else { - Sketch* agg = new Sketch(sketch_len, seed, 1, sketch_err); + // Sketch* agg = new Sketch(sketch_len, seed, 1, sketch_err); + // DefaultSketchColumn *agg = new DefaultSketchColumn(4, 0); + DefaultSketchColumn *agg = new DefaultSketchColumn( + DefaultSketchColumn::suggest_capacity(sketch_len), seed); naive_aggs.insert({sentinel, agg}); - naive_aggs[sentinel]->merge(*ett.ett_nodes[i].allowed_caller->sketch_agg); + naive_aggs[sentinel]->merge(ett.ett_node(i).allowed_caller->sketch_agg); naive_sizes[sentinel] = 1; } } @@ -187,10 +214,14 @@ TEST(EulerTourTreeSuite, get_aggregate) { std::cout << "Seeding get aggregate test with " << seed << std::endl; // Keep a manual aggregate of all the sketches - Sketch true_aggregate(sketch_len, seed, 1, sketch_err); + // DefaultSketchColumn true_aggregate(sketch_len, seed, 1, sketch_err); + // DefaultSketchColumn true_aggregate(4, 0); + DefaultSketchColumn true_aggregate( + DefaultSketchColumn::suggest_capacity(sketch_len), seed); int nodecount = 1000; - EulerTourTree ett(nodecount, 0, seed); + EulerTourTree ett(nodecount, 0, seed); + ett.initialize_all_nodes(); // Add value to each sketch, update the manual aggregate for (int i = 0; i < nodecount; i++) @@ -205,6 +236,6 @@ TEST(EulerTourTreeSuite, get_aggregate) { } // Check that the ETT aggregate is properly maintained and gotten - Sketch* aggregate = ett.get_aggregate(0); - ASSERT_TRUE(*aggregate == true_aggregate); + const DefaultSketchColumn 
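+ // NOTE: get_aggregate() now hands back a const reference rather than an
+ // owning Sketch pointer, so the assertion compares column contents directly.
+ const DefaultSketchColumn 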
&aggregate = ett.get_aggregate(0); + ASSERT_TRUE(aggregate == true_aggregate); } diff --git a/test/graph_tiers_test.cpp b/test/graph_tiers_test.cpp index 53ac9a8..a08f82b 100644 --- a/test/graph_tiers_test.cpp +++ b/test/graph_tiers_test.cpp @@ -5,12 +5,17 @@ #include #include #include "graph_tiers.h" +#include "batch_tiers.h" #include "binary_graph_stream.h" -#include "mat_graph_verifier.h" +// #include "mat_graph_verifier.h" +#include "graph_verifier.h" #include "util.h" const vec_t DEFAULT_SKETCH_ERR = 1; +// using GraphTierSystem = GraphTiers; +using GraphTierSystem = BatchTiers; + auto start = std::chrono::high_resolution_clock::now(); auto stop = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(stop - start); @@ -32,54 +37,132 @@ static void print_metrics() { std::cout << "Total number of normal refreshes: " << normal_refreshes << std::endl; } +TEST(GraphTiersSuite, gibbs_mixed_speed_test) { + BinaryGraphStream stream(stream_file, 100000); + long edgecount = stream.edges(); + // height_factor = 1;//1./log2(log2(stream.nodes())); + height_factor = 1/log2(log2(stream.nodes())); + sketch_len = Sketch::calc_vector_length(stream.nodes()); + sketch_err = DEFAULT_SKETCH_ERR; + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + GraphTierSystem gt(stream.nodes(), seed); + gt.initialize_all_nodes(); + + long total_update_time = 0; + long total_query_time = 0; + auto update_timer = std::chrono::high_resolution_clock::now(); + auto query_timer = update_timer; + bool doing_updates = true; + for (long i = 0; i < edgecount; i++) { + // Read an update from the stream and have the input node process it + GraphUpdate operation = stream.get_edge(); + if (operation.type == 2) { // 2 is the symbol for queries + unlikely_if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + doing_updates = false; + query_timer = std::chrono::high_resolution_clock::now(); + } + gt.is_connected(operation.edge.src, operation.edge.dst); + } else { + unlikely_if (!doing_updates) { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + doing_updates = true; + update_timer = std::chrono::high_resolution_clock::now(); + } + gt.update(operation); + } + unlikely_if(i%1000000 == 0 || i == edgecount-1) { + std::cout << "FINISHED OPERATION " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; + } + } + if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + } else { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + } + + std::cout << "Total update time(ms): " << (total_update_time/1000) << std::endl; + std::cout << "Total query time(ms): " << (total_query_time/1000) << std::endl; + + std::ofstream file; + std::string out_file = "./../results/gibbs_speed_results/" + stream_file.substr(stream_file.find("/") + 1) + ".txt"; + std::cout << "WRITING RESULTS TO " << out_file << std::endl; + file.open (out_file, std::ios_base::app); + file << " UPDATES/SECOND: " << ((long)(0.9*edgecount))/(1 + total_update_time/1000)*1000 << std::endl; + file << " QUERIES/SECOND: " << ((long)(0.1*edgecount))/(1 + total_query_time/1000)*1000 << std::endl; + file.close(); +} + TEST(GraphTiersSuite, mini_correctness_test) { + node_id_t 
numnodes = 10;
- GraphTiers gt(numnodes); - MatGraphVerifier gv(numnodes);
+ height_factor = 1 / log2(log2(numnodes)); + sketch_len = Sketch::calc_vector_length(numnodes); + sketch_err = DEFAULT_SKETCH_ERR; + + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + GraphTierSystem gt(numnodes, seed); + gt.initialize_all_nodes(); + GraphVerifier gv(numnodes);
// Link all of the nodes into 1 connected component for (node_id_t i = 0; i < numnodes-1; i++) { gt.update({{i, i+1}, INSERT});
- gv.edge_update(i,i+1); - std::vector<std::set<node_id_t>> cc = gt.get_cc(); - try { - gv.reset_cc_state(); - gv.verify_soln(cc); - } catch (IncorrectCCException& e) { - std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl; - std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << numnodes-i-1 << " components" << std::endl; - FAIL();
+ gv.edge_update({i, i + 1}); + if (i % 3 == 0) { + std::vector<std::set<node_id_t>> cc = gt.get_cc(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after linking nodes " << i << " and " << i + 1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << numnodes - i - 1 << " components" << std::endl; + FAIL(); + }
} } // One by one cut all of the nodes into singletons for (node_id_t i = 0; i < numnodes-1; i++) { gt.update({{i, i+1}, DELETE});
- gv.edge_update(i,i+1); - std::vector<std::set<node_id_t>> cc = gt.get_cc(); - try { - gv.reset_cc_state(); - gv.verify_soln(cc); - } catch (IncorrectCCException& e) { - std::cout << "Incorrect cc found after cutting nodes " << i << " and " << i+1 << std::endl; - std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << i+2 << " components" << std::endl; - FAIL();
+ gv.edge_update({i,i+1}); + if (i % 3 == 0) { + std::vector<std::set<node_id_t>> cc = gt.get_cc(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after cutting nodes " << i << " and " << i + 1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << i + 2 << " components" << std::endl; + FAIL(); + }
} } } TEST(GraphTiersSuite, deletion_replace_correctness_test) { node_id_t numnodes = 50;
- GraphTiers gt(numnodes); - MatGraphVerifier gv(numnodes);
+ std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + GraphTierSystem gt(numnodes, seed); + gt.initialize_all_nodes(); + GraphVerifier gv(numnodes);
// Link all of the nodes into 1 connected component for (node_id_t i = 0; i < numnodes-1; i++) { gt.update({{i, i+1}, INSERT});
- gv.edge_update(i,i+1); + gv.edge_update({i,i+1});
std::vector<std::set<node_id_t>> cc = gt.get_cc(); try {
- gv.reset_cc_state(); - gv.verify_soln(cc); + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc);
} catch (IncorrectCCException& e) { std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl; std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << numnodes-i-1 << " components" << std::endl; @@ -93,19 +176,19 @@ TEST(GraphTiersSuite, deletion_replace_correctness_test) { second = rand() % numnodes; gt.update({{first, second}, INSERT});
- gv.edge_update(first, second); + gv.edge_update({first, second});
node_id_t distance = std::max(first, second) - std::min(first, second); // Cut a random edge first = std::min(first, second) + rand() % (distance-1);
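// NOTE: the cut edge (first, first+1) lies inside the span of the bridge
// inserted above, so the deletion must be repaired by recovering the bridge
// as a replacement edge; e.g. with bridge {10, 30} and cut {17, 18} the
// component stays connected through 10-30, hence the single expected component below.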
gt.update({{first, first+1}, DELETE});
- gv.edge_update(first, first+1); + gv.edge_update({first, first+1});
std::vector<std::set<node_id_t>> cc = gt.get_cc(); try {
- gv.reset_cc_state(); - gv.verify_soln(cc); + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc);
} catch (IncorrectCCException& e) { std::cout << "Incorrect cc found after cutting nodes " << first << " and " << first+1 << std::endl; std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl; @@ -115,7 +198,7 @@ } TEST(GraphTiersSuite, omp_correctness_test) {
- omp_set_dynamic(1); + // omp_set_dynamic(1);
try { BinaryGraphStream stream(stream_file, 100000); @@ -123,25 +206,31 @@ sketch_len = Sketch::calc_vector_length(stream.nodes()); sketch_err = DEFAULT_SKETCH_ERR;
- GraphTiers gt(stream.nodes());
+ std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + GraphTierSystem gt(stream.nodes(), seed); + gt.initialize_all_nodes();
int edgecount = stream.edges(); edgecount = 1000000;
- MatGraphVerifier gv(stream.nodes()); + GraphVerifier gv(stream.nodes());
start = std::chrono::high_resolution_clock::now(); for (int i = 0; i < edgecount; i++) { GraphUpdate update = stream.get_edge(); gt.update(update);
- gv.edge_update(update.edge.src, update.edge.dst); + gv.edge_update(update.edge);
unlikely_if(i%1000 == 0 || i == edgecount-1) { std::vector<std::set<node_id_t>> cc = gt.get_cc(); try {
- gv.reset_cc_state(); - gv.verify_soln(cc); + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc);
std::cout << "Update " << i << ", CCs correct." << std::endl; } catch (IncorrectCCException& e) { std::cout << "Incorrect connected components found at update " << i << std::endl; std::cout << "GOT: " << cc.size() << std::endl;
+ std::cout << "EXPECTED: " << gv.get_num_kruskal_ccs() << std::endl;
FAIL(); } } @@ -157,16 +246,22 @@ } TEST(GraphTiersSuite, omp_speed_test) {
- omp_set_dynamic(1); + // omp_set_dynamic(1);
try { long time = 0; BinaryGraphStream stream(stream_file, 100000);
- height_factor = 1./log2(log2(stream.nodes())); + // height_factor = 1;//1./log2(log2(stream.nodes())); + height_factor = 1/log2(log2(stream.nodes()));
sketch_len = Sketch::calc_vector_length(stream.nodes()); sketch_err = DEFAULT_SKETCH_ERR;
- GraphTiers gt(stream.nodes());
+ std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + GraphTierSystem gt(stream.nodes(), seed); + gt.initialize_all_nodes();
int edgecount = stream.edges(); start = std::chrono::high_resolution_clock::now(); @@ -174,7 +269,7 @@ for (int i = 0; i < edgecount; i++) { GraphUpdate update = stream.get_edge(); gt.update(update);
- unlikely_if (i % 100000 == 0) { + unlikely_if (i % 1000000000 == 0) {
auto stop = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(stop - start); std::cout << "FINISHED UPDATE " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; @@ -193,7 +288,7 @@ } TEST(GraphTiersSuite, query_speed_test) {
- omp_set_dynamic(1); + // omp_set_dynamic(1);
try { BinaryGraphStream stream(stream_file, 100000); @@ -203,7 +298,13 @@ sketch_err = DEFAULT_SKETCH_ERR; int nodecount = stream.nodes();
- GraphTiers 
gt(nodecount); + + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t sketch_seed = dist(rng); + GraphTierSystem gt(nodecount, sketch_seed); + gt.initialize_all_nodes(); int edgecount = 150000; std::cout << "Building up graph..." << std::endl; diff --git a/test/hybrid_shmem_test_runner.cpp b/test/hybrid_shmem_test_runner.cpp new file mode 100644 index 0000000..d3a36ea --- /dev/null +++ b/test/hybrid_shmem_test_runner.cpp @@ -0,0 +1,14 @@ +#include +#include +#include "util.h" + + +std::string stream_file; + +int main(int argc, char** argv) { + if (argc > 1) + stream_file = argv[1]; + testing::InitGoogleTest(&argc, argv); + int ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/test/hybrid_shmem_tests.cpp b/test/hybrid_shmem_tests.cpp new file mode 100644 index 0000000..fe4614f --- /dev/null +++ b/test/hybrid_shmem_tests.cpp @@ -0,0 +1,519 @@ +#include +#include +#include +#include +#include +#include +#include +#include "graph_tiers.h" +#include "batch_tiers.h" +#include "binary_graph_stream.h" +// #include "mat_graph_verifier.h" +#include "graph_verifier.h" +#include "mpi_hybrid_conn.h" +#include "util.h" + +const vec_t DEFAULT_SKETCH_ERR = 1; + + +size_t update_batch_size = 200; + +static uint32_t compute_num_tiers(node_id_t node_count) { + if (node_count <= 100) { + return 5; + } + const double numerator = log2(static_cast(node_count)); + //const double denominator = log2(3.0) - 1.0; + // const double denominator=0.6; + const double denominator = 1.4; + auto tiers = static_cast(numerator / denominator); + return std::max(5, tiers); +} + +// using GraphTierSystem = GraphTiers; +using GraphTierSystem = BatchTiers; + +auto start = std::chrono::high_resolution_clock::now(); +auto stop = std::chrono::high_resolution_clock::now(); +auto duration = std::chrono::duration_cast(stop - start); + +static void print_metrics() { + stop = std::chrono::high_resolution_clock::now(); + duration = std::chrono::duration_cast(stop - start); + std::cout << "\nTotal time for all updates performed (ms): " << duration.count() << std::endl; + std::cout << "\tTotal time in Sketch update (ms): " << sketch_time/1000 << std::endl; + std::cout << "\tTotal time in Refresh function (ms): " << refresh_time/1000 << std::endl; + std::cout << "\t\tTime in Parallel isolated checking (ms): " << parallel_isolated_check/1000 << std::endl; + std::cout << "\t\tTime in Sketch queries (ms): " << sketch_query/1000 << std::endl; + std::cout << "\t\tTime in LCT operations (ms): " << lct_time/1000 << std::endl; + std::cout << "\t\tTime in ETT operations (ms): " << (ett_time+ett_find_root+ett_get_agg)/1000 << std::endl; + std::cout << "\t\t\tETT Split and Join (ms): " << ett_time/1000 << std::endl; + std::cout << "\t\t\tETT Find Tree Root (ms): " << ett_find_root/1000 << std::endl; + std::cout << "\t\t\tETT Get Aggregate (ms): " << ett_get_agg/1000 << std::endl; + std::cout << "Total number of tiers grown: " << tiers_grown << std::endl; + std::cout << "Total number of normal refreshes: " << normal_refreshes << std::endl; +} + +TEST(HybridGraphTiersSuite, gibbs_mixed_speed_test) { + BinaryGraphStream stream(stream_file, 100000); + long edgecount = stream.edges(); + // height_factor = 1;//1./log2(log2(stream.nodes())); + height_factor = 1/log2(log2(stream.nodes())); + sketch_len = Sketch::calc_vector_length(stream.nodes()); + sketch_err = DEFAULT_SKETCH_ERR; + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = 
dist(rng); + // GraphTierSystem gt(stream.nodes(), seed); + // HybridConnectivityManager + uint32_t num_tiers = log2(stream.nodes())/(log2(3)-1); + HybridConnectivityManager hybrid_driver( + stream.nodes(), num_tiers, update_batch_size, seed + ); + + long total_update_time = 0; + long total_query_time = 0; + auto update_timer = std::chrono::high_resolution_clock::now(); + auto query_timer = update_timer; + bool doing_updates = true; + for (long i = 0; i < edgecount; i++) { + // Read an update from the stream and have the input node process it + GraphUpdate operation = stream.get_edge(); + if (operation.type == 2) { // 2 is the symbol for queries + unlikely_if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + doing_updates = false; + query_timer = std::chrono::high_resolution_clock::now(); + } + hybrid_driver.connectivity_query(operation.edge.src, operation.edge.dst); + } else { + unlikely_if (!doing_updates) { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + doing_updates = true; + update_timer = std::chrono::high_resolution_clock::now(); + } + hybrid_driver.update(operation); + } + unlikely_if(i%1000000 == 0 || i == edgecount-1) { + std::cout << "FINISHED OPERATION " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; + std::cout << "Sketched nodes: " << hybrid_driver.sketched_node_count() << " out of " << stream.nodes() << std::endl; + // std::cout << "- Space usage of CF: " << hybrid_driver.get_space_usage_cf()/(1024*1024) << " MB" << std::endl; + // std::cout << "- Space usage of Driver: " << hybrid_driver.get_space_usage_driver()/(1024*1024) << " MB" << std::endl; + // std::cout << "- Space usage of Sketches: " << hybrid_driver.space_usage_conn_sketch()/(1024*1024) << " MB" << std::endl; + // std::cout << "- Space usage of Recovery Sketches: " << hybrid_driver.space_usage_recovery_sketch()/(1024*1024) << " MB" << std::endl; + std::cout << "- Total edges: " << hybrid_driver.total_edges() << std::endl; + std::cout << "- Sketched edges: " << hybrid_driver.num_sketched_edges() << std::endl; + double percent_sketched = 100.0 * ((double)hybrid_driver.num_sketched_edges()) / ((double)hybrid_driver.total_edges()); + std::cout << "- Percent sketched edges: " << percent_sketched << "%" << std::endl; + } + } + if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + } else { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + } + + std::cout << "Total update time(ms): " << (total_update_time/1000) << std::endl; + std::cout << "Total query time(ms): " << (total_query_time/1000) << std::endl; + + std::ofstream file; + std::string out_file = "./../results/gibbs_speed_results/" + stream_file.substr(stream_file.find("/") + 1) + ".txt"; + std::cout << "WRITING RESULTS TO " << out_file << std::endl; + file.open (out_file, std::ios_base::app); + file << " UPDATES/SECOND: " << ((long)(0.9*edgecount))/(1 + total_update_time/1000)*1000 << std::endl; + file << " QUERIES/SECOND: " << ((long)(0.1*edgecount))/(1 + total_query_time/1000)*1000 << std::endl; + file.close(); +} +TEST(HybridGraphTiersSuite, sparse_only_speed_test) { + BinaryGraphStream stream(stream_file, 100000); + long edgecount = stream.edges(); + // height_factor = 1;//1./log2(log2(stream.nodes())); + height_factor = 
1/log2(log2(stream.nodes())); + sketch_len = Sketch::calc_vector_length(stream.nodes()); + sketch_err = DEFAULT_SKETCH_ERR; + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + // GraphTierSystem gt(stream.nodes(), seed); + // HybridConnectivityManager + uint32_t num_tiers = log2(stream.nodes())/(log2(3)-1); + SCCWN<> cf_algo(stream.nodes()); + + long total_update_time = 0; + long total_query_time = 0; + auto update_timer = std::chrono::high_resolution_clock::now(); + auto query_timer = update_timer; + bool doing_updates = true; + for (long i = 0; i < edgecount; i++) { + // Read an update from the stream and have the input node process it + GraphUpdate operation = stream.get_edge(); + if (operation.type == 2) { // 2 is the symbol for queries + unlikely_if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + doing_updates = false; + query_timer = std::chrono::high_resolution_clock::now(); + } + cf_algo.is_connected(operation.edge.src, operation.edge.dst); + } else { + unlikely_if (!doing_updates) { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + doing_updates = true; + update_timer = std::chrono::high_resolution_clock::now(); + } + if (operation.type == INSERT) { + cf_algo.insert(operation.edge.src, operation.edge.dst); + } else { + cf_algo.remove(operation.edge.src, operation.edge.dst); + } + } + unlikely_if(i%1000000 == 0 || i == edgecount-1) { + std::cout << "FINISHED OPERATION " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; + if (i%20000000 == 0 || i == edgecount-1) { + std::cout << "- Space usage of CF: " << cf_algo.getMemUsage()/(1024*1024) << " MB" << std::endl; + } + } + } + if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + } else { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + } + std::cout << "Total update time(ms): " << (total_update_time/1000) << std::endl; + std::cout << "Total query time(ms): " << (total_query_time/1000) << std::endl; + + std::ofstream file; + std::string out_file = "./../results/gibbs_speed_results/" + stream_file.substr(stream_file.find("/") + 1) + ".txt"; + std::cout << "WRITING RESULTS TO " << out_file << std::endl; + file.open (out_file, std::ios_base::app); + file << " UPDATES/SECOND: " << ((long)(0.9*edgecount))/(1 + total_update_time/1000)*1000 << std::endl; + file << " QUERIES/SECOND: " << ((long)(0.1*edgecount))/(1 + total_query_time/1000)*1000 << std::endl; + file.close(); +} + +TEST(HybridGraphTiersSuite, hybrid_memory_test) { + BinaryGraphStream stream(stream_file, 100000); + long edgecount = stream.edges(); + // height_factor = 1;//1./log2(log2(stream.nodes())); + height_factor = 1/log2(log2(stream.nodes())); + sketch_len = Sketch::calc_vector_length(stream.nodes()); + sketch_err = DEFAULT_SKETCH_ERR; + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + // GraphTierSystem gt(stream.nodes(), seed); + // HybridConnectivityManager + uint32_t num_tiers = log2(stream.nodes())/(log2(3)-1); + HybridConnectivityManager hybrid_driver( + stream.nodes(), num_tiers, update_batch_size, seed + ); + + long total_update_time = 0; + long total_query_time = 0; + auto 
update_timer = std::chrono::high_resolution_clock::now(); + auto query_timer = update_timer; + bool doing_updates = true; + for (long i = 0; i < edgecount; i++) { + // Read an update from the stream and have the input node process it + GraphUpdate operation = stream.get_edge(); + if (operation.type == 2) { // 2 is the symbol for queries + unlikely_if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + doing_updates = false; + query_timer = std::chrono::high_resolution_clock::now(); + } + hybrid_driver.connectivity_query(operation.edge.src, operation.edge.dst); + } else { + unlikely_if (!doing_updates) { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + doing_updates = true; + update_timer = std::chrono::high_resolution_clock::now(); + } + hybrid_driver.update(operation); + } + unlikely_if(i%1000000 == 0 || i == edgecount-1) { + std::cout << "FINISHED OPERATION " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; + if (i%20000000 == 0 || i == edgecount-1) { + std::cout << "Sketched nodes: " << hybrid_driver.sketched_node_count() << " out of " << stream.nodes() << std::endl; + std::cout << "- Space usage of CF: " << hybrid_driver.get_space_usage_cf()/(1024*1024) << " MB" << std::endl; + std::cout << "- Space usage of Driver: " << hybrid_driver.get_space_usage_driver()/(1024*1024) << " MB" << std::endl; + std::cout << "- Space usage of Sketches: " << hybrid_driver.space_usage_conn_sketch()/(1024*1024) << " MB" << std::endl; + std::cout << "- Space usage of Recovery Sketches: " << hybrid_driver.space_usage_recovery_sketch()/(1024*1024) << " MB" << std::endl; + std::cout << "- Total edges: " << hybrid_driver.total_edges() << std::endl; + std::cout << "- Sketched edges: " << hybrid_driver.num_sketched_edges() << std::endl; + double percent_sketched = 100.0 * ((double)hybrid_driver.num_sketched_edges()) / ((double)hybrid_driver.total_edges()); + std::cout << "- Percent sketched edges: " << percent_sketched << "%" << std::endl; + } + } + } + if (doing_updates) { + total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); + } else { + total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); + } + + std::cout << "Total update time(ms): " << (total_update_time/1000) << std::endl; + std::cout << "Total query time(ms): " << (total_query_time/1000) << std::endl; + + std::ofstream file; + std::string out_file = "./../results/gibbs_speed_results/" + stream_file.substr(stream_file.find("/") + 1) + ".txt"; + std::cout << "WRITING RESULTS TO " << out_file << std::endl; + file.open (out_file, std::ios_base::app); + file << " UPDATES/SECOND: " << ((long)(0.9*edgecount))/(1 + total_update_time/1000)*1000 << std::endl; + file << " QUERIES/SECOND: " << ((long)(0.1*edgecount))/(1 + total_query_time/1000)*1000 << std::endl; + file.close(); +} + +TEST(HybridGraphTiersSuite, mini_correctness_test) { + + node_id_t numnodes = 10; + height_factor = 1 / log2(log2(numnodes)); + sketch_len = Sketch::calc_vector_length(numnodes); + sketch_err = DEFAULT_SKETCH_ERR; + + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + uint32_t num_tiers = compute_num_tiers(numnodes); + HybridConnectivityManager hybrid_driver( + numnodes, num_tiers, update_batch_size, seed + ); + 
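// NOTE: GraphVerifier maintains the exact edge set alongside the hybrid
+ // structure; verify_cc_from_component_set() throws IncorrectCCException
+ // when the reported components disagree with the true partition (compare
+ // gv.get_num_kruskal_ccs() for the expected count).
+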
GraphVerifier gv(numnodes); + + // Link all of the nodes into 1 connected component + for (node_id_t i = 0; i < numnodes-1; i++) { + hybrid_driver.update({{i, i+1}, INSERT}); + gv.edge_update({i, i + 1}); + if (i % 3 == 0) { + std::vector> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after linking nodes " << i << " and " << i + 1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << numnodes - i - 1 << " components" << std::endl; + FAIL(); + } + } + } + // One by one cut all of the nodes into singletons + for (node_id_t i = 0; i < numnodes-1; i++) { + hybrid_driver.update({{i, i+1}, DELETE}); + gv.edge_update({i,i+1}); + if (i % 3 == 0) { + std::vector> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after cutting nodes " << i << " and " << i + 1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << i + 2 << " components" << std::endl; + FAIL(); + } + } + } +} + +TEST(HybridGraphTiersSuite, deletion_replace_correctness_test) { + node_id_t numnodes = 50; + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + uint32_t num_tiers = compute_num_tiers(numnodes); + HybridConnectivityManager hybrid_driver( + numnodes, num_tiers, update_batch_size, seed + ); + GraphVerifier gv(numnodes); + + // Link all of the nodes into 1 connected component + for (node_id_t i = 0; i < numnodes-1; i++) { + hybrid_driver.update({{i, i+1}, INSERT}); + gv.edge_update({i,i+1}); + std::vector> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << numnodes-i-1 << " components" << std::endl; + FAIL(); + } + } + // Generate a random bridge + node_id_t first = rand() % numnodes; + node_id_t second = rand() % numnodes; + while(first == second || second == first+1 || first == second+1) + second = rand() % numnodes; + + hybrid_driver.update({{first, second}, INSERT}); + gv.edge_update({first, second}); + + node_id_t distance = std::max(first, second) - std::min(first, second); + // Cut a random edge + first = std::min(first, second) + rand() % (distance-1); + + hybrid_driver.update({{first, first+1}, DELETE}); + gv.edge_update({first, first+1}); + + std::vector> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after cutting nodes " << first << " and " << first+1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 components" << std::endl; + FAIL(); + } + +} + +TEST(HybridGraphTiersSuite, omp_correctness_test) { + // omp_set_dynamic(1); + try { + BinaryGraphStream stream(stream_file, 100000); + + height_factor = 1/log2(log2(stream.nodes())); + sketch_len = Sketch::calc_vector_length(stream.nodes()); + sketch_err = DEFAULT_SKETCH_ERR; + + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + uint32_t num_tiers = compute_num_tiers(stream.nodes()); + 
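// NOTE: compute_num_tiers() floors the tier count at 5 and otherwise uses
+ // log2(n)/1.4, i.e. fewer tiers than the theory-informed
+ // log2(n)/(log2(3)-1) used by the speed tests; e.g. for n = 2^20 this
+ // gives floor(20/1.4) = 14 tiers versus roughly 20/0.585 = 34.
+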
HybridConnectivityManager hybrid_driver( + stream.nodes(), num_tiers, update_batch_size, seed + ); + int edgecount = stream.edges(); + edgecount = 1000000; + GraphVerifier gv(stream.nodes()); + start = std::chrono::high_resolution_clock::now(); + + for (int i = 0; i < edgecount; i++) { + GraphUpdate update = stream.get_edge(); + hybrid_driver.update(update); + gv.edge_update(update.edge); + unlikely_if(i%1000 == 0 || i == edgecount-1) { + std::vector> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + std::cout << "Update " << i << ", CCs correct." << std::endl; + } catch (IncorrectCCException& e) { + std::cout << "Incorrect connected components found at update " << i << std::endl; + std::cout << "GOT: " << cc.size() << std::endl; + std::cout << "EXPECTED: " << gv.get_num_kruskal_ccs() << std::endl; + FAIL(); + } + } + } + std::ofstream file; + file.open ("omp_kron_results.txt", std::ios_base::app); + file << stream_file << " passed correctness test." << std::endl; + file.close(); + + } catch (BadStreamException& e) { + std::cout << "ERROR: Stream binary file not found." << std::endl; + } +} + +TEST(HybridGraphTiersSuite, omp_speed_test) { + // omp_set_dynamic(1); + try { + long time = 0; + BinaryGraphStream stream(stream_file, 100000); + + // height_factor = 1;//1./log2(log2(stream.nodes())); + height_factor = 1/log2(log2(stream.nodes())); + sketch_len = Sketch::calc_vector_length(stream.nodes()); + sketch_err = DEFAULT_SKETCH_ERR; + + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t seed = dist(rng); + uint32_t num_tiers = compute_num_tiers(stream.nodes()); + HybridConnectivityManager hybrid_driver( + stream.nodes(), num_tiers, update_batch_size, seed + ); + int edgecount = stream.edges(); + start = std::chrono::high_resolution_clock::now(); + + START(timer); + for (int i = 0; i < edgecount; i++) { + GraphUpdate update = stream.get_edge(); + hybrid_driver.update(update); + unlikely_if (i % 1000000000 == 0) { + auto stop = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(stop - start); + std::cout << "FINISHED UPDATE " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; + } + } + STOP(time, timer); + print_metrics(); + std::ofstream file; + file.open ("omp_kron_results.txt", std::ios_base::app); + file << stream_file << " time (ms): "<< time/1000 << std::endl; + file.close(); + + } catch (BadStreamException& e) { + std::cout << "ERROR: Stream binary file not found." << std::endl; + } +} + +TEST(HybridGraphTiersSuite, query_speed_test) { + // omp_set_dynamic(1); + try { + + BinaryGraphStream stream(stream_file, 100000); + + height_factor = 1/log2(log2(stream.nodes())); + sketch_len = Sketch::calc_vector_length(stream.nodes()); + sketch_err = DEFAULT_SKETCH_ERR; + + int nodecount = stream.nodes(); + + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + uint64_t sketch_seed = dist(rng); + uint32_t num_tiers = compute_num_tiers(nodecount); + HybridConnectivityManager hybrid_driver( + nodecount, num_tiers, update_batch_size, sketch_seed + ); + int edgecount = 150000; + + std::cout << "Building up graph..." << std::endl; + for (int i = 0; i < edgecount; i++) { + GraphUpdate update = stream.get_edge(); + hybrid_driver.update(update); + } + + int querycount = 1000000; + int seed = time(NULL); + srand(seed); + std::cout << "Performing queries..." 
<< std::endl; + auto start = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < querycount; i++) { + hybrid_driver.connectivity_query(rand()%nodecount, rand()%nodecount); + } + auto stop = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(stop - start); + std::cout << querycount << " Connectivity Queries, Time: " << duration.count() << std::endl; + start = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < querycount/100; i++) { + hybrid_driver.cc_query(); + } + stop = std::chrono::high_resolution_clock::now(); + duration = std::chrono::duration_cast(stop - start); + std::cout << querycount/100 << " Connected Components Queries, Time: " << duration.count() << std::endl; + + + } catch (BadStreamException& e) { + std::cout << "ERROR: Stream binary file not found." << std::endl; + } +} diff --git a/test/hybrid_test_runner.cpp b/test/hybrid_test_runner.cpp new file mode 100644 index 0000000..99a458e --- /dev/null +++ b/test/hybrid_test_runner.cpp @@ -0,0 +1,28 @@ +#include +#include +#include "util.h" + + +std::string stream_file; +int hybrid_threshold_arg; +int batch_size_arg; +double height_factor_arg; + +int main(int argc, char** argv) { + MPI_Init(&argc, &argv); + + if (argc < 5) { + std::cerr << "INCORRECT NUMBER OF ARGUMENTS." << std::endl; + return EXIT_FAILURE; + } + + stream_file = argv[1]; + batch_size_arg = atoi(argv[2]); + height_factor_arg = atof(argv[3]); + hybrid_threshold_arg = atoi(argv[4]); + + testing::InitGoogleTest(&argc, argv); + int ret = RUN_ALL_TESTS(); + MPI_Finalize(); + return ret; +} diff --git a/test/hybrid_tests.cpp b/test/hybrid_tests.cpp new file mode 100644 index 0000000..3dde5de --- /dev/null +++ b/test/hybrid_tests.cpp @@ -0,0 +1,855 @@ +#include +#include +#include +#include +#include +#include +#include +// #include +#include "mpi_nodes.h" +#include "binary_graph_stream.h" +// #include "mat_graph_verifier.h" +#include "graph_verifier.h" +#include "mpi_hybrid_conn.h" +#include "util.h" + + +const int DEFAULT_BATCH_SIZE = 100; +const int DEFAULT_HYBRID_THRESHOLD = 1400; +const vec_t DEFAULT_SKETCH_ERR = 1; + +// TEST(GraphTierSuite, hybrid_mixed_speed_test) { +// int world_rank_buf; +// MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf); +// uint32_t world_rank = world_rank_buf; +// int world_size_buf; +// MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf); +// uint32_t world_size = world_size_buf; + +// BinaryGraphStream stream(stream_file, 100000); +// uint32_t num_nodes = stream.nodes(); +// uint32_t num_tiers = log2(num_nodes)/(log2(3)-1); + +// // Parameters +// int update_batch_size = (batch_size_arg==0) ? DEFAULT_BATCH_SIZE : batch_size_arg; +// height_factor = (height_factor_arg==0) ? 1./log2(log2(num_nodes)) : height_factor_arg; +// sketchless_height_factor = height_factor; +// sketch_len = Sketch::calc_vector_length(num_nodes); +// sketch_err = DEFAULT_SKETCH_ERR; + +// std::cout << "BATCH SIZE: " << update_batch_size << " HEIGHT FACTOR " << height_factor << " SKETCH BUFFER: " << SKETCH_BUFFER_SIZE << std::endl; + +// // Seeds +// std::random_device dev; +// std::mt19937 rng(dev()); +// std::uniform_int_distribution dist(0,MAX_INT); +// int seed = dist(rng); +// bcast(&seed, sizeof(int), 0); +// std::cout << "SEED: " << seed << std::endl; +// rng.seed(seed); +// for (int i = 0; i < world_rank; i++) +// dist(rng); +// int tier_seed = dist(rng); + +// if (world_size != num_tiers+1) +// FAIL() << "MPI world size too small for graph with " << num_nodes << " vertices. 
Correct world size is: " << num_tiers+1; + +// if (world_rank == 0) { +// int seed = time(NULL); +// srand(seed); +// std::cout << "InputNode seed: " << seed << std::endl; +// InputNode input_node(num_nodes, num_tiers, update_batch_size, seed); +// long edgecount = stream.edges(); +// // long count = 100000000; +// // edgecount = std::min(edgecount, count); +// long total_update_time = 0; +// long total_query_time = 0; +// auto update_timer = std::chrono::high_resolution_clock::now(); +// auto query_timer = update_timer; +// bool doing_updates = true; +// for (long i = 0; i < edgecount; i++) { +// // Read an update from the stream and have the input node process it +// GraphUpdate operation = stream.get_edge(); +// if (operation.type == 2) { // 2 is the symbol for queries +// unlikely_if (doing_updates) { +// total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); +// doing_updates = false; +// query_timer = std::chrono::high_resolution_clock::now(); +// } +// input_node.connectivity_query(operation.edge.src, operation.edge.dst); +// } else { +// unlikely_if (!doing_updates) { +// total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); +// doing_updates = true; +// update_timer = std::chrono::high_resolution_clock::now(); +// } +// input_node.update(operation); +// } +// unlikely_if(i%1000000 == 0 || i == edgecount-1) { +// std::cout << "FINISHED OPERATION " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; +// } +// } +// if (doing_updates) { +// total_update_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - update_timer).count(); +// } else { +// total_query_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - query_timer).count(); +// } +// // Communicate to all other nodes that the stream has ended +// input_node.end(); +// std::cout << "Total update time(ms): " << (total_update_time/1000) << std::endl; +// std::cout << "Total query time(ms): " << (total_query_time/1000) << std::endl; +// std::cout << "Total time(ms): " << (total_query_time + total_update_time)/1000 << std::endl; + +// std::ofstream file; +// std::string out_file = "./../results/mpi_speed_results/" + stream_file.substr(stream_file.find("/") + 1) + ".txt"; +// std::cout << "WRITING RESULTS TO " << out_file << std::endl; +// file.open (out_file, std::ios_base::app); +// file << " UPDATES/SECOND: " << (0.9*edgecount)/(total_update_time) << std::endl; +// file << " QUERIES/SECOND: " << (0.1*edgecount)/(total_query_time) << std::endl; +// file.close(); + +// } else if (world_rank < num_tiers+1) { +// int tier_num = world_rank-1; +// TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed); +// tier_node.main(); +// } +// } + +TEST(GraphTierSuite, hybrid_update_speed_test) { + int world_rank_buf; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf); + uint32_t world_rank = world_rank_buf; + int world_size_buf; + MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf); + uint32_t world_size = world_size_buf; + + BinaryGraphStream stream(stream_file, 100000); + uint32_t num_nodes = stream.nodes(); + uint32_t num_tiers = log2(num_nodes)/(log2(3)-1); + + // Parameters + int update_batch_size = (batch_size_arg==0) ? DEFAULT_BATCH_SIZE : batch_size_arg; + int threshold = (batch_size_arg==0) ? DEFAULT_HYBRID_THRESHOLD : hybrid_threshold_arg; + height_factor = (height_factor_arg==0) ? 
1./log2(log2(num_nodes)) : height_factor_arg; + sketchless_height_factor = height_factor; + sketch_len = Sketch::calc_vector_length(num_nodes); + sketch_err = DEFAULT_SKETCH_ERR; + + std::cout << "BATCH SIZE: " << update_batch_size << " HEIGHT FACTOR " << height_factor << std::endl; + + // Seeds + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + int seed = dist(rng); + bcast(&seed, sizeof(int), 0); + std::cout << "SEED: " << seed << std::endl; + rng.seed(seed); + for (int i = 0; i < world_rank; i++) + dist(rng); + int tier_seed = dist(rng); + + if (world_size != num_tiers+1) + FAIL() << "MPI world size too small for graph with " << num_nodes << " vertices. Correct world size is: " << num_tiers+1; + + if (world_rank == 0) { + int seed = time(NULL); + srand(seed); + std::cout << "InputNode seed: " << seed << std::endl; + // InputNode input_node(num_nodes, num_tiers, update_batch_size, seed); + HybridConnectivityManager<> hybrid_manager( + num_nodes, num_tiers, update_batch_size, seed + ); + hybrid_manager.set_threshold(threshold); + long edgecount = stream.edges(); + // long count = 100000000; + // edgecount = std::min(edgecount, count); + auto X = std::chrono::high_resolution_clock::now(); + for (long i = 0; i < edgecount; i++) { + // Read an update from the stream and have the input node process it + GraphUpdate update = stream.get_edge(); + hybrid_manager.update(update); + unlikely_if(i%1000000 == 0 || i == edgecount-1) { + std::cout << "FINISHED UPDATE " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; + // std::cout << "Memory usage: " << hybrid_manager.cf_algo.getMemUsage() / 1000000 << std::endl; + std::cout << "Sketched nodes: " << hybrid_manager.num_sketched_vertices() << " out of " << num_nodes << std::endl; + } + } + // Communicate to all other nodes that the stream has ended + hybrid_manager.sketching_algo.end(); + auto time = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - X).count(); + std::cout << "Total time(ms): " << (time/1000) << std::endl; + + std::ofstream file; + file.open ("./../results/mpi_update_results.txt", std::ios_base::app); + file << stream_file << " UPDATES/SECOND: " << edgecount/(time/1000)*1000 << std::endl; + file.close(); + + } else if (world_rank < num_tiers+1) { + int tier_num = world_rank-1; + TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed); + tier_node.main(); + } +} + +TEST(GraphTiersSuite, hybrid_query_speed_test) { + int world_rank_buf; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf); + uint32_t world_rank = world_rank_buf; + int world_size_buf; + MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf); + uint32_t world_size = world_size_buf; + + BinaryGraphStream stream(stream_file, 1000000); + uint32_t num_nodes = stream.nodes(); + uint32_t num_tiers = log2(num_nodes)/(log2(3)-1); + int nodecount = stream.nodes(); + int edgecount = stream.edges(); + if (edgecount > 100000000) edgecount = 100000000; + + // Parameters + int update_batch_size = (batch_size_arg==0) ? DEFAULT_BATCH_SIZE : batch_size_arg; + int threshold = (batch_size_arg==0) ? DEFAULT_HYBRID_THRESHOLD : hybrid_threshold_arg; + height_factor = (height_factor_arg==0) ? 
1./log2(log2(num_nodes)) : height_factor_arg; + sketchless_height_factor = height_factor; + sketch_len = Sketch::calc_vector_length(num_nodes); + sketch_err = DEFAULT_SKETCH_ERR; + + // Seeds + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + int seed = dist(rng); + bcast(&seed, sizeof(int), 0); + std::cout << "SEED: " << seed << std::endl; + rng.seed(seed); + for (int i = 0; i < world_rank; i++) + dist(rng); + int tier_seed = dist(rng); + + if (world_size != num_tiers+1) + FAIL() << "MPI world size too small for graph with " << num_nodes << " vertices. Correct world size is: " << num_tiers+1; + + if (world_rank == 0) { + int seed = time(NULL); + srand(seed); + std::cout << "InputNode seed: " << seed << std::endl; + // InputNode input_node(num_nodes, num_tiers, update_batch_size, seed); + HybridConnectivityManager hybrid_driver( + num_nodes, num_tiers, update_batch_size, seed + ); + hybrid_driver.set_threshold(threshold); + + long total_time = 0; + for (int batch = 0; batch < 10; batch++) { + std::cout << stream_file << " update batch " << batch << std::endl; + for (int i = 0; i < edgecount/10; i++) { + GraphUpdate update = stream.get_edge(); + hybrid_driver.update(update); + } + + long querycount = 100000000; + + std::cout << "Performing queries..." << std::endl; + auto X = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < querycount; i++) { + hybrid_driver.connectivity_query(rand()%nodecount, rand()%nodecount); + } + auto time = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - X).count(); + std::cout << querycount << " Connectivity Queries, Time (ms): " << time/1000 << std::endl; + total_time += time; + } + hybrid_driver.sketching_algo.end(); + + std::cout << "TOTAL TIME(ms): " << total_time/1000 << std::endl; + std::cout << "QUERIES/SECOND: " << 1000000000/(total_time/1000)*1000 << std::endl; + std::ofstream file; + file.open ("./../results/mpi_query_results.txt", std::ios_base::app); + file << stream_file << " QUERIES/SECOND: " << 1000000000/(total_time/1000)*1000 << std::endl; + file.close(); + + } else if (world_rank < num_tiers+1) { + int tier_num = world_rank-1; + TierNode tier_node(num_nodes, world_rank-1, num_tiers, update_batch_size, tier_seed); + tier_node.main(); + } +} + +TEST(GraphTierSuite, hybrid_memory_test) { + int world_rank_buf; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf); + uint32_t world_rank = world_rank_buf; + int world_size_buf; + MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf); + uint32_t world_size = world_size_buf; + + BinaryGraphStream stream(stream_file, 100000); + uint32_t num_nodes = stream.nodes(); + // uint32_t num_tiers = log2(num_nodes)/(log2(3)-1); + std::cout << "Theory-informed number of tiers: " << log2(num_nodes)/(log2(3)-1) << std::endl; + // TEMPORARY CHANGE - MAKE THE USER DECIDE WORLD SIZE + uint32_t num_tiers = world_size-1; + std::cout << "Using number of tiers: " << num_tiers << std::endl; + + // Parameters + int update_batch_size = (batch_size_arg==0) ? DEFAULT_BATCH_SIZE : batch_size_arg; + int threshold = (batch_size_arg==0) ? DEFAULT_HYBRID_THRESHOLD : hybrid_threshold_arg; + height_factor = (height_factor_arg==0) ? 
1./log2(log2(num_nodes)) : height_factor_arg; + sketchless_height_factor = height_factor; + sketch_len = Sketch::calc_vector_length(num_nodes); + sketch_err = DEFAULT_SKETCH_ERR; + + std::cout << "BATCH SIZE: " << update_batch_size << " HEIGHT FACTOR " << height_factor << std::endl; + + // Seeds + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + int seed = dist(rng); + bcast(&seed, sizeof(int), 0); + std::cout << "SEED: " << seed << std::endl; + rng.seed(seed); + for (int i = 0; i < world_rank; i++) + dist(rng); + int tier_seed = dist(rng); + + if (world_size != num_tiers+1) + FAIL() << "MPI world size too small for graph with " << num_nodes << " vertices. Correct world size is: " << num_tiers+1; + + if (world_rank == 0) { + int seed = time(NULL); + srand(seed); + std::cout << "InputNode seed: " << seed << std::endl; + // InputNode input_node(num_nodes, num_tiers, update_batch_size, seed); + HybridConnectivityManager<> hybrid_manager( + num_nodes, num_tiers, update_batch_size, seed + ); + hybrid_manager.set_threshold(threshold); + long edgecount = stream.edges(); + // long count = 100000000; + // edgecount = std::min(edgecount, count); + auto X = std::chrono::high_resolution_clock::now(); + for (long i = 0; i < edgecount; i++) { + // Read an update from the stream and have the input node process it + GraphUpdate update = stream.get_edge(); + hybrid_manager.update(update); + unlikely_if(i%1000000 == 0 || i == edgecount-1) { + std::cout << "FINISHED UPDATE " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl; + // std::cout << "Memory usage: " << hybrid_manager.cf_algo.getMemUsage() / 1000000 << std::endl; + std::cout << "Sketched nodes: " << hybrid_manager.num_sketched_vertices() << " out of " << num_nodes << std::endl; + if (i%20000000 == 0 || i == edgecount-1) { + std::cout << "Sketched nodes: " << hybrid_manager.sketched_node_count() << " out of " << stream.nodes() << std::endl; + std::cout << "- Space usage of CF: " << hybrid_manager.get_space_usage_cf()/(1024*1024) << " MB" << std::endl; + std::cout << "- Space usage of Driver: " << hybrid_manager.get_space_usage_driver()/(1024*1024) << " MB" << std::endl; + std::cout << "- Space usage of Sketches: " << hybrid_manager.space_usage_conn_sketch()/(1024*1024) << " MB" << std::endl; + std::cout << "- Space usage of Recovery Sketches: " << hybrid_manager.space_usage_recovery_sketch()/(1024*1024) << " MB" << std::endl; + std::cout << "- Total edges: " << hybrid_manager.total_edges() << std::endl; + std::cout << "- Sketched edges: " << hybrid_manager.num_sketched_edges() << std::endl; + double percent_sketched = 100.0 * ((double)hybrid_manager.num_sketched_edges()) / ((double)hybrid_manager.total_edges()); + std::cout << "- Percent sketched edges: " << percent_sketched << "%" << std::endl; + } + } + } + // Communicate to all other nodes that the stream has ended + hybrid_manager.sketching_algo.end(); + auto time = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - X).count(); + std::cout << "Total time(ms): " << (time/1000) << std::endl; + + std::ofstream file; + file.open ("./../results/mpi_update_results.txt", std::ios_base::app); + file << stream_file << " UPDATES/SECOND: " << edgecount/(time/1000)*1000 << std::endl; + file.close(); + + } else if (world_rank < num_tiers+1) { + int tier_num = world_rank-1; + TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed); + tier_node.main(); + } +} + +TEST(GraphTiersSuite, 
hybrid_mini_correctness_test) { + int world_rank_buf; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf); + uint32_t world_rank = world_rank_buf; + int world_size_buf; + MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf); + uint32_t world_size = world_size_buf; + + uint32_t num_nodes = 100; + uint32_t num_tiers = log2(num_nodes)/(log2(3)-1); + if (world_size != num_tiers+1) + FAIL() << "MPI world size too small for graph with " << num_nodes << " vertices. Correct world size is: " << num_tiers+1; + // Parameters + int update_batch_size = 1; + height_factor = 1; + sketch_len = Sketch::calc_vector_length(num_nodes); + sketch_err = DEFAULT_SKETCH_ERR; + + // Seeds + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + int seed = dist(rng); + bcast(&seed, sizeof(int), 0); + std::cout << "SEED: " << seed << std::endl; + rng.seed(seed); + for (int i = 0; i < world_rank; i++) + dist(rng); + int tier_seed = dist(rng); + + if (world_rank == 0) { + int seed = time(NULL); + srand(seed); + std::cout << "InputNode seed: " << seed << std::endl; + // InputNode input_node(num_nodes, num_tiers, update_batch_size, seed); + // + HybridConnectivityManager hybrid_driver( + num_nodes, num_tiers, update_batch_size, seed + ); + GraphVerifier gv(num_nodes); + // Link all of the nodes into 1 connected component + for (node_id_t i = 0; i < num_nodes-1; i++) { + hybrid_driver.update({{i, i+1}, INSERT}); + gv.edge_update({i,i+1}); + std::cout << "Attempting query" << std::endl; + std::vector> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << num_nodes-i-1 << " components" << std::endl; + FAIL(); + } + } + // One by one cut all of the nodes into singletons + for (node_id_t i = 0; i < num_nodes-1; i++) { + hybrid_driver.update({{i, i+1}, DELETE}); + gv.edge_update({i,i+1}); + std::vector> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after cutting nodes " << i << " and " << i+1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << i+2 << " components" << std::endl; + FAIL(); + } + } + // Communicate to all other nodes that the stream has ended + hybrid_driver.sketching_algo.end(); + } else if (world_rank < num_tiers+1) { + int tier_num = world_rank-1; + TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed); + tier_node.main(); + } +} + +TEST(GraphTiersSuite, hybrid_small_correctness_test) { + int world_rank_buf; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf); + uint32_t world_rank = world_rank_buf; + int world_size_buf; + MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf); + uint32_t world_size = world_size_buf; + + uint32_t num_nodes = 512; + + uint32_t num_tiers = log2(num_nodes)/(log2(3)-1); + if (world_size != num_tiers+1) + FAIL() << "MPI world size too small for graph with " << num_nodes << " vertices. 
Correct world size is: " << num_tiers+1;
+ // Parameters + int update_batch_size = 1; + height_factor = 1; + sketch_len = Sketch::calc_vector_length(num_nodes); + sketch_err = DEFAULT_SKETCH_ERR; +
+ // Seeds + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_int_distribution dist(0,MAX_INT); + int seed = dist(rng); + bcast(&seed, sizeof(int), 0); + std::cout << "SEED: " << seed << std::endl; + rng.seed(seed); + for (int i = 0; i < world_rank; i++) + dist(rng); + int tier_seed = dist(rng); +
+ if (world_rank == 0) { + int seed = time(NULL); + srand(seed); + std::cout << "InputNode seed: " << seed << std::endl; + // InputNode input_node(num_nodes, num_tiers, update_batch_size, seed); + // + HybridConnectivityManager hybrid_driver( + num_nodes, num_tiers, update_batch_size, seed + ); + hybrid_driver.set_threshold(10); + GraphVerifier gv(num_nodes);
+ // Link all of the nodes into 1 connected component + for (node_id_t i = 0; i < num_nodes-1; i++) { + hybrid_driver.update({{i, i+1}, INSERT}); + gv.edge_update({i,i+1}); + // std::cout << "Attempting query" << std::endl; + std::vector<std::set<node_id_t>> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << num_nodes-i-1 << " components" << std::endl; + FAIL(); + } + }
+ // augment first few nodes so that they are hubs for the first half of the nodes. + node_id_t hub_nodes = 25; + for (node_id_t i=0; i < hub_nodes; i++) { + // don't insert any edges that already exist: + for (node_id_t j = hub_nodes+2; j < num_nodes/2; j++) { + hybrid_driver.update({{i, j}, INSERT}); + gv.edge_update({i,j}); + } + std::vector<std::set<node_id_t>> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after inserting hub edges from node " << i << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl; + FAIL(); + } + } + std::cout << "Number of sketched nodes: " << hybrid_driver.num_sketched_vertices() << std::endl;
+ for (node_id_t i=0; i < hub_nodes; i++) { + for (node_id_t j = hub_nodes+2; j < num_nodes/2; j++) { + hybrid_driver.update({{i, j}, DELETE}); + gv.edge_update({i,j}); + } + std::vector<std::set<node_id_t>> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after deleting hub edges from node " << i << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl; + FAIL(); + } + } + std::cout << "Number of sketched nodes: " << hybrid_driver.num_sketched_vertices() << std::endl; +
+ // One by one cut all of the nodes into singletons + for (node_id_t i = 0; i < num_nodes-1; i++) { + hybrid_driver.update({{i, i+1}, DELETE}); + gv.edge_update({i,i+1}); + std::vector<std::set<node_id_t>> cc = hybrid_driver.cc_query(); + try { + // gv.reset_cc_state(); + gv.verify_cc_from_component_set(cc); + } catch (IncorrectCCException& e) { + std::cout << "Incorrect cc found after cutting nodes " << i << " and " << i+1 << std::endl; + std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << i+2 << " components" << std::endl; + FAIL(); + } + }
+ // Communicate to all other nodes that the stream has ended
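+ // NOTE: end() is what releases the TierNode ranks from tier_node.main();
+ // without it they would presumably keep waiting for batches and the
+ // runner's MPI_Finalize would never be reached.
+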
+        hybrid_driver.sketching_algo.end();
+    } else if (world_rank < num_tiers+1) {
+        int tier_num = world_rank-1;
+        TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed);
+        tier_node.main();
+    }
+}
+
+// TEST(GraphTiersSuite, hybrid_mini_replacement_test) {
+//     int world_rank_buf;
+//     MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf);
+//     uint32_t world_rank = world_rank_buf;
+//     int world_size_buf;
+//     MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf);
+//     uint32_t world_size = world_size_buf;
+
+//     uint32_t num_nodes = 100;
+//     uint32_t num_tiers = log2(num_nodes)/(log2(3)-1);
+//     if (world_size != num_tiers+1)
+//         FAIL() << "MPI world size incorrect for graph with " << num_nodes << " vertices. Correct world size is: " << num_tiers+1;
+//     // Parameters
+//     int update_batch_size = 1;
+//     height_factor = 1;
+//     sketch_len = Sketch::calc_vector_length(num_nodes);
+//     sketch_err = DEFAULT_SKETCH_ERR;
+
+//     // Seeds
+//     std::random_device dev;
+//     std::mt19937 rng(dev());
+//     std::uniform_int_distribution<int> dist(0,MAX_INT);
+//     int seed = dist(rng);
+//     bcast(&seed, sizeof(int), 0);
+//     std::cout << "SEED: " << seed << std::endl;
+//     rng.seed(seed);
+//     for (int i = 0; i < world_rank; i++)
+//         dist(rng);
+//     int tier_seed = dist(rng);
+
+//     if (world_rank == 0) {
+//         int seed = time(NULL);
+//         srand(seed);
+//         std::cout << "InputNode seed: " << seed << std::endl;
+//         InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
+//         GraphVerifier gv(num_nodes);
+//         // Link all of the nodes into 1 connected component
+//         for (node_id_t i = 0; i < num_nodes-1; i++) {
+//             input_node.update({{i, i+1}, INSERT});
+//             gv.edge_update({i,i+1});
+//             std::vector<std::set<node_id_t>> cc = input_node.cc_query();
+//             try {
+//                 // gv.reset_cc_state();
+//                 gv.verify_cc_from_component_set(cc);
+//             } catch (IncorrectCCException& e) {
+//                 std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl;
+//                 std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << num_nodes-i-1 << " components" << std::endl;
+//                 FAIL();
+//             }
+//         }
+//         // Generate a random bridge
+//         node_id_t first = rand() % num_nodes;
+//         node_id_t second = rand() % num_nodes;
+//         while(first == second || second == first+1 || first == second+1)
+//             second = rand() % num_nodes;
+//         input_node.update({{first, second}, INSERT});
+//         gv.edge_update({first, second});
+//         node_id_t distance = std::max(first, second) - std::min(first, second);
+//         // Cut a random edge that should be replaced by the bridge
+//         first = std::min(first, second) + rand() % (distance-1);
+//         input_node.update({{first, first+1}, DELETE});
+//         gv.edge_update({first, first+1});
+//         // Check the connected components
+//         std::vector<std::set<node_id_t>> cc = input_node.cc_query();
+//         try {
+//             // gv.reset_cc_state();
+//             gv.verify_cc_from_component_set(cc);
+//         } catch (IncorrectCCException& e) {
+//             std::cout << "Incorrect cc found after cutting nodes " << first << " and " << first+1 << std::endl;
+//             std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
+//             FAIL();
+//         }
+//         // Communicate to all other nodes that the stream has ended
+//         input_node.end();
+//     } else if (world_rank < num_tiers+1) {
+//         int tier_num = world_rank-1;
+//         TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed);
+//         tier_node.main();
+//     }
+// }
+
+// TEST(GraphTiersSuite, hybrid_mini_batch_test) {
+//     int world_rank_buf;
+//     MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf);
+//     uint32_t world_rank = world_rank_buf;
+//     int world_size_buf;
+//     MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf);
+//     uint32_t world_size = world_size_buf;
+
+//     uint32_t num_nodes = 100;
+//     uint32_t num_tiers = log2(num_nodes)/(log2(3)-1);
+//     if (world_size != num_tiers+1)
+//         FAIL() << "MPI world size incorrect for graph with " << num_nodes << " vertices. Correct world size is: " << num_tiers+1;
+//     // Parameters
+//     int update_batch_size = 10;
+//     height_factor = 1;
+//     sketch_len = Sketch::calc_vector_length(num_nodes);
+//     sketch_err = DEFAULT_SKETCH_ERR;
+
+//     // Seeds
+//     std::random_device dev;
+//     std::mt19937 rng(dev());
+//     std::uniform_int_distribution<int> dist(0,MAX_INT);
+//     int seed = dist(rng);
+//     bcast(&seed, sizeof(int), 0);
+//     std::cout << "SEED: " << seed << std::endl;
+//     rng.seed(seed);
+//     for (int i = 0; i < world_rank; i++)
+//         dist(rng);
+//     int tier_seed = dist(rng);
+
+//     if (world_rank == 0) {
+//         int seed = time(NULL);
+//         srand(seed);
+//         std::cout << "InputNode seed: " << seed << std::endl;
+//         InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
+//         GraphVerifier gv(num_nodes);
+//         // Link all of the nodes into 1 connected component
+//         for (node_id_t i = 0; i < num_nodes-1; i++) {
+//             input_node.update({{i, i+1}, INSERT});
+//             gv.edge_update({i,i+1});
+//             std::vector<std::set<node_id_t>> cc = input_node.cc_query();
+//             try {
+//                 // gv.reset_cc_state();
+//                 gv.verify_cc_from_component_set(cc);
+//             } catch (IncorrectCCException& e) {
+//                 std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl;
+//                 std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << num_nodes-i-1 << " components" << std::endl;
+//                 FAIL();
+//             }
+//         }
+//         // Add a batch that has no isolations
+//         input_node.process_all_updates();
+//         for (node_id_t i=0; i<(node_id_t)update_batch_size; i++) {
+//             input_node.update({{i, i+2}, INSERT});
+//             gv.edge_update({i,i+2});
+//         }
+//         // Check the connected components
+//         std::vector<std::set<node_id_t>> cc = input_node.cc_query();
+//         try {
+//             // gv.reset_cc_state();
+//             gv.verify_cc_from_component_set(cc);
+//         } catch (IncorrectCCException& e) {
+//             std::cout << "Incorrect cc found after batch with no isolations" << std::endl;
+//             std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
+//             FAIL();
+//         }
+//         for (node_id_t i=0; i<(node_id_t)update_batch_size; i++) {
+//             input_node.update({{i, i+2}, DELETE});
+//             gv.edge_update({i,i+2});
+//         }
+//         input_node.process_all_updates();
+//         // Add a batch that has one isolated deletion in the middle
+//         for (node_id_t i=0; i<(node_id_t)update_batch_size/2-2; i++) {
+//             input_node.update({{i, i+2}, INSERT});
+//             gv.edge_update({i,i+2});
+//         }
+//         input_node.update({{(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1}, DELETE});
+//         gv.edge_update({(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1});
+//         for (node_id_t i=(node_id_t)update_batch_size/2+1; i<(node_id_t)update_batch_size+2; i++) {
+//             input_node.update({{i, i+3}, INSERT});
+//             gv.edge_update({i,i+3});
+//         }
+//         // Check the connected components
+//         cc = input_node.cc_query();
+//         try {
+//             // gv.reset_cc_state();
+//             gv.verify_cc_from_component_set(cc);
+//         } catch (IncorrectCCException& e) {
+//             std::cout << "Incorrect cc found after batch with one isolated deletion" << std::endl;
+//             std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
+//             FAIL();
+//         }
+//         input_node.update({{(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1}, INSERT});
+//         gv.edge_update({(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1});
+//         input_node.process_all_updates();
+//         // Add a batch with multiple forest edge deletions
+//         for (node_id_t i=0; i<(node_id_t)update_batch_size/2-2; i++) {
+//             input_node.update({{i, i+3}, INSERT});
+//             gv.edge_update({i,i+3});
+//         }
+//         input_node.update({{2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+2}, INSERT}); // Add a replacement edge
+//         gv.edge_update({2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+2});
+//         input_node.update({{2*(node_id_t)update_batch_size+2, 2*(node_id_t)update_batch_size+3}, DELETE}); // First isolation
+//         gv.edge_update({2*(node_id_t)update_batch_size+2, 2*(node_id_t)update_batch_size+3});
+//         input_node.update({{2*(node_id_t)update_batch_size+4, 2*(node_id_t)update_batch_size+5}, DELETE}); // Non-replacing delete
+//         gv.edge_update({2*(node_id_t)update_batch_size+4, 2*(node_id_t)update_batch_size+5});
+//         input_node.update({{2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+1}, DELETE}); // Replacement delete
+//         gv.edge_update({2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+1});
+//         for (node_id_t i=(node_id_t)update_batch_size/2+1; i<(node_id_t)update_batch_size; i++) {
+//             input_node.update({{i, i+3}, INSERT});
+//             gv.edge_update({i,i+3});
+//         }
+//         // Check the connected components
+//         cc = input_node.cc_query();
+//         try {
+//             // gv.reset_cc_state();
+//             gv.verify_cc_from_component_set(cc);
+//         } catch (IncorrectCCException& e) {
+//             std::cout << "Incorrect cc found after batch with multiple forest edge deletions" << std::endl;
+//             std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
+//             FAIL();
+//         }
+//         // Communicate to all other nodes that the stream has ended
+//         input_node.end();
+//     } else if (world_rank < num_tiers+1) {
+//         int tier_num = world_rank-1;
+//         TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed);
+//         tier_node.main();
+//     }
+// }
+
+TEST(GraphTiersSuite, hybrid_correctness_test) {
+    int world_rank_buf;
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf);
+    uint32_t world_rank = world_rank_buf;
+    int world_size_buf;
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf);
+    uint32_t world_size = world_size_buf;
+
+    BinaryGraphStream stream(stream_file, 100000);
+    uint32_t num_nodes = stream.nodes();
+    uint32_t num_tiers = log2(num_nodes)/(log2(3)-1);
+    // Parameters
+    int update_batch_size = DEFAULT_BATCH_SIZE;
+    height_factor = 1./log2(log2(num_nodes));
+    sketch_len = Sketch::calc_vector_length(num_nodes);
+    sketch_err = DEFAULT_SKETCH_ERR;
+
+    // Seeds
+    std::random_device dev;
+    std::mt19937 rng(dev());
+    std::uniform_int_distribution<int> dist(0,MAX_INT);
+    int seed = dist(rng);
+    bcast(&seed, sizeof(int), 0);
+    std::cout << "SEED: " << seed << std::endl;
+    rng.seed(seed);
+    for (int i = 0; i < world_rank; i++)
+        dist(rng);
+    int tier_seed = dist(rng);
+
+    if (world_size != num_tiers+1)
+        FAIL() << "MPI world size incorrect for graph with " << num_nodes << " vertices."
+               << " Correct world size is: " << num_tiers+1;
+
+    if (world_rank == 0) {
+        int seed = time(NULL);
+        srand(seed);
+        std::cout << "InputNode seed: " << seed << std::endl;
+        // initialize data structures
+        // InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
+        // SCCWN cluster_forest(num_nodes);
+        HybridConnectivityManager hybrid_driver(
+            num_nodes, num_tiers, update_batch_size, seed
+        );
+
+        GraphVerifier gv(num_nodes);
+        int edgecount = stream.edges();
+        int count = 20000000;
+        edgecount = std::min(edgecount, count);
+        for (int i = 0; i < edgecount; i++) {
+            // Read an update from the stream and have the input node process it
+            GraphUpdate update = stream.get_edge();
+            hybrid_driver.update(update);
+            // Correctness testing by performing a cc query
+            gv.edge_update(update.edge);
+            unlikely_if(i%100000 == 0 || i == edgecount-1) {
+                std::vector<std::set<node_id_t>> cc = hybrid_driver.cc_query();
+                try {
+                    // gv.reset_cc_state();
+                    gv.verify_cc_from_component_set(cc);
+                    std::cout << "Update " << i << ", CCs correct." << std::endl;
+                } catch (IncorrectCCException& e) {
+                    std::cout << "Incorrect connected components found at update " << i << std::endl;
+                    std::cout << "GOT: " << cc.size() << std::endl;
+                    hybrid_driver.sketching_algo.end();
+                    FAIL();
+                }
+            }
+        }
+        std::ofstream file;
+        file.open("mpi_kron_results.txt", std::ios_base::app);
+        file << stream_file << " passed correctness test." << std::endl;
+        file.close();
+        // Communicate to all other nodes that the stream has ended
+        hybrid_driver.sketching_algo.end();
+
+    } else if (world_rank < num_tiers+1) {
+        int tier_num = world_rank-1;
+        TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed);
+        tier_node.main();
+    }
+}
diff --git a/test/link_cut_tree_test.cpp b/test/link_cut_tree_test.cpp
index 312f567..7be18cc 100644
--- a/test/link_cut_tree_test.cpp
+++ b/test/link_cut_tree_test.cpp
@@ -78,34 +78,35 @@ TEST(LinkCutTreeSuite, join_split_test) {
     // power of 2 node count
     int nodecount = 1024;
     LinkCutTree lct(nodecount);
+    lct.initialize_all_nodes();
     // Join every 2,4,8,16... nodes
     for (int i = 2; i <= nodecount; i*=2) {
         for (int j = 0; j < nodecount; j+=i) {
-            lct.nodes[j].splay();
-            lct.nodes[j+i/2].splay();
+            lct.node(j).splay();
+            lct.node(j+i/2).splay();
             //std::cout << "Join nodes: " << &nodes[j] << " and " << &nodes[j+i/2] << "\n";
-            LinkCutNode* p = lct.join(&lct.nodes[j], &lct.nodes[j+i/2]);
-            EXPECT_EQ(p->get_head(), &lct.nodes[j]);
-            EXPECT_EQ(p->get_tail(), &lct.nodes[j+i-1]);
+            LinkCutNode* p = lct.join(&lct.node(j), &lct.node(j+i/2));
+            EXPECT_EQ(p->get_head(), &lct.node(j));
+            EXPECT_EQ(p->get_tail(), &lct.node(j+i-1));
         }
         // Validate all nodes
         for (int i = 0; i < nodecount; i++) {
-            validate(&lct.nodes[i]);
+            validate(&lct.node(i));
        }
    }
    // Split Every ...16,8,4,2 nodes
    for (int i = nodecount; i > 1; i/=2) {
        for (int j = 0; j < nodecount; j+=i) {
            //std::cout << "Split on node: " << &nodes[j+i/2-1] << "\n";
-            std::pair<LinkCutNode*, LinkCutNode*> paths = lct.split(&lct.nodes[j+i/2-1]);
-            EXPECT_EQ(paths.first->get_head(), &lct.nodes[j]);
-            EXPECT_EQ(paths.first->get_tail(), &lct.nodes[j+i/2-1]);
-            EXPECT_EQ(paths.second->get_head(), &lct.nodes[j+i/2]);
-            EXPECT_EQ(paths.second->get_tail(), &lct.nodes[j+i-1]);
+            std::pair<LinkCutNode*, LinkCutNode*> paths = lct.split(&lct.node(j+i/2-1));
+            EXPECT_EQ(paths.first->get_head(), &lct.node(j));
+            EXPECT_EQ(paths.first->get_tail(), &lct.node(j+i/2-1));
+            EXPECT_EQ(paths.second->get_head(), &lct.node(j+i/2));
+            EXPECT_EQ(paths.second->get_tail(), &lct.node(j+i-1));
        }
        // Validate all nodes
        for (int i = 0; i < nodecount; i++) {
-            validate(&lct.nodes[i]);
+            validate(&lct.node(i));
        }
    }
 }
@@ -114,48 +115,57 @@ TEST(LinkCutTreeSuite, expose_simple_test) {
     int pathcount = 100;
     int nodesperpath = 100;
     LinkCutTree lct(nodesperpath*pathcount);
+    lct.initialize_all_nodes();
     // Link all the nodes in each path together
     for (int path = 0; path < pathcount; path++) {
         for (int node = 0; node < nodesperpath-1; node++) {
-            lct.nodes[path*nodesperpath+node].splay();
-            lct.join(&lct.nodes[path*nodesperpath+node], &lct.nodes[path*nodesperpath+node+1]);
+            lct.node(path*nodesperpath+node).splay();
+            lct.join(&lct.node(path*nodesperpath+node), &lct.node(path*nodesperpath+node+1));
        }
    }
    // Link all the paths together with dparent pointers half way up the previous path
    for (int path = 1; path < pathcount; path++) {
-        lct.nodes[path*nodesperpath].set_dparent(&lct.nodes[path*nodesperpath-nodesperpath/2]);
+        lct.node(path*nodesperpath).set_dparent(&lct.node(path*nodesperpath-nodesperpath/2));
    }
    // Call expose on the node half way up the bottom path
-    LinkCutNode* p = lct.expose(&lct.nodes[pathcount*nodesperpath-nodesperpath/2]);
+    LinkCutNode* p = lct.expose(&lct.node(pathcount*nodesperpath-nodesperpath/2));
    // Validate all nodes
    for (int i = 0; i < pathcount*nodesperpath; i++) {
-        validate(&lct.nodes[i]);
+        validate(&lct.node(i));
    }
    // Validate head and tail of returned path
-    EXPECT_EQ(p->get_head(), &lct.nodes[0]);
-    EXPECT_EQ(p->get_tail(), &lct.nodes[pathcount*nodesperpath-nodesperpath/2]) << "Exposed node not tail of path";
+    EXPECT_EQ(p->get_head(), &lct.node(0));
+    EXPECT_EQ(p->get_tail(), &lct.node(pathcount*nodesperpath-nodesperpath/2)) << "Exposed node not tail of path";
    // Validate all dparent pointers
    for (int path = 0; path < pathcount; path++) {
-        EXPECT_EQ(lct.nodes[(path+1)*nodesperpath-nodesperpath/2+1].get_dparent(), &lct.nodes[(path+1)*nodesperpath-nodesperpath/2]);
+        EXPECT_EQ(lct.node((path+1)*nodesperpath-nodesperpath/2+1).get_dparent(), &lct.node((path+1)*nodesperpath-nodesperpath/2));
    }
 }
 
 TEST(LinkCutTreeSuite, random_links_and_cuts) {
+    // TODO - restore the test cases.
     int nodecount = 1000;
     LinkCutTree lct(nodecount);
+    lct.initialize_all_nodes();
     int seed = time(NULL);
     // Link all nodes
     for (int i = 0; i < nodecount-1; i++) {
         lct.link(i,i+1, rand()%100);
-        ASSERT_TRUE(std::all_of(lct.nodes.begin(), lct.nodes.end(), [](auto& node){return validate(&node);}))
-            << "One or more invalid nodes found" << std::endl;
+        // ASSERT_TRUE(std::all_of(lct.nodes.begin(), lct.nodes.end(), [](auto& node){return validate(&node);}))
+        //     << "One or more invalid nodes found" << std::endl;
+        for (int j = 0; j < nodecount; j++) {
+            ASSERT_TRUE(validate(&lct.node(j)));
+        }
     }
     // Cut every node
     for (int i = 0; i < nodecount-1; i+=1) {
         lct.cut(i,i+1);
-        ASSERT_TRUE(std::all_of(lct.nodes.begin(), lct.nodes.end(), [](auto& node){return validate(&node);}))
-            << "One or more invalid nodes found" << std::endl;
+        // ASSERT_TRUE(std::all_of(lct.nodes.begin(), lct.nodes.end(), [](auto& node){return validate(&node);}))
+        //     << "One or more invalid nodes found" << std::endl;
+        for (int j = 0; j < nodecount; j++) {
+            ASSERT_TRUE(validate(&lct.node(j)));
+        }
     }
     // Do random links and cuts
     int n = 5000;
@@ -169,21 +179,24 @@ TEST(LinkCutTreeSuite, random_links_and_cuts) {
             //std::cout << i << ": Linking " << a << " and " << b << " weight " << weight << std::endl;
             lct.link(a, b, weight);
             //print_paths(&lct.nodes);
-        } else if (lct.nodes[a].edges.find(&lct.nodes[b]-&lct.nodes[0]) != lct.nodes[a].edges.end()) {
+        } else if (lct.node(a).edges.find(&lct.node(b)-&lct.node(0)) != lct.node(a).edges.end()) {
            //std::cout << i << ": Cutting " << a << " and " << b << std::endl;
            lct.cut(a, b);
            //print_paths(&lct.nodes);
        }
-        ASSERT_TRUE(std::all_of(lct.nodes.begin(), lct.nodes.end(), [](auto& node){return validate(&node);}))
-            << "One or more invalid nodes found" << std::endl;
+        // ASSERT_TRUE(std::all_of(lct.nodes.begin(), lct.nodes.end(), [](auto& node){return validate(&node);}))
+        //     << "One or more invalid nodes found" << std::endl;
+        for (int j = 0; j < nodecount; j++) {
+            ASSERT_TRUE(validate(&lct.node(j)));
+        }
        }
    }
    // Manually compute the aggregates for each aux tree
    std::map<LinkCutNode*, uint32_t> path_aggregates;
    for (int i = 0; i < nodecount; i++) {
-        uint32_t nodemax = std::max(lct.nodes[i].edges[lct.nodes[i].preferred_edges.first],
-            lct.nodes[i].edges[lct.nodes[i].preferred_edges.second]);
-        LinkCutNode* curr = &lct.nodes[i];
+        uint32_t nodemax = std::max(lct.node(i).edges[lct.node(i).preferred_edges.first],
+            lct.node(i).edges[lct.node(i).preferred_edges.second]);
+        LinkCutNode* curr = &lct.node(i);
        while (curr) {
            if (curr->get_parent() == nullptr) {
                if (path_aggregates.find(curr) != path_aggregates.end()) {
diff --git a/test/mpi_graph_tiers_test.cpp b/test/mpi_graph_tiers_test.cpp
index 3c05fd1..95fb799 100644
--- a/test/mpi_graph_tiers_test.cpp
+++ b/test/mpi_graph_tiers_test.cpp
@@ -5,17 +5,18 @@
 #include
 #include
 #include
-#include
+// #include
 #include "mpi_nodes.h"
 #include "binary_graph_stream.h"
-#include "mat_graph_verifier.h"
+// #include "mat_graph_verifier.h"
+#include "graph_verifier.h"
 #include "util.h"
 
 const int DEFAULT_BATCH_SIZE = 100;
 const vec_t DEFAULT_SKETCH_ERR = 1;
 
-TEST(GraphTierSuite, mpi_update_speed_test) {
+TEST(GraphTierSuite, mpi_mixed_speed_test) {
     int world_rank_buf;
     MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf);
     uint32_t world_rank = world_rank_buf;
@@ -25,8 +26,109 @@
     BinaryGraphStream stream(stream_file, 100000);
     uint32_t num_nodes = stream.nodes();
-    uint32_t num_tiers = log2(num_nodes)/(log2(3)-1);
+    // uint32_t num_tiers = log2(num_nodes)/(log2(3)-1);
+    uint32_t num_tiers = world_size - 1;
+    std::cout << "NUM TIERS: " << num_tiers << std::endl;
+
+    // Parameters
+    int update_batch_size = (batch_size_arg==0) ? DEFAULT_BATCH_SIZE : batch_size_arg;
+    height_factor = (height_factor_arg==0) ? 1./log2(log2(num_nodes)) : height_factor_arg;
+    sketchless_height_factor = height_factor;
+    sketch_len = Sketch::calc_vector_length(num_nodes);
+    sketch_err = DEFAULT_SKETCH_ERR;
+    std::cout << "BATCH SIZE: " << update_batch_size << " HEIGHT FACTOR " << height_factor << " SKETCH BUFFER: " << SKETCH_BUFFER_SIZE << std::endl;
+
+    // Seeds
+    std::random_device dev;
+    std::mt19937 rng(dev());
+    std::uniform_int_distribution<int> dist(0,MAX_INT);
+    int seed = dist(rng);
+    bcast(&seed, sizeof(int), 0);
+    std::cout << "SEED: " << seed << std::endl;
+    rng.seed(seed);
+    for (int i = 0; i < world_rank; i++)
+        dist(rng);
+    int tier_seed = dist(rng);
+
+    if (world_size != num_tiers+1)
+        FAIL() << "MPI world size incorrect for graph with " << num_nodes << " vertices. Correct world size is: " << num_tiers+1;
+
+    if (world_rank == 0) {
+        int seed = time(NULL);
+        srand(seed);
+        std::cout << "InputNode seed: " << seed << std::endl;
+        InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
+        input_node.initialize_all_nodes();
+        long edgecount = stream.edges();
+        // long count = 100000000;
+        // edgecount = std::min(edgecount, count);
+        long total_update_time = 0;
+        long total_query_time = 0;
+        auto update_timer = std::chrono::high_resolution_clock::now();
+        auto query_timer = update_timer;
+        bool doing_updates = true;
+        for (long i = 0; i < edgecount; i++) {
+            // Read an update from the stream and have the input node process it
+            GraphUpdate operation = stream.get_edge();
+            if (operation.type == 2) { // 2 is the symbol for queries
+                unlikely_if (doing_updates) {
+                    total_update_time += std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - update_timer).count();
+                    doing_updates = false;
+                    query_timer = std::chrono::high_resolution_clock::now();
+                }
+                input_node.connectivity_query(operation.edge.src, operation.edge.dst);
+            } else {
+                unlikely_if (!doing_updates) {
+                    total_query_time += std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - query_timer).count();
+                    doing_updates = true;
+                    update_timer = std::chrono::high_resolution_clock::now();
+                }
+                input_node.update(operation);
+            }
+            unlikely_if(i%1000000 == 0 || i == edgecount-1) {
+                std::cout << "FINISHED OPERATION " << i << " OUT OF " << edgecount << " IN " << stream_file << std::endl;
+            }
+        }
+        if (doing_updates) {
+            total_update_time += std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - update_timer).count();
+        } else {
+            total_query_time += std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - query_timer).count();
+        }
+        // Communicate to all other nodes that the stream has ended
+        input_node.end();
+        std::cout << "Total update time(ms): " << (total_update_time/1000) << std::endl;
+        std::cout << "Total query time(ms): " << (total_query_time/1000) << std::endl;
+        std::cout << "Total time(ms): " << (total_query_time + total_update_time)/1000 << std::endl;
+
+        std::ofstream file;
+        std::string out_file = "./../results/mpi_speed_results/" + stream_file.substr(stream_file.find("/") + 1) + ".txt";
+        std::cout << "WRITING RESULTS TO " << out_file << std::endl;
+        file.open(out_file, std::ios_base::app);
+        file << " UPDATES/SECOND: " << (0.9*edgecount)/(total_update_time) << std::endl;
+        file << " QUERIES/SECOND: " << (0.1*edgecount)/(total_query_time) << std::endl;
+        file.close();
+
+    } else if (world_rank < num_tiers+1) {
+        int tier_num = world_rank-1;
+        TierNode tier_node(num_nodes, tier_num, num_tiers, update_batch_size, tier_seed);
+        tier_node.main();
+    }
+}
+
+TEST(GraphTierSuite, mpi_update_speed_test) {
+    int world_rank_buf;
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank_buf);
+    uint32_t world_rank = world_rank_buf;
+    int world_size_buf;
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size_buf);
+    uint32_t world_size = world_size_buf;
+
+    BinaryGraphStream stream(stream_file, 100000);
+    uint32_t num_nodes = stream.nodes();
+    // uint32_t num_tiers = log2(num_nodes)/(log2(3)-1);
+    uint32_t num_tiers = world_size - 1;
+    std::cout << "NUM TIERS: " << num_tiers << std::endl;
     // Parameters
     int update_batch_size = (batch_size_arg==0) ? DEFAULT_BATCH_SIZE : batch_size_arg;
     height_factor = (height_factor_arg==0) ? 1./log2(log2(num_nodes)) : height_factor_arg;
@@ -56,6 +158,7 @@ TEST(GraphTierSuite, mpi_update_speed_test) {
     srand(seed);
     std::cout << "InputNode seed: " << seed << std::endl;
     InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
+    input_node.initialize_all_nodes();
     long edgecount = stream.edges();
     // long count = 100000000;
     // edgecount = std::min(edgecount, count);
@@ -127,6 +230,7 @@ TEST(GraphTiersSuite, mpi_query_speed_test) {
     srand(seed);
     std::cout << "InputNode seed: " << seed << std::endl;
     InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
+    input_node.initialize_all_nodes();
     long total_time = 0;
     for (int batch = 0; batch < 10; batch++) {
@@ -198,15 +302,16 @@ TEST(GraphTiersSuite, mpi_mini_correctness_test) {
     srand(seed);
     std::cout << "InputNode seed: " << seed << std::endl;
     InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
-    MatGraphVerifier gv(num_nodes);
+    input_node.initialize_all_nodes();
+    GraphVerifier gv(num_nodes);
     // Link all of the nodes into 1 connected component
     for (node_id_t i = 0; i < num_nodes-1; i++) {
         input_node.update({{i, i+1}, INSERT});
-        gv.edge_update(i,i+1);
+        gv.edge_update({i,i+1});
         std::vector<std::set<node_id_t>> cc = input_node.cc_query();
         try {
-            gv.reset_cc_state();
-            gv.verify_soln(cc);
+            // gv.reset_cc_state();
+            gv.verify_cc_from_component_set(cc);
         } catch (IncorrectCCException& e) {
             std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl;
             std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << num_nodes-i-1 << " components" << std::endl;
@@ -216,11 +321,11 @@ TEST(GraphTiersSuite, mpi_mini_correctness_test) {
     // One by one cut all of the nodes into singletons
     for (node_id_t i = 0; i < num_nodes-1; i++) {
         input_node.update({{i, i+1}, DELETE});
-        gv.edge_update(i,i+1);
+        gv.edge_update({i,i+1});
         std::vector<std::set<node_id_t>> cc = input_node.cc_query();
         try {
-            gv.reset_cc_state();
-            gv.verify_soln(cc);
+            // gv.reset_cc_state();
+            gv.verify_cc_from_component_set(cc);
         } catch (IncorrectCCException& e) {
             std::cout << "Incorrect cc found after cutting nodes " << i << " and " << i+1 << std::endl;
             std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << i+2 << " components" << std::endl;
@@ -271,15 +376,16 @@ TEST(GraphTiersSuite, mpi_mini_replacement_test) {
     srand(seed);
     std::cout << "InputNode seed: " << seed << std::endl;
     InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
-    MatGraphVerifier gv(num_nodes);
+    input_node.initialize_all_nodes();
+    GraphVerifier gv(num_nodes);
     // Link all of the nodes into 1 connected component
     for (node_id_t i = 0; i < num_nodes-1; i++) {
         input_node.update({{i, i+1}, INSERT});
-        gv.edge_update(i,i+1);
+        gv.edge_update({i,i+1});
         std::vector<std::set<node_id_t>> cc = input_node.cc_query();
         try {
-            gv.reset_cc_state();
-            gv.verify_soln(cc);
+            // gv.reset_cc_state();
+            gv.verify_cc_from_component_set(cc);
         } catch (IncorrectCCException& e) {
             std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl;
             std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << num_nodes-i-1 << " components" << std::endl;
@@ -292,17 +398,17 @@
     while(first == second || second == first+1 || first == second+1)
         second = rand() % num_nodes;
     input_node.update({{first, second}, INSERT});
-    gv.edge_update(first, second);
+    gv.edge_update({first, second});
     node_id_t distance = std::max(first, second) - std::min(first, second);
     // Cut a random edge that should be replaced by the bridge
     first = std::min(first, second) + rand() % (distance-1);
     input_node.update({{first, first+1}, DELETE});
-    gv.edge_update(first, first+1);
+    gv.edge_update({first, first+1});
     // Check the connected components
     std::vector<std::set<node_id_t>> cc = input_node.cc_query();
     try {
-        gv.reset_cc_state();
-        gv.verify_soln(cc);
+        // gv.reset_cc_state();
+        gv.verify_cc_from_component_set(cc);
     } catch (IncorrectCCException& e) {
         std::cout << "Incorrect cc found after cutting nodes " << first << " and " << first+1 << std::endl;
         std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
@@ -352,15 +458,16 @@ TEST(GraphTiersSuite, mpi_mini_batch_test) {
     srand(seed);
     std::cout << "InputNode seed: " << seed << std::endl;
     InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
-    MatGraphVerifier gv(num_nodes);
+    input_node.initialize_all_nodes();
+    GraphVerifier gv(num_nodes);
     // Link all of the nodes into 1 connected component
     for (node_id_t i = 0; i < num_nodes-1; i++) {
         input_node.update({{i, i+1}, INSERT});
-        gv.edge_update(i,i+1);
+        gv.edge_update({i,i+1});
         std::vector<std::set<node_id_t>> cc = input_node.cc_query();
         try {
-            gv.reset_cc_state();
-            gv.verify_soln(cc);
+            // gv.reset_cc_state();
+            gv.verify_cc_from_component_set(cc);
         } catch (IncorrectCCException& e) {
             std::cout << "Incorrect cc found after linking nodes " << i << " and " << i+1 << std::endl;
             std::cout << "GOT: " << cc.size() << " components, EXPECTED: " << num_nodes-i-1 << " components" << std::endl;
@@ -371,13 +478,13 @@
     input_node.process_all_updates();
     for (node_id_t i=0; i<(node_id_t)update_batch_size; i++) {
         input_node.update({{i, i+2}, INSERT});
-        gv.edge_update(i,i+2);
+        gv.edge_update({i,i+2});
     }
     // Check the connected components
     std::vector<std::set<node_id_t>> cc = input_node.cc_query();
     try {
-        gv.reset_cc_state();
-        gv.verify_soln(cc);
+        // gv.reset_cc_state();
+        gv.verify_cc_from_component_set(cc);
     } catch (IncorrectCCException& e) {
         std::cout << "Incorrect cc found after batch with no isolations" << std::endl;
         std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
@@ -385,55 +492,55 @@
     }
     for (node_id_t i=0; i<(node_id_t)update_batch_size; i++) {
         input_node.update({{i, i+2}, DELETE});
-        gv.edge_update(i,i+2);
+        gv.edge_update({i,i+2});
     }
     input_node.process_all_updates();
     // Add a batch that has one isolated deletion in the middle
     for (node_id_t i=0; i<(node_id_t)update_batch_size/2-2; i++) {
         input_node.update({{i, i+2}, INSERT});
-        gv.edge_update(i,i+2);
+        gv.edge_update({i,i+2});
     }
     input_node.update({{(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1}, DELETE});
-    gv.edge_update(update_batch_size/2, update_batch_size/2+1);
+    gv.edge_update({(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1});
     for (node_id_t i=(node_id_t)update_batch_size/2+1; i<(node_id_t)update_batch_size+2; i++) {
         input_node.update({{i, i+3}, INSERT});
-        gv.edge_update(i,i+3);
+        gv.edge_update({i,i+3});
     }
     // Check the connected components
     cc = input_node.cc_query();
     try {
-        gv.reset_cc_state();
-        gv.verify_soln(cc);
+        // gv.reset_cc_state();
+        gv.verify_cc_from_component_set(cc);
     } catch (IncorrectCCException& e) {
         std::cout << "Incorrect cc found after batch with one isolated deletion" << std::endl;
         std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
         FAIL();
     }
     input_node.update({{(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1}, INSERT});
-    gv.edge_update(update_batch_size/2, update_batch_size/2+1);
+    gv.edge_update({(node_id_t)update_batch_size/2, (node_id_t)update_batch_size/2+1});
     input_node.process_all_updates();
     // Add a batch with multiple forest edge deletions
     for (node_id_t i=0; i<(node_id_t)update_batch_size/2-2; i++) {
         input_node.update({{i, i+3}, INSERT});
-        gv.edge_update(i,i+3);
+        gv.edge_update({i,i+3});
     }
     input_node.update({{2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+2}, INSERT}); // Add a replacement edge
-    gv.edge_update(2*update_batch_size, 2*update_batch_size+2);
+    gv.edge_update({2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+2});
     input_node.update({{2*(node_id_t)update_batch_size+2, 2*(node_id_t)update_batch_size+3}, DELETE}); // First isolation
-    gv.edge_update(2*update_batch_size+2, 2*update_batch_size+3);
+    gv.edge_update({2*(node_id_t)update_batch_size+2, 2*(node_id_t)update_batch_size+3});
     input_node.update({{2*(node_id_t)update_batch_size+4, 2*(node_id_t)update_batch_size+5}, DELETE}); // Non-replacing delete
-    gv.edge_update(2*update_batch_size+4, 2*update_batch_size+5);
+    gv.edge_update({2*(node_id_t)update_batch_size+4, 2*(node_id_t)update_batch_size+5});
     input_node.update({{2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+1}, DELETE}); // Replacement delete
-    gv.edge_update(2*update_batch_size, 2*update_batch_size+1);
+    gv.edge_update({2*(node_id_t)update_batch_size, 2*(node_id_t)update_batch_size+1});
     for (node_id_t i=(node_id_t)update_batch_size/2+1; i<(node_id_t)update_batch_size; i++) {
         input_node.update({{i, i+3}, INSERT});
-        gv.edge_update(i,i+3);
+        gv.edge_update({i,i+3});
     }
     // Check the connected components
     cc = input_node.cc_query();
     try {
-        gv.reset_cc_state();
-        gv.verify_soln(cc);
+        // gv.reset_cc_state();
+        gv.verify_cc_from_component_set(cc);
     } catch (IncorrectCCException& e) {
         std::cout << "Incorrect cc found after batch with multiple forest edge deletions" << std::endl;
         std::cout << "GOT: " << cc.size() << " components, EXPECTED: 1 component" << std::endl;
@@ -485,7 +592,7 @@ TEST(GraphTiersSuite, mpi_correctness_test) {
     srand(seed);
     std::cout << "InputNode seed: " << seed << std::endl;
     InputNode input_node(num_nodes, num_tiers, update_batch_size, seed);
-    MatGraphVerifier gv(num_nodes);
+    GraphVerifier gv(num_nodes);
     int edgecount = stream.edges();
     int count = 20000000;
     edgecount = std::min(edgecount, count);
@@ -494,12 +601,12 @@
     GraphUpdate update = stream.get_edge();
     input_node.update(update);
     // Correctness testing by performing a cc query
-    gv.edge_update(update.edge.src, update.edge.dst);
+    gv.edge_update(update.edge);
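+    // [editor's note, not in the original patch] The MatGraphVerifier ->
+    // GraphVerifier swap throughout this file also changes the call shapes:
+    // edge_update() now takes one edge struct, so gv.edge_update({src, dst})
+    // and gv.edge_update(update.edge) replace the old two-argument form, and
+    // verify_cc_from_component_set(cc) appears to need no reset between
+    // queries, which is presumably why every gv.reset_cc_state() call is
+    // commented out rather than kept.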
     unlikely_if(i%1000 == 0 || i == edgecount-1) {
         std::vector<std::set<node_id_t>> cc = input_node.cc_query();
         try {
-            gv.reset_cc_state();
-            gv.verify_soln(cc);
+            // gv.reset_cc_state();
+            gv.verify_cc_from_component_set(cc);
             std::cout << "Update " << i << ", CCs correct." << std::endl;
         } catch (IncorrectCCException& e) {
             std::cout << "Incorrect connected components found at update " << i << std::endl;
diff --git a/test/skiplist_test.cpp b/test/skiplist_test.cpp
index 3b74b6a..1f9344a 100644
--- a/test/skiplist_test.cpp
+++ b/test/skiplist_test.cpp
@@ -2,8 +2,10 @@
 #include
 #include "skiplist.h"
 #include "euler_tour_tree.h"
+#include "sketch_interfacing.h"
 
-bool SkipListNode::isvalid() {
+template <typename Sketch> requires(SketchColumnConcept<Sketch>)
+bool SkipListNode<Sketch>::isvalid() {
     bool valid = true;
     if (this->up && this->up->down != this) valid = false;
     if (this->down && this->down->up != this) valid = false;
@@ -14,7 +16,8 @@
     return valid;
 }
 
-int SkipListNode::print_list() {
+template <typename Sketch> requires(SketchColumnConcept<Sketch>)
+int SkipListNode<Sketch>::print_list() {
     SkipListNode* curr = this->get_first();
     while (curr) {
         if (curr->node) std::cout << curr->node->vertex << ":\t";
@@ -31,15 +34,17 @@
     return 0;
 }
 
-bool aggregate_correct(SkipListNode* node) {
-    Sketch* naive_agg = new Sketch(sketch_len, node->node->get_seed(), 1, sketch_err);
-    std::set<EulerTourNode*> component = node->get_component();
+bool aggregate_correct(SkipListNode<DefaultSketchColumn>* node) {
+    // Sketch* naive_agg = new Sketch(sketch_len, node->node->get_seed(), 1, sketch_err);
+    DefaultSketchColumn *naive_agg = new DefaultSketchColumn(
+        DefaultSketchColumn::suggest_capacity(sketch_len), node->node->get_seed());
+    std::set<EulerTourNode<DefaultSketchColumn>*> component = node->get_component();
     for (auto ett_node : component) {
         naive_agg->update(ett_node->vertex);
     }
     node->get_root()->process_updates();
-    Sketch* list_agg = node->get_list_aggregate();
-    return *naive_agg == *list_agg;
+    const DefaultSketchColumn &list_agg = node->get_list_aggregate();
+    return *naive_agg == list_agg;
 }
 
 TEST(SkipListSuite, join_split_test) {
@@ -50,33 +55,34 @@
     long seed = time(NULL);
     srand(seed);
-    EulerTourTree ett(num_elements, 0, seed);
-    SkipListNode* nodes[num_elements];
+    EulerTourTree<DefaultSketchColumn> ett(num_elements, 0, seed);
+    ett.initialize_all_nodes();
+    SkipListNode<DefaultSketchColumn>* nodes[num_elements];
     // Construct all of the ett_nodes and singleton SkipList nodes
     for (int i = 0; i < num_elements; i++) {
         ett.update_sketch(i, (vec_t)i);
-        nodes[i] = ett.ett_nodes[i].allowed_caller;
+        nodes[i] = ett.ett_node(i).allowed_caller;
     }
     // Link all the nodes two at a time, then link them all
-    for (int i = 0; i < num_elements; i+=2) SkipListNode::join(nodes[i], nodes[i+1]);
+    for (int i = 0; i < num_elements; i+=2) SkipListNode<DefaultSketchColumn>::join(nodes[i], nodes[i+1]);
     for (int i = 0; i < num_elements; i++) {
         ASSERT_TRUE(nodes[i]->isvalid());
         ASSERT_TRUE(aggregate_correct(nodes[i])) << "Node " << i << " agg incorrect";
     }
-    for (int i = 0; i < num_elements-2; i+=2) SkipListNode::join(nodes[i], nodes[i+2]);
+    for (int i = 0; i < num_elements-2; i+=2) SkipListNode<DefaultSketchColumn>::join(nodes[i], nodes[i+2]);
     for (int i = 0; i < num_elements; i++) {
         ASSERT_TRUE(nodes[i]->isvalid());
         ASSERT_TRUE(aggregate_correct(nodes[i])) << "Node " << i << " agg incorrect";
     }
     // Split all nodes into pairs, then split each pair
-    for (int i = 0; i < num_elements-2; i+=2) SkipListNode::split_left(nodes[i+2]);
+    for (int i = 0; i < num_elements-2; i+=2) SkipListNode<DefaultSketchColumn>::split_left(nodes[i+2]);
     for (int i = 0; i < num_elements; i++) {
         ASSERT_TRUE(nodes[i]->isvalid());
         ASSERT_TRUE(aggregate_correct(nodes[i])) << "Node " << i << " agg incorrect";
     }
-    for (int i = 0; i < num_elements; i+=2) SkipListNode::split_left(nodes[i+1]);
+    for (int i = 0; i < num_elements; i+=2) SkipListNode<DefaultSketchColumn>::split_left(nodes[i+1]);
     for (int i = 0; i < num_elements; i++) {
         ASSERT_TRUE(nodes[i]->isvalid());
         ASSERT_TRUE(aggregate_correct(nodes[i])) << "Node " << i << " agg incorrect";
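
Editorial note on test/skiplist_test.cpp: `aggregate_correct()` above heap-allocates `naive_agg` and never frees it, so the leak grows with every assertion in `join_split_test`. Below is a minimal sketch of a leak-free variant, assuming `DefaultSketchColumn` can live on the stack and that its constructor, `update()`, and `operator==` have exactly the shapes used in the diff; the `SkipListNode<DefaultSketchColumn>` and `EulerTourNode` template spellings are reconstructed from context, not confirmed.

```cpp
// Hypothetical rewrite, not part of the patch: same check as aggregate_correct(),
// but with the naive aggregate on the stack so nothing leaks.
bool aggregate_correct_no_leak(SkipListNode<DefaultSketchColumn>* node) {
    DefaultSketchColumn naive_agg(
        DefaultSketchColumn::suggest_capacity(sketch_len), node->node->get_seed());
    for (auto ett_node : node->get_component())
        naive_agg.update(ett_node->vertex);          // rebuild the aggregate from scratch
    node->get_root()->process_updates();             // flush pending skip-list updates
    return naive_agg == node->get_list_aggregate();  // compare against maintained aggregate
}
```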