Skip to content

Commit 43f202c

Browse files
committed
more progress, still not complete prototype
1 parent 4647b2e commit 43f202c

File tree

11 files changed

+330
-104
lines changed

11 files changed

+330
-104
lines changed

CMakeLists.txt

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ project(GraphZeppelin)
33

44
include (FetchContent)
55

6-
set(CMAKE_CXX_STANDARD 14)
6+
set(CMAKE_CXX_STANDARD 17)
77
set(CMAKE_CXX_STANDARD_REQUIRED ON)
88
set(CMAKE_CXX_EXTENSIONS ON)
99
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
@@ -59,6 +59,22 @@ FetchContent_Declare(
5959
GIT_TAG main
6060
)
6161

62+
# Get VieCut
63+
FetchContent_Declare(
64+
VieCut
65+
66+
GIT_REPOSITORY https://github.com/etwest/VieCut.git
67+
GIT_TAG master
68+
)
69+
70+
# Get tlx
71+
FetchContent_Declare(
72+
tlx
73+
74+
GIT_REPOSITORY https://github.com/tlx/tlx.git
75+
GIT_TAG master
76+
)
77+
6278
if (BUILD_BENCH)
6379
# Get Google Benchmark
6480
FetchContent_Declare(
@@ -72,7 +88,7 @@ if (BUILD_BENCH)
7288
FetchContent_MakeAvailable(benchmark)
7389
endif()
7490

75-
FetchContent_MakeAvailable(GutterTree StreamingUtilities)
91+
FetchContent_MakeAvailable(GutterTree StreamingUtilities VieCut tlx)
7692

7793
# AVAILABLE COMPILATION DEFINITIONS:
7894
# VERIFY_SAMPLES_F Use a deterministic connected-components
@@ -95,8 +111,8 @@ add_library(GraphZeppelin
95111
src/cc_alg_configuration.cpp
96112
src/sketch.cpp
97113
src/util.cpp)
98-
add_dependencies(GraphZeppelin GutterTree StreamingUtilities)
99-
target_link_libraries(GraphZeppelin PUBLIC xxhash GutterTree StreamingUtilities)
114+
add_dependencies(GraphZeppelin GutterTree StreamingUtilities VieCut tlx)
115+
target_link_libraries(GraphZeppelin PUBLIC xxhash GutterTree StreamingUtilities VieCut tlx)
100116
target_include_directories(GraphZeppelin PUBLIC include/)
101117
target_compile_options(GraphZeppelin PUBLIC -fopenmp)
102118
target_link_options(GraphZeppelin PUBLIC -fopenmp)
@@ -112,8 +128,8 @@ add_library(GraphZeppelinVerifyCC
112128
src/sketch.cpp
113129
src/util.cpp
114130
test/util/graph_verifier.cpp)
115-
add_dependencies(GraphZeppelinVerifyCC GutterTree StreamingUtilities)
116-
target_link_libraries(GraphZeppelinVerifyCC PUBLIC xxhash GutterTree StreamingUtilities)
131+
add_dependencies(GraphZeppelinVerifyCC GutterTree StreamingUtilities VieCut tlx)
132+
target_link_libraries(GraphZeppelinVerifyCC PUBLIC xxhash GutterTree StreamingUtilities VieCut tlx)
117133
target_include_directories(GraphZeppelinVerifyCC PUBLIC include/ include/test/)
118134
target_compile_options(GraphZeppelinVerifyCC PUBLIC -fopenmp)
119135
target_link_options(GraphZeppelinVerifyCC PUBLIC -fopenmp)

include/cc_sketch_alg.h

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ class CCSketchAlg {
8383
// for accessing if the DSU is valid from threads that do not perform updates
8484
std::atomic<bool> shared_dsu_valid;
8585

86+
// Most recent spanning forest computed by algorithm and associated locks
8687
std::unordered_set<node_id_t> *spanning_forest;
8788
std::mutex *spanning_forest_mtx;
8889

@@ -104,8 +105,8 @@ class CCSketchAlg {
104105
/**
105106
* Sample a single supernode represented by a single sketch containing one or more vertices.
106107
* Updates the dsu and spanning forest with query results if edge contains new connectivity info.
107-
* @param skt sketch to sample
108-
* @return [bool] true if the query result indicates we should run an additional round.
108+
* param: skt sketch to sample
109+
* return: [bool] true if the query result indicates we should run an additional round.
109110
*/
110111
bool sample_supernode(Sketch &skt);
111112

@@ -115,8 +116,8 @@ class CCSketchAlg {
115116
void create_merge_instructions(std::vector<MergeInstr> &merge_instr);
116117

117118
/**
118-
* @param reps set containing the roots of each supernode
119-
* @param merge_instr a list of lists of supernodes to be merged
119+
* param: reps set containing the roots of each supernode
120+
* param: merge_instr a list of lists of supernodes to be merged
120121
*/
121122
bool perform_boruvka_round(const size_t cur_round, const std::vector<MergeInstr> &merge_instr,
122123
std::vector<GlobalMergeData> &global_merges);
@@ -127,6 +128,8 @@ class CCSketchAlg {
127128
*/
128129
void boruvka_emulation();
129130

131+
void filter_sf_edges(SpanningForest &sf);
132+
130133
// constructor for use when reading from a serialized file
131134
CCSketchAlg(node_id_t num_vertices, size_t seed, std::ifstream &binary_stream,
132135
CCAlgConfiguration config);
@@ -169,9 +172,9 @@ class CCSketchAlg {
169172

170173
/**
171174
* Update all the sketches for a node, given a batch of updates.
172-
* @param thr_id The id of the thread performing the update [0, num_threads)
173-
* @param src_vertex The vertex where the edges originate.
174-
* @param dst_vertices A vector of destinations.
175+
* param: thr_id The id of the thread performing the update [0, num_threads)
176+
* param: src_vertex The vertex where the edges originate.
177+
* param: dst_vertices A vector of destinations.
175178
*/
176179
void apply_update_batch(int thr_id, node_id_t src_vertex,
177180
const std::vector<node_id_t> &dst_vertices);
@@ -198,8 +201,8 @@ class CCSketchAlg {
198201
/**
199202
* Apply a batch of updates that have already been processed into a sketch delta.
200203
* Specifically, the delta is in the form of a pointer to raw bucket data.
201-
* @param src_vertex The vertex where the all edges originate.
202-
* @param raw_buckets Pointer to the array of buckets from the delta sketch
204+
* param: src_vertex The vertex where all the edges originate.
205+
* param: raw_buckets Pointer to the array of buckets from the delta sketch
203206
*/
204207
void apply_raw_buckets_update(node_id_t src_vertex, Bucket *raw_buckets);
205208

@@ -213,26 +216,34 @@ class CCSketchAlg {
213216

214217
/**
215218
* Main parallel query algorithm utilizing Boruvka and L_0 sampling.
216-
* @return the connected components in the graph.
219+
* return: the connected components in the graph.
217220
*/
218221
ConnectedComponents connected_components();
219222

220223
/**
221224
* Point query algorithm utilizing Boruvka and L_0 sampling.
222225
* Allows for additional updates when done.
223-
* @param a, b
224-
* @return true if a and b are in the same connected component, false otherwise.
226+
* param: a, b vertices of the graph. Check if these are connected.
227+
* return: true if a and b are in the same connected component, false otherwise.
225228
*/
226229
bool point_query(node_id_t a, node_id_t b);
227230

228231
/**
229232
* Return a spanning forest of the graph utilizing Boruvka and L_0 sampling
230233
* IMPORTANT: The updates to this algorithm MUST NOT be a function of the output of this query
231234
* that is, unless you really know what you're doing.
232-
* @return the spanning forest of the graph
235+
* return: the spanning forest of the graph
233236
*/
234237
SpanningForest calc_spanning_forest();
235238

239+
/**
240+
* Return k edge-disjoint spanning forests of the graph.
241+
* IMPORTANT: The updates to this algorithm MUST NOT be a function of the output of this query
242+
* that is, unless you really know what you're doing.
243+
* return: k edge-disjoint spanning forests of the graph.
244+
*/
245+
std::vector<SpanningForest> calc_disjoint_spanning_forests(size_t k);
246+
236247
#ifdef VERIFY_SAMPLES_F
237248
void set_verifier(std::unique_ptr<GraphVerifier> verifier) {
238249
this->verifier = std::move(verifier);
@@ -241,7 +252,7 @@ class CCSketchAlg {
241252

242253
/**
243254
* Serialize the graph data to a binary file.
244-
* @param filename the name of the file to (over)write data to.
255+
* param: filename the name of the file to (over)write data to.
245256
*/
246257
void write_binary(const std::string &filename);
247258

include/edge_store.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class EdgeStore {
5959

6060
// this function is called when there are some sketch subgraphs.
6161
TaggedUpdateBatch insert_adj_edges(node_id_t src, node_id_t caller_first_es_subgraph,
62-
SubgraphTaggedUpdate* dst_data, size_t dst_data_size);
62+
std::vector<SubgraphTaggedUpdate> &dst_data);
6363

6464
// contract vertex data by removing all updates bound for lower subgraphs than the store
6565
// is responsible for

include/mc_configuration.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#pragma once
2+
#include <iostream>
3+
4+
// Configuration options for the minimum cut sketch algorithm.
// Every setter returns *this so calls can be chained. A setter that receives an out-of-range
// argument prints a warning to stderr and leaves the currently stored value untouched.
class MCAlgConfiguration {
 private:
  // How large to make update batches as factor of sketch size
  double _batch_factor = 1;

  // Returned min-cut is guaranteed to be a (1 +/- epsilon) multiplicative approximation of the
  // true min cut.
  double _epsilon = 0.5;

  // Number of subgraphs for which we use a delta sketch
  // When applying sketch updates to other subgraphs, apply updates directly to sketch
  size_t _num_subgraphs_use_delta = 2;

  friend class MinCutSketchAlg;
 public:
  // setters

  // Set the batch factor. Must be > 0; anything else (including NaN) is rejected.
  MCAlgConfiguration& batch_factor(double batch_factor) {
    // Written as a negated "valid" test so that NaN, for which every comparison is false,
    // lands in the rejection branch instead of being stored.
    if (!(batch_factor > 0)) {
      std::cerr << "WARNING: Batch factor in MCAlgConfiguration must be > 0." << std::endl;
      std::cerr << " Setting to default value: " << _batch_factor << std::endl;
    } else {
      _batch_factor = batch_factor;
    }
    return *this;
  }

  // Set the approximation parameter. Must lie in (0, 1]; anything else (including NaN) is
  // rejected.
  MCAlgConfiguration& epsilon(double epsilon) {
    if (!(epsilon > 0 && epsilon <= 1)) {
      std::cerr << "WARNING: MCAlgConfiguration epsilon must be in range (0, 1]." << std::endl;
      std::cerr << " Setting to default value: " << _epsilon << std::endl;
    } else {
      _epsilon = epsilon;
    }
    return *this;
  }

  // Set the number of subgraphs that use a delta sketch. No range validation is performed.
  MCAlgConfiguration& num_subgraphs_use_delta(size_t num_subgraphs) {
    _num_subgraphs_use_delta = num_subgraphs;
    return *this;
  }

  // getters (const so they are usable through a const reference)
  double get_batch_factor() const { return _batch_factor; }
  double get_epsilon() const { return _epsilon; }
  size_t get_num_subgraphs_use_delta() const { return _num_subgraphs_use_delta; }

  // Print every configuration option, one per line.
  friend std::ostream& operator<< (std::ostream &out, const MCAlgConfiguration &conf) {
    out << "Minimum Cut Algorithm Configuration:" << std::endl;
    out << " batch_factor = " << conf._batch_factor << std::endl;
    out << " epsilon = " << conf._epsilon << std::endl;
    out << " num_subgraphs_use_delta = " << conf._num_subgraphs_use_delta << std::endl;
    return out;
  }
};

include/min_cut_sketch_alg.h

Lines changed: 10 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -5,62 +5,16 @@
55

66
#include "cc_sketch_alg.h"
77
#include "edge_store.h"
8-
9-
10-
// Configuration options for the minimum cut sketch algorithm
11-
class MCAlgConfiguration {
12-
private:
13-
// How large to make update batches as factor of sketch size
14-
double _batch_factor = 1;
15-
16-
// Returned min-cut guaranteed to be a +/- epsilon multiplicative approx of the true min cut.
17-
double _epsilon = 0.5;
18-
19-
// Number of subgraphs for which we use a delta sketch
20-
// When applying sketch updates to other subgraphs, apply updates directly to sketch
21-
size_t _num_subgraphs_use_delta = 2;
22-
23-
friend class MinCutSketchAlg;
24-
public:
25-
// setters
26-
MCAlgConfiguration& batch_factor(double batch_factor) {
27-
if (batch_factor <= 0) {
28-
std::cerr << "WARNING: Batch factor in MCAlgConfiguration must be > 0." << std::endl;
29-
std::cerr << " Setting to default value: " << _batch_factor << std::endl;
30-
} else {
31-
_batch_factor = batch_factor;
32-
}
33-
return *this;
34-
}
35-
MCAlgConfiguration& epsilon(double epsilon) {
36-
if (epsilon <= 0 || epsilon > 1) {
37-
std::cerr << "WARNING: MCAlgConfiguration epsilon must be in range (0, 1]." << std::endl;
38-
std::cerr << " Setting to default value: " << _epsilon << std::endl;
39-
} else {
40-
_epsilon = epsilon;
41-
}
42-
return *this;
43-
}
44-
MCAlgConfiguration& num_subgraphs_use_delta(size_t num_subgraphs) {
45-
_num_subgraphs_use_delta = num_subgraphs;
46-
return *this;
47-
}
48-
49-
// getters
50-
double get_batch_factor() { return _batch_factor; }
51-
double get_epsilon() { return _epsilon; }
52-
size_t get_num_subgraphs_use_delta() { return _num_subgraphs_use_delta; }
53-
54-
friend std::ostream& operator<< (std::ostream &out, const MCAlgConfiguration &conf) {
55-
out << "Minimum Cut Algorithm Configuration:" << std::endl;
56-
out << " batch_factor = " << conf._batch_factor << std::endl;
57-
return out;
58-
}
59-
};
8+
#include "mc_configuration.h"
609

6110
// Minimum cut sketch algorithm class
6211
class MinCutSketchAlg {
6312
private:
13+
struct ThreadData {
14+
std::vector<std::vector<node_id_t>> cc_buffers;
15+
std::vector<SubgraphTaggedUpdate> edge_store_buffer;
16+
};
17+
6418
const node_id_t num_vertices;
6519
const size_t seed;
6620
MCAlgConfiguration config;
@@ -69,14 +23,15 @@ class MinCutSketchAlg {
6923

7024
const double sketch_factor;
7125
const size_t sketch_samples;
26+
const size_t buffer_elms;
7227

7328
CCSketchAlg **cc_sketches;
7429
EdgeStore edge_store;
7530

7631
Sketch *delta_sketches = nullptr;
77-
node_id_t **update_buffers = nullptr;
32+
ThreadData *thread_data = nullptr;
7833
size_t num_delta_sketches = 0;
79-
size_t num_upd_buffers = 0;
34+
size_t num_workers;
8035

8136
#ifdef VERIFY_SAMPLES_F
8237
std::unique_ptr<GraphVerifier> verifier;
@@ -139,7 +94,7 @@ class MinCutSketchAlg {
13994
* seen thus far. This approximation is guaranteed to be within 1 +/- epsilon of the true
14095
* minimum cut.
14196
*/
142-
size_t calc_minimum_cut();
97+
MinCut calc_minimum_cut();
14398

14499
/**
145100
* Return if we have cached an answer to query.

include/return_types.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,19 @@ class ConnectedComponents {
2828
// This class defines a spanning forest of a graph
2929
class SpanningForest {
3030
private:
31-
std::vector<Edge> edges;
3231
node_id_t num_vertices;
32+
std::vector<Edge> edges;
33+
std::vector<Edge> sorted_adjacency;
34+
bool has_adjacency = false;
3335
public:
3436
SpanningForest(node_id_t num_vertices, const std::unordered_set<node_id_t> *spanning_forest);
3537

3638
const std::vector<Edge>& get_edges() const { return edges; }
39+
const std::vector<Edge>& get_sorted_adjacency();
40+
};
41+
42+
struct MinCut {
43+
std::set<node_id_t> left_vertices;
44+
std::set<node_id_t> right_vertices;
45+
size_t value;
3746
};

include/sketch.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ class Sketch {
117117
* Update a sketch based on information about one of its indices.
118118
* param update_idx the index that the point update applies to.
119119
*/
120-
void update(const vec_t update);
120+
void update(const vec_t update_idx);
121121

122122
/**
123123
* Function to sample from the sketch.

0 commit comments

Comments
 (0)