Skip to content

Commit fd43708

Browse files
committed
moved sketch_columns stuff out to separate cpp file
1 parent 4756953 commit fd43708

File tree

5 files changed

+355
-169
lines changed

5 files changed

+355
-169
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ add_library(GraphZeppelin
119119
src/driver_configuration.cpp
120120
src/cc_alg_configuration.cpp
121121
src/sketch.cpp
122+
src/sketch_columns.cpp
122123
src/recovery.cpp
123124
src/util.cpp)
124125
add_dependencies(GraphZeppelin GutterTree StreamingUtilities hwy)
@@ -134,6 +135,7 @@ add_library(GraphZeppelinVerifyCC
134135
src/driver_configuration.cpp
135136
src/cc_alg_configuration.cpp
136137
src/sketch.cpp
138+
src/sketch_columns.cpp
137139
src/recovery.cpp
138140
src/util.cpp
139141
test/util/graph_verifier.cpp)

\

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
cmake_minimum_required(VERSION 3.15)
2+
project(GraphZeppelin)
3+
4+
include (FetchContent)
5+
6+
set(CMAKE_CXX_STANDARD 20)
7+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
8+
set(CMAKE_CXX_EXTENSIONS ON)
9+
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
10+
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
11+
set(CMAKE_COLOR_DIAGNOSTICS ON)
12+
13+
14+
15+
# Make the default build type Debug. If user or another
16+
# project sets a different value then use that
17+
if(NOT CMAKE_BUILD_TYPE)
18+
message(STATUS "Setting build type to default -- Debug")
19+
set(CMAKE_BUILD_TYPE "Debug" CACHE
20+
STRING "Choose the type of build." FORCE)
21+
endif()
22+
message(STATUS "GraphZeppelin Build Type: ${CMAKE_BUILD_TYPE}")
23+
24+
# Add warning flags for the compilers we recognize (GNU and MSVC); any other
# compiler ID gets no extra flags and a status message saying so.
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
25+
message(STATUS "Adding GNU compiler flags")
26+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wall")
27+
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
28+
message(STATUS "Adding MSVC compiler flags")
29+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Wall")
30+
else()
31+
message(STATUS "${CMAKE_CXX_COMPILER_ID} not recognized, no flags added")
32+
endif()
33+
34+
include(CheckCXXCompilerFlag)
35+
CHECK_CXX_COMPILER_FLAG("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
36+
if(COMPILER_SUPPORTS_MARCH_NATIVE)
37+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
38+
endif()
39+
40+
# add_compile_options(-fsanitize=address)
41+
# add_link_options(-fsanitize=address)
42+
#add_compile_options(-fsanitize=undefined)
43+
#add_link_options(-fsanitize=undefined)
44+
45+
# Check if this project is the top directory or build type is Debug
46+
# If so, build executables, otherwise, only build libraries
47+
get_directory_property(not_root PARENT_DIRECTORY)
48+
if (not_root AND "${CMAKE_BUILD_TYPE}" STREQUAL "Release")
49+
set(BUILD_EXE OFF)
50+
else()
51+
set(BUILD_EXE ON)
52+
message (STATUS "GraphZeppelin building executables")
53+
endif()
54+
55+
FetchContent_Declare(
56+
hwy
57+
58+
GIT_REPOSITORY https://github.com/google/highway.git
59+
GIT_TAG 1.2.0
60+
)
61+
62+
# Get GutterTree Project
63+
FetchContent_Declare(
64+
GutterTree
65+
66+
GIT_REPOSITORY https://github.com/GraphStreamingProject/GutterTree.git
67+
GIT_TAG main
68+
)
69+
70+
# Get StreamingUtilities
71+
FetchContent_Declare(
72+
StreamingUtilities
73+
74+
GIT_REPOSITORY https://github.com/GraphStreamingProject/StreamingUtilities.git
75+
GIT_TAG main
76+
)
77+
78+
79+
set(HWY_ENABLE_EXAMPLES OFF CACHE INTERNAL "Disable highway examples")
80+
set(HWY_ENABLE_TESTS OFF CACHE INTERNAL "Disable highway tests")
81+
82+
# Get google highway
83+
FetchContent_MakeAvailable(hwy)
84+
85+
# Ensure highway target is explicitly added
86+
#add_library(highway INTERFACE IMPORTED)
87+
88+
if (BUILD_BENCH)
89+
# Get Google Benchmark
90+
FetchContent_Declare(
91+
benchmark
92+
93+
GIT_REPOSITORY https://github.com/google/benchmark
94+
GIT_TAG v1.6.1
95+
)
96+
set(BENCHMARK_ENABLE_GTEST_TESTS OFF)
97+
98+
FetchContent_MakeAvailable(benchmark)
99+
endif()
100+
101+
102+
FetchContent_MakeAvailable(GutterTree StreamingUtilities)
103+
104+
# AVAILABLE COMPILATION DEFINITIONS:
105+
# VERIFY_SAMPLES_F Use a deterministic connected-components
106+
# algorithm to verify post-processing.
107+
# NO_EAGER_DSU Do not use the eager DSU query optimization
108+
# if this flag is present.
109+
# L0_SAMPLING Run the CubeSketch l0 sampling algorithm
110+
# to ensure that we sample uniformly.
111+
# Otherwise, run a support finding algorithm.
112+
#
113+
# Example:
114+
# cmake -DCMAKE_CXX_FLAGS="-DL0_SAMPLING" ..
115+
116+
add_library(GraphZeppelin
117+
src/cc_sketch_alg.cpp
118+
src/return_types.cpp
119+
src/driver_configuration.cpp
120+
src/cc_alg_configuration.cpp
121+
src/sketch.cpp
122+
src/sketch_columns.cpp
123+
src/recovery.cpp
124+
src/util.cpp)
125+
add_dependencies(GraphZeppelin GutterTree StreamingUtilities hwy)
126+
target_link_libraries(GraphZeppelin PUBLIC xxhash GutterTree StreamingUtilities hwy)
127+
target_include_directories(GraphZeppelin PUBLIC include/)
128+
target_compile_options(GraphZeppelin PUBLIC -fopenmp)
129+
target_link_options(GraphZeppelin PUBLIC -fopenmp)
130+
target_compile_definitions(GraphZeppelin PUBLIC XXH_INLINE_ALL)
131+
132+
# Same source list as the GraphZeppelin library plus test/util/graph_verifier.cpp.
# src/sketch_columns.cpp must be listed here too so both targets compile the
# same set of sketch sources.
add_library(GraphZeppelinVerifyCC
133+
src/cc_sketch_alg.cpp
134+
src/return_types.cpp
135+
src/driver_configuration.cpp
136+
src/cc_alg_configuration.cpp
137+
src/sketch.cpp
src/sketch_columns.cpp
138+
src/recovery.cpp
139+
src/util.cpp
140+
test/util/graph_verifier.cpp)
141+
add_dependencies(GraphZeppelinVerifyCC GutterTree StreamingUtilities hwy)
142+
target_link_libraries(GraphZeppelinVerifyCC PUBLIC xxhash GutterTree StreamingUtilities hwy )
143+
target_include_directories(GraphZeppelinVerifyCC PUBLIC include/ include/test/)
144+
target_compile_options(GraphZeppelinVerifyCC PUBLIC -fopenmp)
145+
target_link_options(GraphZeppelinVerifyCC PUBLIC -fopenmp)
146+
target_compile_definitions(GraphZeppelinVerifyCC PUBLIC XXH_INLINE_ALL VERIFY_SAMPLES_F)
147+
148+
if (BUILD_EXE)
149+
add_executable(tests
150+
test/test_runner.cpp
151+
test/cc_alg_test.cpp
152+
test/sketch_test.cpp
153+
test/recovery_test.cpp
154+
test/dsu_test.cpp
155+
test/util_test.cpp
156+
test/util/graph_verifier_test.cpp)
157+
add_dependencies(tests GraphZeppelinVerifyCC)
158+
target_link_libraries(tests PRIVATE GraphZeppelinVerifyCC)
159+
160+
add_executable(statistical_sketch_test
161+
tools/sketch_testing.cpp)
162+
add_dependencies(statistical_sketch_test GraphZeppelinVerifyCC)
163+
target_link_libraries(statistical_sketch_test PRIVATE GraphZeppelinVerifyCC)
164+
165+
166+
# executable for processing a binary graph stream
167+
add_executable(process_stream
168+
tools/process_stream.cpp)
169+
target_link_libraries(process_stream PRIVATE GraphZeppelin)
170+
171+
# executable for performing in depth correctness testing
172+
add_executable(test_correctness
173+
tools/test_correctness.cpp)
174+
target_link_libraries(test_correctness PRIVATE GraphZeppelinVerifyCC)
175+
endif()
176+
177+
if (BUILD_BENCH)
178+
add_executable(bench_cc
179+
tools/benchmark/graphcc_bench.cpp)
180+
add_dependencies(bench_cc GraphZeppelin benchmark)
181+
target_link_libraries(bench_cc GraphZeppelin benchmark::benchmark xxhash)
182+
endif()

include/sketch/sketch_columns.h

Lines changed: 0 additions & 169 deletions
Original file line numberDiff line numberDiff line change
@@ -58,72 +58,6 @@ class FixedSizeSketchColumn {
5858
}
5959
};
6060

61-
FixedSizeSketchColumn::FixedSizeSketchColumn(uint8_t capacity, uint16_t col_idx) :
62-
capacity(capacity), col_idx(col_idx) {
63-
buckets = std::make_unique<Bucket[]>(capacity);
64-
// std::memset(buckets.get(), 0, capacity * sizeof(Bucket));
65-
}
66-
67-
FixedSizeSketchColumn::FixedSizeSketchColumn(const FixedSizeSketchColumn &other) :
68-
capacity(other.capacity), col_idx(other.col_idx), deterministic_bucket(other.deterministic_bucket) {
69-
buckets = std::make_unique<Bucket[]>(capacity);
70-
std::memcpy(buckets.get(), other.buckets.get(), capacity * sizeof(Bucket));
71-
}
72-
73-
FixedSizeSketchColumn::~FixedSizeSketchColumn() {
74-
// delete[] buckets;
75-
}
76-
77-
uint8_t FixedSizeSketchColumn::get_depth() const {
78-
for (size_t i = capacity; i > 0; --i) {
79-
if (!Bucket_Boruvka::is_empty(buckets[i - 1])) {
80-
return i;
81-
}
82-
}
83-
return 0;
84-
}
85-
86-
// TODO - implement actual deserialization
87-
void FixedSizeSketchColumn::serialize(std::ostream &binary_out) const {
88-
binary_out.write((char *) buckets.get(), capacity * sizeof(Bucket));
89-
binary_out.write((char *) &deterministic_bucket, sizeof(Bucket));
90-
binary_out.write((char *) &capacity, sizeof(uint8_t));
91-
binary_out.write((char *) &col_idx, sizeof(uint8_t));
92-
}
93-
94-
SketchSample<vec_t> FixedSizeSketchColumn::sample() const {
95-
if (Bucket_Boruvka::is_empty(deterministic_bucket)) {
96-
return {0, ZERO}; // the "first" bucket is deterministic so if all zero then no edges to return
97-
}
98-
for (size_t i = 0; i < capacity; ++i) {
99-
if (Bucket_Boruvka::is_good(buckets[i], seed)) {
100-
return {buckets[i].alpha, GOOD};
101-
}
102-
}
103-
return {0, FAIL};
104-
}
105-
106-
void FixedSizeSketchColumn::clear() {
107-
std::memset(buckets.get(), 0, capacity * sizeof(Bucket));
108-
deterministic_bucket = {0, 0};
109-
}
110-
111-
void FixedSizeSketchColumn::merge(FixedSizeSketchColumn &other) {
112-
for (size_t i = 0; i < capacity; ++i) {
113-
buckets[i] ^= other.buckets[i];
114-
}
115-
deterministic_bucket ^= other.deterministic_bucket;
116-
}
117-
118-
void FixedSizeSketchColumn::update(const vec_t update) {
119-
vec_hash_t checksum = Bucket_Boruvka::get_index_hash(update, seed);
120-
col_hash_t depth = Bucket_Boruvka::get_index_depth(update, seed, col_idx, capacity);
121-
assert(depth < capacity);
122-
buckets[depth] ^= {update, checksum};
123-
deterministic_bucket ^= {update, checksum};
124-
}
125-
126-
12761

12862
class ResizeableSketchColumn {
12963
private:
@@ -160,106 +94,3 @@ class ResizeableSketchColumn {
16094
void reallocate(uint8_t new_capacity);
16195
};
16296

163-
uint64_t ResizeableSketchColumn::seed = 0;
164-
uint64_t FixedSizeSketchColumn::seed = 0;
165-
166-
167-
ResizeableSketchColumn::ResizeableSketchColumn(uint8_t start_capacity, uint16_t col_idx) :
168-
capacity(start_capacity), col_idx(col_idx) {
169-
170-
// auto aligned_memptr = hwy::MakeUniqueAlignedArray<Bucket>(start_capacity);
171-
aligned_buckets = hwy::AllocateAligned<Bucket>(start_capacity);
172-
std::memset(aligned_buckets.get(), 0, capacity * sizeof(Bucket));
173-
}
174-
175-
ResizeableSketchColumn::ResizeableSketchColumn(const ResizeableSketchColumn &other) :
176-
capacity(other.capacity), col_idx(other.col_idx), deterministic_bucket(other.deterministic_bucket) {
177-
aligned_buckets = hwy::AllocateAligned<Bucket>(capacity);
178-
std::memcpy(aligned_buckets.get(), other.aligned_buckets.get(), capacity * sizeof(Bucket));
179-
}
180-
181-
ResizeableSketchColumn::~ResizeableSketchColumn() {
182-
}
183-
184-
/*
185-
Note this DROPS the contents if allocated down too much.
186-
*/
187-
void ResizeableSketchColumn::reallocate(uint8_t new_capacity) {
188-
auto resize_capacity = std::max(new_capacity, capacity);
189-
auto new_buckets = hwy::AllocateAligned<Bucket>(new_capacity);
190-
std::memset(new_buckets.get() + capacity, 0,
191-
(resize_capacity - capacity) * sizeof(Bucket));
192-
std::memcpy(new_buckets.get(), aligned_buckets.get(),
193-
resize_capacity * sizeof(Bucket));
194-
aligned_buckets = std::move(new_buckets);
195-
capacity = new_capacity;
196-
}
197-
198-
void ResizeableSketchColumn::clear() {
199-
std::memset(aligned_buckets.get(), 0, capacity * sizeof(Bucket));
200-
deterministic_bucket = {0, 0};
201-
}
202-
203-
void ResizeableSketchColumn::serialize(std::ostream &binary_out) const {
204-
binary_out.write((char *) aligned_buckets.get(), capacity * sizeof(Bucket));
205-
binary_out.write((char *) &deterministic_bucket, sizeof(Bucket));
206-
binary_out.write((char *) &capacity, sizeof(uint8_t));
207-
binary_out.write((char *) &col_idx, sizeof(uint8_t));
208-
}
209-
210-
SketchSample<vec_t> ResizeableSketchColumn::sample() const {
211-
if (Bucket_Boruvka::is_empty(deterministic_bucket)) {
212-
return {0, ZERO}; // the "first" bucket is deterministic so if all zero then no edges to return
213-
}
214-
for (size_t i = capacity; i > 0; --i) {
215-
if (Bucket_Boruvka::is_good(aligned_buckets[i - 1], seed)) {
216-
return {aligned_buckets[i - 1].alpha, GOOD};
217-
}
218-
}
219-
return {0, FAIL};
220-
}
221-
222-
void ResizeableSketchColumn::update(const vec_t update) {
223-
vec_hash_t checksum = Bucket_Boruvka::get_index_hash(update, seed);
224-
// TODO - remove magic number
225-
// TODO - get_index_depth needs to be fixed. hashes need to be longer
226-
// than 32 bits if we're not using the deep bucket buffer idea.
227-
col_hash_t depth = Bucket_Boruvka::get_index_depth(update, seed, col_idx, 32);
228-
deterministic_bucket ^= {update, checksum};
229-
230-
while (depth >= capacity) {
231-
// first multiple of 4 larger than or equal to depth
232-
reallocate(capacity + 4);
233-
}
234-
aligned_buckets[depth] ^= {update, checksum};
235-
}
236-
237-
void ResizeableSketchColumn::merge(ResizeableSketchColumn &other) {
238-
deterministic_bucket ^= other.deterministic_bucket;
239-
if (other.capacity > capacity) {
240-
reallocate(other.capacity);
241-
}
242-
// auto for_vector_merge = hwy::Rebind<Bucket, uint32_t(aligned_buckets.get(), capacity);
243-
uint32_t *for_vector_merge = reinterpret_cast<uint32_t*>(aligned_buckets.get());
244-
uint32_t *other_for_vector_merge = reinterpret_cast<uint32_t*>(other.aligned_buckets.get());
245-
int num_vectors = other.capacity * (sizeof(Bucket) / sizeof(uint32_t));
246-
hwy::HWY_NAMESPACE::simd_xor(for_vector_merge, other_for_vector_merge, num_vectors);
247-
}
248-
249-
uint8_t ResizeableSketchColumn::get_depth() const {
250-
// TODO - maybe rely on flag vectors
251-
for (size_t i = capacity; i > 0; --i) {
252-
if (!Bucket_Boruvka::is_empty(aligned_buckets[i - 1])) {
253-
return i;
254-
}
255-
}
256-
return 0;
257-
}
258-
259-
260-
261-
static_assert(SketchColumnConcept<FixedSizeSketchColumn, vec_t>,
262-
"FixedSizeSketchColumn does not satisfy SketchColumnConcept");
263-
264-
static_assert(SketchColumnConcept<ResizeableSketchColumn, vec_t>,
265-
"ResizeableSketchColumn does not satisfy SketchColumnConcept");

include/sketch/sketch_concept.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include <format>
33
#include <concepts>
44
#include "bucket.h"
5+
#include <unordered_set>
56

67
enum SampleResult {
78
GOOD, // sampling this sketch returned a single non-zero value

0 commit comments

Comments
 (0)