Skip to content

Commit 19d3990

Browse files
committed
stuff
1 parent cf600a4 commit 19d3990

File tree

4 files changed

+63
-29
lines changed

4 files changed

+63
-29
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ add_library(GraphZeppelin
101101
src/driver_configuration.cpp
102102
src/cc_alg_configuration.cpp
103103
src/sketch.cpp
104+
src/recovery.cpp
104105
src/util.cpp)
105106
add_dependencies(GraphZeppelin GutterTree StreamingUtilities)
106107
target_link_libraries(GraphZeppelin PUBLIC xxhash GutterTree StreamingUtilities )
@@ -115,6 +116,7 @@ add_library(GraphZeppelinVerifyCC
115116
src/driver_configuration.cpp
116117
src/cc_alg_configuration.cpp
117118
src/sketch.cpp
119+
src/recovery.cpp
118120
src/util.cpp
119121
test/util/graph_verifier.cpp)
120122
add_dependencies(GraphZeppelinVerifyCC GutterTree StreamingUtilities)

include/bucket.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ struct Bucket {
1717
alpha ^= rhs.alpha;
1818
gamma ^= rhs.gamma;
1919
};
20+
bool operator==(const Bucket &rhs) const {
21+
return alpha == rhs.alpha && gamma == rhs.gamma;
22+
};
23+
bool operator!=(const Bucket &rhs) const {
24+
return alpha != rhs.alpha || gamma != rhs.gamma;
25+
};
2026
};
2127
#pragma pack(pop)
2228

include/recovery.h

Lines changed: 54 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ enum RecoveryResultTypes {
88
};
99
struct RecoveryResult {
1010
RecoveryResultTypes result;
11+
// std::vector<Bucket> recovered_indices;
1112
std::vector<vec_t> recovered_indices;
1213
};
1314

@@ -21,7 +22,7 @@ class SparseRecovery {
2122
// I'm gonna propose 0.69 (comfortably below sqrt(2) so we decrease the size every two levels)
2223
static constexpr double reduction_factor = 0.82;
2324
static constexpr double reduction_factor = 0.69;
24-
uint64_t checksum_seed;
25+
uint64_t _checksum_seed;
2526
uint64_t seed;
2627
// approx 1-1/2e. TODO - can do better. closer to 1-1/e with right
2728
// bounding parameters
@@ -38,7 +39,7 @@ class SparseRecovery {
3839
cleanup_sketch(universe_size, seed, ceil(cleanup_sketch_support_factor * log2(universe_size)) * 2, 1)
3940
{
4041
// TODO - define the seed better
41-
checksum_seed = seed;
42+
_checksum_seed = seed;
4243
seed = seed * seed + 13;
4344
universe_size = universe_size;
4445
max_recovery_size = max_recovery_size;
@@ -77,14 +78,23 @@ class SparseRecovery {
7778
inline uint64_t level_seed(size_t level) const {
7879
return seed * (2 + seed) + level * 30;
7980
}
80-
inline size_t checksum_seed() const { return seed; }
81+
inline size_t checksum_seed() const { return _checksum_seed; }
82+
// where in the level this coordinate would go:
83+
size_t get_level_placement(vec_t coordinate, size_t level) {
84+
size_t level_size = get_cfr_size(level);
85+
vec_hash_t hash = Bucket_Boruvka::get_index_hash(coordinate, level_seed(level));
86+
return hash % level_size;
87+
}
8188
void update(const vec_t update) {
82-
for (size_t cfr_idx=0; cfr_idx < recovery_buckets.size(); cfr_idx++) {
83-
size_t hash_index = Bucket_Boruvka::get_index_hash(update, cfr_idx * 1231) % get_cfr_size(cfr_idx);
84-
Bucket_Boruvka::update(get_cfr_bucket(cfr_idx, hash_index), update, checksum_seed());
89+
vec_hash_t checksum = Bucket_Boruvka::get_index_hash(update, checksum_seed());
90+
deterministic_bucket ^= {update, checksum};
91+
for (size_t cfr_idx=0; cfr_idx < num_levels(); cfr_idx++) {
92+
auto cfr_size = get_cfr_size(cfr_idx);
93+
size_t bucket_idx = get_level_placement(update, cfr_idx);
94+
Bucket &bucket = get_cfr_bucket(cfr_idx, bucket_idx);
95+
bucket ^= {update, checksum};
8596
}
8697
cleanup_sketch.update(update);
87-
Bucket_Boruvka::update(deterministic_bucket, update, checksum_seed());
8898
}
8999
void reset() {
90100
// zero contents of the CFRs
@@ -93,54 +103,69 @@ class SparseRecovery {
93103
}
94104
cleanup_sketch.zero_contents();
95105
};
96-
// NOTE THAT THIS IS A DESTRUCTIVE OPERATION AT THE MOMENT.
106+
107+
108+
// THIS IS A NON-DESTRUCTIVE OPERATION
97109
RecoveryResult recover() {
98-
std::vector<vec_t> recovered_indices;
110+
std::vector<Bucket> recovered_indices;
111+
std::vector<vec_t> recovered_return_vals;
112+
Bucket working_det_bucket = {0, 0};
99113
for (size_t cfr_idx=0; cfr_idx < num_levels(); cfr_idx++) {
100-
// go hunting for good buckets
101114
auto cfr_size = get_cfr_size(cfr_idx);
115+
// temporarily zero out already recovered things:
116+
size_t previously_recovered = recovered_indices.size();
117+
for (size_t i=0; i < previously_recovered; i++) {
118+
auto location = get_level_placement(recovered_indices[i].alpha, cfr_idx);
119+
get_cfr_bucket(cfr_idx, location) ^= recovered_indices[i];
120+
}
121+
// go hunting for good buckets
102122
for (size_t bucket_idx=0; bucket_idx < cfr_size; bucket_idx++) {
103-
// Bucket &bucket = recovery_buckets[cfr_idx][bucket_idx];
104123
Bucket &bucket = get_cfr_bucket(cfr_idx, bucket_idx);
105124
if (Bucket_Boruvka::is_good(bucket, checksum_seed())) {
106-
recovered_indices.push_back(bucket.alpha);
107-
// update it out of the sketch everywhere.
108-
this->update(bucket.alpha);
109-
110-
// EARLY EXIT CONDITION: deterministic bucket empty
111-
if (Bucket_Boruvka::is_empty(deterministic_bucket)) {
112-
return {SUCCESS, recovered_indices};
113-
}
125+
recovered_indices.push_back(bucket);
126+
recovered_return_vals.push_back(bucket.alpha);
127+
working_det_bucket ^= bucket;
114128
}
115129
}
130+
// unzero recovered things
131+
for (size_t i=0; i < previously_recovered; i++) {
132+
auto location = get_level_placement(recovered_indices[i].alpha, cfr_idx);
133+
get_cfr_bucket(cfr_idx, location) ^= recovered_indices[i];
134+
}
135+
// EARLY EXIT CONDITION: we recovered everything according to deterministic bucket check
136+
if (working_det_bucket == deterministic_bucket) {
137+
return {SUCCESS, recovered_return_vals};
138+
}
116139
// repeat until we cleared out all the sketches.
117140
}
141+
// update out of sketch
142+
for (auto idx: recovered_return_vals) {
143+
this->update(idx);
144+
}
118145
size_t i=0;
119146
for (; i < cleanup_sketch.get_num_samples(); i++) {
120147
ExhaustiveSketchSample sample = cleanup_sketch.exhaustive_sample();
121148
if (sample.result == ZERO) {
122-
return {SUCCESS, recovered_indices};
149+
for (auto idx: recovered_return_vals) {
150+
this->update(idx);
151+
}
152+
return {SUCCESS, recovered_return_vals};
123153
}
124154
for (auto idx: sample.idxs) {
125155
// todo - checksum stuff. this is bad code writing but whatever, anything
126156
// to get out of writing pseudocode...
127-
recovered_indices.push_back(idx);
157+
recovered_return_vals.push_back(idx);
128158
// todo - this is inefficient. we are recalculating the bucket hash
129159
// for literally no reason
130160
// but doing things this way is important for undoing our recovery!
131161
// otherwise, we're stuck with a bunch of extra bookkeeping
132162
this->update(idx);
133163
}
134164
}
135-
if (i == cleanup_sketch.get_num_samples()) {
136-
// we ran out of samples
137-
// TODO - UNDO YOUR RECOVERY!!!
138-
for (auto idx: recovered_indices) {
139-
this->update(idx);
140-
}
141-
recovered_indices.clear();
165+
for (auto idx: recovered_return_vals) {
166+
this->update(idx);
142167
}
143-
return {FAILURE, recovered_indices};
168+
return {FAILURE, recovered_return_vals};
144169
};
145170
void merge(const SparseRecovery &other) {
146171
assert(other.recovery_buckets.size() == recovery_buckets.size());

src/recovery.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
#include "recovery.h"

0 commit comments

Comments
 (0)