Skip to content

Commit 3058d82

Browse files
committed
ksparse recovery test cases
1 parent 19d3990 commit 3058d82

File tree

4 files changed

+110
-10
lines changed

4 files changed

+110
-10
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ if (BUILD_EXE)
131131
test/test_runner.cpp
132132
test/cc_alg_test.cpp
133133
test/sketch_test.cpp
134+
test/recovery_test.cpp
134135
test/dsu_test.cpp
135136
test/util_test.cpp
136137
test/util/graph_verifier_test.cpp)

include/recovery.h

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ class SparseRecovery {
2020
size_t cleanup_sketch_support;
2121
// 1 - 1/2e. TODO - can do better. closer to 1-1/e. for the power-of-two-rounding,
2222
// I'm gonna propose 0.69 (comfortably below sqrt(2) so we decrease the size every two levels)
23-
static constexpr double reduction_factor = 0.82;
23+
// static constexpr double reduction_factor = 0.82;
2424
static constexpr double reduction_factor = 0.69;
2525
uint64_t _checksum_seed;
2626
uint64_t seed;
@@ -30,10 +30,10 @@ class SparseRecovery {
3030
// should just be a single array, maybe with a lookup set of pointers for the start of each
3131
std::vector<Bucket> recovery_buckets;
3232
std::vector<size_t> starter_indices;
33-
Sketch cleanup_sketch;
3433
// TODO - see if we want to continue maintaining the deterministic bucket
3534
Bucket deterministic_bucket;
3635
public:
36+
Sketch cleanup_sketch;
3737
SparseRecovery(size_t universe_size, size_t max_recovery_size, double cleanup_sketch_support_factor, uint64_t seed):
3838
// TODO - ugly constructor
3939
cleanup_sketch(universe_size, seed, ceil(cleanup_sketch_support_factor * log2(universe_size)) * 2, 1)
@@ -59,6 +59,7 @@ class SparseRecovery {
5959
auto full_storage_size = starter_indices.back();
6060
// starter_indices.pop_back();
6161
recovery_buckets.resize(full_storage_size);
62+
reset();
6263
};
6364
private:
6465
size_t num_levels() const {
@@ -89,7 +90,6 @@ class SparseRecovery {
8990
vec_hash_t checksum = Bucket_Boruvka::get_index_hash(update, checksum_seed());
9091
deterministic_bucket ^= {update, checksum};
9192
for (size_t cfr_idx=0; cfr_idx < num_levels(); cfr_idx++) {
92-
auto cfr_size = get_cfr_size(cfr_idx);
9393
size_t bucket_idx = get_level_placement(update, cfr_idx);
9494
Bucket &bucket = get_cfr_bucket(cfr_idx, bucket_idx);
9595
bucket ^= {update, checksum};
@@ -98,6 +98,7 @@ class SparseRecovery {
9898
}
9999
void reset() {
100100
// zero contents of the CFRs
101+
deterministic_bucket = {0, 0};
101102
for (size_t i=0; i < recovery_buckets.size(); i++) {
102103
recovery_buckets[i] = {0, 0};
103104
}
@@ -112,6 +113,7 @@ class SparseRecovery {
112113
Bucket working_det_bucket = {0, 0};
113114
for (size_t cfr_idx=0; cfr_idx < num_levels(); cfr_idx++) {
114115
auto cfr_size = get_cfr_size(cfr_idx);
116+
std::cout << "level " << cfr_idx << " size " << cfr_size << std::endl;
115117
// temporarily zero out already recovered things:
116118
size_t previously_recovered = recovered_indices.size();
117119
for (size_t i=0; i < previously_recovered; i++) {
@@ -152,16 +154,11 @@ class SparseRecovery {
152154
return {SUCCESS, recovered_return_vals};
153155
}
154156
for (auto idx: sample.idxs) {
155-
// todo - checksum stuff. tihs is bad code writing but whatever, anything
156-
// to get out of writing psuedocode...
157157
recovered_return_vals.push_back(idx);
158-
// todo - this is inefficient. we are recalculating the bucket hash
159-
// for literally no reason
160-
// but doing things this way is important for undoing our recovery!
161-
// otherwise, we're stuck with a bunch of extra bookkeeping
162158
this->update(idx);
163159
}
164160
}
161+
// undo the removals for everything
165162
for (auto idx: recovered_return_vals) {
166163
this->update(idx);
167164
}
@@ -174,5 +171,7 @@ class SparseRecovery {
174171
}
175172
cleanup_sketch.merge(other.cleanup_sketch);
176173
};
177-
~SparseRecovery();
174+
// No manual cleanup is performed here, so the destructor is explicitly
// defaulted instead of carrying an empty user-written body.
~SparseRecovery() = default;
178177
};

src/sketch.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,11 +291,14 @@ void Sketch::zero_contents() {
291291
}
292292

293293
SketchSample Sketch::sample() {
294+
// TODO - this is bugged
295+
// inject buffer buckets no longer guarantees compactness
294296

295297
if (sample_idx >= num_samples) {
296298
throw OutOfSamplesException(seed, num_samples, sample_idx);
297299
}
298300
// TODO - fix this so this isn't required
301+
bucket_buffer.sort_and_compact();
299302
inject_buffer_buckets();
300303

301304

@@ -338,6 +341,9 @@ SketchSample Sketch::sample() {
338341
}
339342

340343
ExhaustiveSketchSample Sketch::exhaustive_sample() {
344+
// TODO - fix this so this isn't required
345+
bucket_buffer.sort_and_compact();
346+
inject_buffer_buckets();
341347
if (sample_idx >= num_samples) {
342348
throw OutOfSamplesException(seed, num_samples, sample_idx);
343349
}
@@ -367,6 +373,21 @@ ExhaustiveSketchSample Sketch::exhaustive_sample() {
367373
}
368374
}
369375

376+
// finally, check the deep buffer
377+
for (size_t i = 0; i < bucket_buffer.size(); i++) {
378+
const BufferEntry &entry = bucket_buffer[i];
379+
// TODO - optimize this check. THIS IS GONNA CAUSE REALLY POOR
380+
// PERFORMANCE UNTIL WE DO SOMETHING ABOUT IT
381+
if (entry.col_idx >= first_column &&
382+
entry.col_idx < first_column + cols_per_sample) {
383+
if (Bucket_Boruvka::is_good(entry.value, checksum_seed())) {
384+
// std::cout << "Found a bucket in the buffer" << std::endl;
385+
assert(entry.row_idx >= bkt_per_col);
386+
// return {entry.value.alpha, GOOD};
387+
ret.insert(entry.value.alpha);
388+
}
389+
}
390+
}
370391
unlikely_if (ret.size() == 0)
371392
return {ret, FAIL};
372393
return {ret, GOOD};

test/recovery_test.cpp

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#include "sketch.h"
2+
#include "recovery.h"
3+
#include "bucket.h"
4+
#include <chrono>
5+
#include <gtest/gtest.h>
6+
#include <random>
7+
#include "testing_vector.h"
8+
9+
// Builds a seed from the current reading of the high-resolution clock,
// expressed as a nanosecond count since that clock's epoch.
static size_t get_seed() {
  using namespace std::chrono;
  const auto since_epoch = high_resolution_clock::now().time_since_epoch();
  return duration_cast<nanoseconds>(since_epoch).count();
}
13+
14+
static const int num_columns = 1;
15+
// Exercises the smallest cases: recovering from a structure that has received
// no updates, from one holding exactly one index, and from one whose single
// index has been toggled back out by a second update of the same index.
TEST(RecoveryTestSuite, RecoveryZeroOrOne) {
  SparseRecovery recovery(1 << 20, 1 << 10, 1, get_seed());

  // Nothing has been inserted yet, so recovery must succeed with no indices.
  auto result = recovery.recover();
  ASSERT_EQ(result.recovered_indices.size(), 0);
  ASSERT_EQ(result.result, SUCCESS);

  // recover() is deliberately invoked twice, so the second call also runs
  // against whatever state the first call left behind.
  recovery.update(5);
  ASSERT_EQ(recovery.recover().recovered_indices.size(), 1);
  ASSERT_EQ(recovery.recover().recovered_indices[0], 5);

  // A second update of the same index is expected to cancel the first one.
  // Recover again here: `result` above is a snapshot taken before any update,
  // so re-checking it would pass regardless of what the structure now holds.
  recovery.update(5);
  auto result_after_removal = recovery.recover();
  ASSERT_EQ(result_after_removal.recovered_indices.size(), 0);
  ASSERT_EQ(result_after_removal.result, SUCCESS);
}
27+
28+
// Fills the structure with 1 << 10 distinct indices and checks that the exact
// set comes back from recover(), twice in a row.
TEST(RecoveryTestSuite, RecoveryMediumSize) {
  SparseRecovery recovery(1 << 20, 1 << 10, 1, get_seed());

  // Sanity check on the untouched structure.
  auto empty_result = recovery.recover();
  ASSERT_EQ(empty_result.recovered_indices.size(), 0);
  ASSERT_EQ(empty_result.result, SUCCESS);

  // A single index must be recoverable on its own.
  recovery.update(5);
  ASSERT_EQ(recovery.recover().recovered_indices.size(), 1);
  ASSERT_EQ(recovery.recover().recovered_indices[0], 5);

  std::unordered_set<vec_t> expected;
  // Toggle index 5 once more before the bulk insertion below, which updates
  // it again as part of the range.
  recovery.update(5);
  const vec_t item_count = 1 << 10;
  for (vec_t item = 0; item < item_count; item++) {
    recovery.update(item);
    expected.insert(item);
  }

  auto first_pass = recovery.recover();
  std::unordered_set<vec_t> first_set(first_pass.recovered_indices.begin(),
                                      first_pass.recovered_indices.end());
  ASSERT_EQ(first_set, expected);

  // Repeat the recovery to make sure the previous call was non-destructive.
  auto second_pass = recovery.recover();
  std::unordered_set<vec_t> second_set(second_pass.recovered_indices.begin(),
                                       second_pass.recovered_indices.end());
  ASSERT_EQ(second_set, expected);
}
52+
// Inserts 1 << 14 indices and expects recover() to report FAILURE, then
// removes all but the last 1027 of them and expects the remainder to be
// recovered exactly.
TEST(RecoveryTestSuite, RecoveryFailureCondition) {
  SparseRecovery recovery(1 << 20, 1 << 10, 1, get_seed());
  std::unordered_set<vec_t> live_indices;

  const vec_t total_inserted = 1 << 14;
  for (vec_t idx = 0; idx < total_inserted; idx++) {
    recovery.update(idx);
    live_indices.insert(idx);
  }

  auto overfull = recovery.recover();
  ASSERT_EQ(overfull.result, FAILURE);
  std::cout << "size: " << overfull.recovered_indices.size() << std::endl;

  // Whatever a failed recovery hands back must still be genuine insertions.
  for (auto reported : overfull.recovered_indices) {
    ASSERT_TRUE(live_indices.count(reported) != 0);
  }

  // Remove all but the final few elements.
  // TODO - figure out the right place to put sketch clearing
  recovery.cleanup_sketch.reset_sample_state();
  const vec_t removed_count = total_inserted - 1027;
  for (vec_t idx = 0; idx < removed_count; idx++) {
    recovery.update(idx);
    live_indices.erase(idx);
  }

  // TODO - write a helper function for this style of test case
  auto remainder = recovery.recover();
  std::unordered_set<vec_t> remainder_set(remainder.recovered_indices.begin(),
                                          remainder.recovered_indices.end());
  ASSERT_EQ(remainder.result, SUCCESS);
  ASSERT_EQ(remainder_set, live_indices);
}

0 commit comments

Comments
 (0)