@@ -20,7 +20,7 @@ class SparseRecovery {
2020 size_t cleanup_sketch_support;
2121 // 1 - 1/2e. TODO - can do better. closer to 1-1/e. for the power-of-two-rounding,
2222 // I'm gonna propose 0.69 (comfortably below 1/sqrt(2) ~= 0.707, so we decrease the size every two levels)
23- static constexpr double reduction_factor = 0.82 ;
23+ // static constexpr double reduction_factor = 0.82;
2424 static constexpr double reduction_factor = 0.69 ;
2525 uint64_t _checksum_seed;
2626 uint64_t seed;
@@ -30,10 +30,10 @@ class SparseRecovery {
3030 // should just be a single array, maybe with a lookup set of pointers for the start of each
3131 std::vector<Bucket> recovery_buckets;
3232 std::vector<size_t > starter_indices;
33- Sketch cleanup_sketch;
3433 // TODO - see if we want to continue maintaining the deterministic bucket
3534 Bucket deterministic_bucket;
3635 public:
36+ Sketch cleanup_sketch;
3737 SparseRecovery (size_t universe_size, size_t max_recovery_size, double cleanup_sketch_support_factor, uint64_t seed):
3838 // TODO - ugly constructor
3939 cleanup_sketch (universe_size, seed, ceil(cleanup_sketch_support_factor * log2 (universe_size)) * 2, 1)
@@ -59,6 +59,7 @@ class SparseRecovery {
5959 auto full_storage_size = starter_indices.back ();
6060 // starter_indices.pop_back();
6161 recovery_buckets.resize (full_storage_size);
62+ reset ();
6263 };
6364 private:
6465 size_t num_levels () const {
@@ -89,7 +90,6 @@ class SparseRecovery {
8990 vec_hash_t checksum = Bucket_Boruvka::get_index_hash (update, checksum_seed ());
9091 deterministic_bucket ^= {update, checksum};
9192 for (size_t cfr_idx=0 ; cfr_idx < num_levels (); cfr_idx++) {
92- auto cfr_size = get_cfr_size (cfr_idx);
9393 size_t bucket_idx = get_level_placement (update, cfr_idx);
9494 Bucket &bucket = get_cfr_bucket (cfr_idx, bucket_idx);
9595 bucket ^= {update, checksum};
@@ -98,6 +98,7 @@ class SparseRecovery {
9898 }
9999 void reset () {
100100 // zero contents of the CFRs
101+ deterministic_bucket = {0 , 0 };
101102 for (size_t i=0 ; i < recovery_buckets.size (); i++) {
102103 recovery_buckets[i] = {0 , 0 };
103104 }
@@ -112,6 +113,7 @@ class SparseRecovery {
112113 Bucket working_det_bucket = {0 , 0 };
113114 for (size_t cfr_idx=0 ; cfr_idx < num_levels (); cfr_idx++) {
114115 auto cfr_size = get_cfr_size (cfr_idx);
116+ std::cout << " level " << cfr_idx << " size " << cfr_size << std::endl;
115117 // temporarily zero out already recovered things:
116118 size_t previously_recovered = recovered_indices.size ();
117119 for (size_t i=0 ; i < previously_recovered; i++) {
@@ -152,16 +154,11 @@ class SparseRecovery {
152154 return {SUCCESS, recovered_return_vals};
153155 }
154156 for (auto idx: sample.idxs ) {
155- // todo - checksum stuff. tihs is bad code writing but whatever, anything
156- // to get out of writing psuedocode...
157157 recovered_return_vals.push_back (idx);
158- // todo - this is inefficient. we are recalculating the bucket hash
159- // for literally no reason
160- // but doing things this way is important for undoing our recovery!
161- // otherwise, we're stuck with a bunch of extra bookkeeping
162158 this ->update (idx);
163159 }
164160 }
161+ // undo the removals for everything
165162 for (auto idx: recovered_return_vals) {
166163 this ->update (idx);
167164 }
@@ -174,5 +171,7 @@ class SparseRecovery {
174171 }
175172 cleanup_sketch.merge (other.cleanup_sketch );
176173 };
177- ~SparseRecovery ();
174+ ~SparseRecovery () {
175+
176+ };
178177};
0 commit comments