@@ -8,6 +8,7 @@ enum RecoveryResultTypes {
88};
99struct RecoveryResult {
1010 RecoveryResultTypes result;
11+ // std::vector<Bucket> recovered_indices;
1112 std::vector<vec_t > recovered_indices;
1213};
1314
@@ -21,7 +22,7 @@ class SparseRecovery {
2122 // I'm gonna propose 0.69 (comfortably below sqrt(2) so we decrease the size every two levels)
2223 static constexpr double reduction_factor = 0.82 ;
2324 static constexpr double reduction_factor = 0.69 ;
24- uint64_t checksum_seed ;
25+ uint64_t _checksum_seed ;
2526 uint64_t seed;
2627 // approx 1-1/2e. TODO - can do better. closer to 1-1/e with right
2728 // bounding parameters
@@ -38,7 +39,7 @@ class SparseRecovery {
3839 cleanup_sketch (universe_size, seed, ceil(cleanup_sketch_support_factor * log2 (universe_size)) * 2, 1)
3940 {
4041 // TODO - define the seed better
41- checksum_seed = seed;
42+ _checksum_seed = seed;
4243 seed = seed * seed + 13 ;
4344 universe_size = universe_size;
4445 max_recovery_size = max_recovery_size;
@@ -77,14 +78,23 @@ class SparseRecovery {
7778 inline uint64_t level_seed (size_t level) const {
7879 return seed * (2 + seed) + level * 30 ;
7980 }
80- inline size_t checksum_seed () const { return seed; }
81+ inline size_t checksum_seed () const { return _checksum_seed; }
82+ // where in the level this coordinate would go:
83+ size_t get_level_placement (vec_t coordinate, size_t level) {
84+ size_t level_size = get_cfr_size (level);
85+ vec_hash_t hash = Bucket_Boruvka::get_index_hash (coordinate, level_seed (level));
86+ return hash % level_size;
87+ }
8188 void update (const vec_t update) {
82- for (size_t cfr_idx=0 ; cfr_idx < recovery_buckets.size (); cfr_idx++) {
83- size_t hash_index = Bucket_Boruvka::get_index_hash (update, cfr_idx * 1231 ) % get_cfr_size (cfr_idx);
84- Bucket_Boruvka::update (get_cfr_bucket (cfr_idx, hash_index), update, checksum_seed ());
89+ vec_hash_t checksum = Bucket_Boruvka::get_index_hash (update, checksum_seed ());
90+ deterministic_bucket ^= {update, checksum};
91+ for (size_t cfr_idx=0 ; cfr_idx < num_levels (); cfr_idx++) {
92+ auto cfr_size = get_cfr_size (cfr_idx);
93+ size_t bucket_idx = get_level_placement (update, cfr_idx);
94+ Bucket &bucket = get_cfr_bucket (cfr_idx, bucket_idx);
95+ bucket ^= {update, checksum};
8596 }
8697 cleanup_sketch.update (update);
87- Bucket_Boruvka::update (deterministic_bucket, update, checksum_seed ());
8898 }
8999 void reset () {
90100 // zero contents of the CFRs
@@ -93,54 +103,69 @@ class SparseRecovery {
93103 }
94104 cleanup_sketch.zero_contents ();
95105 };
96- // NOTE THAT THIS IS A DESTRUCTIVE OPERATION AT THE MOMENT.
106+
107+
108+ // THIS IS A NON_DESTRUCTIVE OPERATION
97109 RecoveryResult recover () {
98- std::vector<vec_t > recovered_indices;
110+ std::vector<Bucket> recovered_indices;
111+ std::vector<vec_t > recovered_return_vals;
112+ Bucket working_det_bucket = {0 , 0 };
99113 for (size_t cfr_idx=0 ; cfr_idx < num_levels (); cfr_idx++) {
100- // go hunting for good buckets
101114 auto cfr_size = get_cfr_size (cfr_idx);
115+ // temporarily zero out already recovvered things:
116+ size_t previously_recovered = recovered_indices.size ();
117+ for (size_t i=0 ; i < previously_recovered; i++) {
118+ auto location = get_level_placement (recovered_indices[i].alpha , cfr_idx);
119+ get_cfr_bucket (cfr_idx, location) ^= recovered_indices[i];
120+ }
121+ // go hunting for good buckets
102122 for (size_t bucket_idx=0 ; bucket_idx < cfr_size; bucket_idx++) {
103- // Bucket &bucket = recovery_buckets[cfr_idx][bucket_idx];
104123 Bucket &bucket = get_cfr_bucket (cfr_idx, bucket_idx);
105124 if (Bucket_Boruvka::is_good (bucket, checksum_seed ())) {
106- recovered_indices.push_back (bucket.alpha );
107- // update it out of the sketch everywhere.
108- this ->update (bucket.alpha );
109-
110- // EARLY EXIT CONDITION: deterministic bucket empty
111- if (Bucket_Boruvka::is_empty (deterministic_bucket)) {
112- return {SUCCESS, recovered_indices};
113- }
125+ recovered_indices.push_back (bucket);
126+ recovered_return_vals.push_back (bucket.alpha );
127+ working_det_bucket ^= bucket;
114128 }
115129 }
130+ // unzero recovered things
131+ for (size_t i=0 ; i < previously_recovered; i++) {
132+ auto location = get_level_placement (recovered_indices[i].alpha , cfr_idx);
133+ get_cfr_bucket (cfr_idx, location) ^= recovered_indices[i];
134+ }
135+ // EARLY EXIT CONDITION: we recovered everything according to deterministic bucket check
136+ if (working_det_bucket == deterministic_bucket) {
137+ return {SUCCESS, recovered_return_vals};
138+ }
116139 // repeat until we cleared out all the sketches.
117140 }
141+ // update out of sketch
142+ for (auto idx: recovered_return_vals) {
143+ this ->update (idx);
144+ }
118145 size_t i=0 ;
119146 for (; i < cleanup_sketch.get_num_samples (); i++) {
120147 ExhaustiveSketchSample sample = cleanup_sketch.exhaustive_sample ();
121148 if (sample.result == ZERO) {
122- return {SUCCESS, recovered_indices};
149+ for (auto idx: recovered_return_vals) {
150+ this ->update (idx);
151+ }
152+ return {SUCCESS, recovered_return_vals};
123153 }
124154 for (auto idx: sample.idxs ) {
125155 // todo - checksum stuff. tihs is bad code writing but whatever, anything
126156 // to get out of writing psuedocode...
127- recovered_indices .push_back (idx);
157+ recovered_return_vals .push_back (idx);
128158 // todo - this is inefficient. we are recalculating the bucket hash
129159 // for literally no reason
130160 // but doing things this way is important for undoing our recovery!
131161 // otherwise, we're stuck with a bunch of extra bookkeeping
132162 this ->update (idx);
133163 }
134164 }
135- if (i == cleanup_sketch.get_num_samples ()) {
136- // we ran out of samples
137- // TODO - UNDO YOUR RECOVERY!!!
138- for (auto idx: recovered_indices) {
139- this ->update (idx);
140- }
141- recovered_indices.clear ();
165+ for (auto idx: recovered_return_vals) {
166+ this ->update (idx);
142167 }
143- return {FAILURE, recovered_indices };
168+ return {FAILURE, recovered_return_vals };
144169 };
145170 void merge (const SparseRecovery &other) {
146171 assert (other.recovery_buckets .size () == recovery_buckets.size ());
0 commit comments