Skip to content

Commit f6dcdf6

Browse files
committed
faster merge code
1 parent 07cd7dd commit f6dcdf6

File tree

4 files changed

+34
-6
lines changed

4 files changed

+34
-6
lines changed

include/bucket_buffer.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,20 @@ class BucketBuffer {
174174
// std::cout << std::endl;
175175
// }
176176
}
177+
178+
size_t partition(size_t pivot_value) {
179+
// In-place partition of entries: every entry with row_idx >= pivot_value is
180+
// swapped to the front, so entries with row_idx < pivot_value end up at the back.
181+
// Returns the number of entries with row_idx >= pivot_value (the front part's size).
182+
size_t partition_size = 0;
183+
for (size_t i = 0; i < size(); ++i) {
184+
if (entries[i].row_idx >= pivot_value) {
185+
std::swap(entries[i], entries[partition_size]);
186+
partition_size++;
187+
}
188+
}
189+
return partition_size;
190+
}
177191

178192
bool merge(const BucketBuffer &other) {
179193
// YOU SHOULD ONLY MERGE WITH AN UNDER CAPACITY BUFFER

include/cc_sketch_alg.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ class CCSketchAlg {
170170
// new Sketch(Sketch::calc_vector_length(num_vertices), seed,
171171
// Sketch::calc_cc_samples(num_vertices, config.get_sketches_factor()));
172172
delta_sketches[i] =
173-
new Sketch(5, seed,
173+
new Sketch(6, seed,
174174
Sketch::calc_cc_samples(num_vertices, config.get_sketches_factor()));
175175
}
176176
}

include/sketch.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ class Sketch {
9090
* @return The length of the vector to sketch
9191
*/
9292
static vec_t calc_vector_length(node_id_t num_vertices) {
93-
return ceil(double(num_vertices) * (num_vertices - 1) / 2);
94-
// return num_vertices * 4;
93+
// return ceil(double(num_vertices) * (num_vertices - 1) / 2);
94+
return num_vertices * 2;
9595
// return 50; // round to something thats approx 2^6
9696
// return 3;
9797
// return 15;

src/sketch.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -194,15 +194,19 @@ Sketch::~Sketch() {
194194
* backwards until we reach the point where the columns are once again not
195195
* being stored
196196
*/
197-
bucket_buffer.sort_and_compact();
197+
// bucket_buffer.sort_and_compact();
198198
size_t buffer_size = bucket_buffer.size();
199+
// ACTUALLY - we don't need to sort; we just need to partition
200+
size_t to_keep_sz = bucket_buffer.partition(bkt_per_col);
199201
int i = ((int) buffer_size)-1;
200-
while (i >= 0 && bucket_buffer[i].row_idx < bkt_per_col) {
202+
// while (i >= 0 && bucket_buffer[i].row_idx < bkt_per_col) {
203+
while (i >= 0 && i >= to_keep_sz) {
201204
// update the bucket
202205
get_bucket(bucket_buffer[i].col_idx, bucket_buffer[i].row_idx) ^= bucket_buffer[i].value;
203206
i--;
204207
}
205-
bucket_buffer.entries.resize(i+1);
208+
bucket_buffer.entries.resize(to_keep_sz);
209+
// bucket_buffer.entries.resize(i+1);
206210
// if (buffer_size > 3)
207211
// std::cout << "Injected buffer buckets:" << buffer_size << " to " << i+1 << std::endl;
208212
}
@@ -392,6 +396,11 @@ void Sketch::merge(const Sketch &other) {
392396
// TODO - when sketches have dynamic sizes, this will require more work
393397
// i.e., we would want to deal with some depths separately.
394398
bool sufficient_space = bucket_buffer.merge(other.bucket_buffer);
399+
// TODO - make this procedure better. This isn't a great implementation.
400+
if (!sufficient_space) {
401+
inject_buffer_buckets();
402+
sufficient_space = !bucket_buffer.over_capacity();
403+
}
395404
while (!sufficient_space) {
396405
// std::cout << "Merge: Buffer full, reallocating" << std::endl;
397406
// reallocate((bkt_per_col * 8) / 5);
@@ -485,6 +494,11 @@ void Sketch::range_merge(const Sketch &other, size_t start_sample, size_t n_samp
485494
}
486495
#endif
487496
bool sufficient_space = bucket_buffer.merge(other.bucket_buffer);
497+
// TODO - make this procedure better. This isn't a great implementation.
498+
if (!sufficient_space) {
499+
inject_buffer_buckets();
500+
sufficient_space = !bucket_buffer.over_capacity();
501+
}
488502
while (!sufficient_space) {
489503
// std::cout << "Merge: Buffer full, reallocating" << std::endl;
490504
// reallocate((bkt_per_col * 8) / 5);

0 commit comments

Comments
 (0)