22
33FixedSizeSketchColumn::FixedSizeSketchColumn (uint8_t capacity, uint16_t col_idx) :
44 capacity(capacity), col_idx(col_idx) {
5- buckets = std::make_unique< Bucket[]>( capacity) ;
6- // std::memset(buckets.get() , 0, capacity * sizeof(Bucket));
5+ buckets = new Bucket[capacity] ;
6+ std::memset (buckets, 0 , capacity * sizeof (Bucket));
77}
88
99FixedSizeSketchColumn::FixedSizeSketchColumn (const FixedSizeSketchColumn &other) :
1010 capacity(other.capacity), col_idx(other.col_idx), deterministic_bucket(other.deterministic_bucket) {
11- buckets = std::make_unique< Bucket[]>( capacity) ;
12- std::memcpy (buckets. get () , other.buckets . get () , capacity * sizeof (Bucket));
11+ buckets = new Bucket[capacity] ;
12+ std::memcpy (buckets, other.buckets , capacity * sizeof (Bucket));
1313}
1414
1515FixedSizeSketchColumn::~FixedSizeSketchColumn () {
16- // delete[] buckets;
16+ delete[] buckets;
1717}
1818
1919uint8_t FixedSizeSketchColumn::get_depth () const {
@@ -27,7 +27,7 @@ uint8_t FixedSizeSketchColumn::get_depth() const {
2727
2828// TODO - implement actual deserialization
2929void FixedSizeSketchColumn::serialize (std::ostream &binary_out) const {
30- binary_out.write ((char *) buckets. get () , capacity * sizeof (Bucket));
30+ binary_out.write ((char *) buckets, capacity * sizeof (Bucket));
3131 binary_out.write ((char *) &deterministic_bucket, sizeof (Bucket));
3232 binary_out.write ((char *) &capacity, sizeof (uint8_t ));
3333 binary_out.write ((char *) &col_idx, sizeof (uint8_t ));
@@ -46,7 +46,7 @@ SketchSample<vec_t> FixedSizeSketchColumn::sample() const {
4646}
4747
4848void FixedSizeSketchColumn::clear () {
49- std::memset (buckets. get () , 0 , capacity * sizeof (Bucket));
49+ std::memset (buckets, 0 , capacity * sizeof (Bucket));
5050 deterministic_bucket = {0 , 0 };
5151}
5252
@@ -68,25 +68,119 @@ void FixedSizeSketchColumn::update(const vec_t update) {
6868
6969ResizeableSketchColumn::ResizeableSketchColumn (uint8_t start_capacity, uint16_t col_idx) :
7070 capacity(start_capacity), col_idx(col_idx) {
71+ aligned_buckets = new Bucket[start_capacity];
72+ std::memset (aligned_buckets, 0 , capacity * sizeof (Bucket));
73+ }
74+
75+ ResizeableSketchColumn::ResizeableSketchColumn (const ResizeableSketchColumn &other) :
76+ capacity(other.capacity), col_idx(other.col_idx), deterministic_bucket(other.deterministic_bucket) {
77+ aligned_buckets = new Bucket[capacity];
78+ std::memcpy (aligned_buckets, other.aligned_buckets , capacity * sizeof (Bucket));
79+ }
80+
81+ ResizeableSketchColumn::~ResizeableSketchColumn () {
82+ delete[] aligned_buckets;
83+ }
84+
85+ /*
86+ Note this DROPS the contents if allocated down too much.
87+ */
88+ void ResizeableSketchColumn::reallocate (uint8_t new_capacity) {
89+ // std::cout << "Reallocating from " << (int)capacity << " to " << (int)new_capacity << std::endl;
90+ auto new_buckets = new Bucket[new_capacity];
91+ likely_if (new_capacity > capacity) {
92+ std::memset (new_buckets + capacity, 0 ,
93+ (new_capacity - capacity) * sizeof (Bucket));
94+ }
95+ std::memcpy (new_buckets, aligned_buckets,
96+ std::min (capacity, new_capacity) * sizeof (Bucket));
97+ delete[] aligned_buckets;
98+
99+ aligned_buckets = new_buckets;
100+ capacity = new_capacity;
101+ }
102+ void ResizeableSketchColumn::clear () {
103+ std::memset (aligned_buckets, 0 , capacity * sizeof (Bucket));
104+ deterministic_bucket = {0 , 0 };
105+ }
106+
107+ void ResizeableSketchColumn::serialize (std::ostream &binary_out) const {
108+ binary_out.write ((char *) aligned_buckets, capacity * sizeof (Bucket));
109+ binary_out.write ((char *) &deterministic_bucket, sizeof (Bucket));
110+ binary_out.write ((char *) &capacity, sizeof (uint8_t ));
111+ binary_out.write ((char *) &col_idx, sizeof (uint8_t ));
112+ }
113+
114+ SketchSample<vec_t > ResizeableSketchColumn::sample () const {
115+ if (Bucket_Boruvka::is_empty (deterministic_bucket)) {
116+ return {0 , ZERO}; // the "first" bucket is deterministic so if all zero then no edges to return
117+ }
118+ for (size_t i = capacity; i > 0 ; --i) {
119+ if (Bucket_Boruvka::is_good (aligned_buckets[i - 1 ], seed)) {
120+ return {aligned_buckets[i - 1 ].alpha , GOOD};
121+ }
122+ }
123+ return {0 , FAIL};
124+ }
125+
126+ void ResizeableSketchColumn::update (const vec_t update) {
127+ vec_hash_t checksum = Bucket_Boruvka::get_index_hash (update, seed);
128+ // TODO - remove magic number
129+ // TODO - get_index_depth needs to be fixed. hashes need to be longer
130+ // than 32 bits if we're not using the deep bucket buffer idea.
131+ col_hash_t depth = Bucket_Boruvka::get_index_depth (update, seed, col_idx, 60 );
132+ deterministic_bucket ^= {update, checksum};
133+
134+ if (depth >= capacity) {
135+ size_t new_capacity = ((depth >> 2 ) << 2 ) + 4 ;
136+ reallocate (new_capacity);
137+ }
138+ aligned_buckets[depth] ^= {update, checksum};
139+ }
140+
141+ void ResizeableSketchColumn::merge (ResizeableSketchColumn &other) {
142+ deterministic_bucket ^= other.deterministic_bucket ;
143+ if (other.capacity > capacity) {
144+ reallocate (other.capacity );
145+ }
146+ for (size_t i = 0 ; i < other.capacity ; ++i) {
147+ aligned_buckets[i] ^= other.aligned_buckets [i];
148+ }
149+ }
150+
151+ uint8_t ResizeableSketchColumn::get_depth () const {
152+ // TODO - maybe rely on flag vectors
153+ for (size_t i = capacity; i > 0 ; --i) {
154+ if (!Bucket_Boruvka::is_empty (aligned_buckets[i - 1 ])) {
155+ return i;
156+ }
157+ }
158+ return 0 ;
159+ }
160+
161+
162+
163+ ResizeableAlignedSketchColumn::ResizeableAlignedSketchColumn (uint8_t start_capacity, uint16_t col_idx) :
164+ capacity(start_capacity), col_idx(col_idx) {
71165
72166 // auto aligned_memptr = hwy::MakeUniqueAlignedArray<Bucket>(start_capacity);
73167 aligned_buckets = hwy::AllocateAligned<Bucket>(start_capacity);
74168 std::memset (aligned_buckets.get (), 0 , capacity * sizeof (Bucket));
75169}
76170
77- ResizeableSketchColumn::ResizeableSketchColumn (const ResizeableSketchColumn &other) :
171+ ResizeableAlignedSketchColumn::ResizeableAlignedSketchColumn (const ResizeableAlignedSketchColumn &other) :
78172 capacity(other.capacity), col_idx(other.col_idx), deterministic_bucket(other.deterministic_bucket) {
79173 aligned_buckets = hwy::AllocateAligned<Bucket>(capacity);
80174 std::memcpy (aligned_buckets.get (), other.aligned_buckets .get (), capacity * sizeof (Bucket));
81175}
82176
83- ResizeableSketchColumn ::~ResizeableSketchColumn () {
177+ ResizeableAlignedSketchColumn ::~ResizeableAlignedSketchColumn () {
84178}
85179
86180/*
87181 Note this DROPS the contents if allocated down too much.
88182*/
89- void ResizeableSketchColumn ::reallocate (uint8_t new_capacity) {
183+ void ResizeableAlignedSketchColumn ::reallocate (uint8_t new_capacity) {
90184 auto resize_capacity = std::max (new_capacity, capacity);
91185 auto new_buckets = hwy::AllocateAligned<Bucket>(new_capacity);
92186 std::memset (new_buckets.get () + capacity, 0 ,
@@ -98,19 +192,19 @@ void ResizeableSketchColumn::reallocate(uint8_t new_capacity) {
98192 capacity = new_capacity;
99193}
100194
101- void ResizeableSketchColumn ::clear () {
195+ void ResizeableAlignedSketchColumn ::clear () {
102196 std::memset (aligned_buckets.get (), 0 , capacity * sizeof (Bucket));
103197 deterministic_bucket = {0 , 0 };
104198}
105199
106- void ResizeableSketchColumn ::serialize (std::ostream &binary_out) const {
200+ void ResizeableAlignedSketchColumn ::serialize (std::ostream &binary_out) const {
107201 binary_out.write ((char *) aligned_buckets.get (), capacity * sizeof (Bucket));
108202 binary_out.write ((char *) &deterministic_bucket, sizeof (Bucket));
109203 binary_out.write ((char *) &capacity, sizeof (uint8_t ));
110204 binary_out.write ((char *) &col_idx, sizeof (uint8_t ));
111205}
112206
113- SketchSample<vec_t > ResizeableSketchColumn ::sample () const {
207+ SketchSample<vec_t > ResizeableAlignedSketchColumn ::sample () const {
114208 if (Bucket_Boruvka::is_empty (deterministic_bucket)) {
115209 return {0 , ZERO}; // the "first" bucket is deterministic so if all zero then no edges to return
116210 }
@@ -122,34 +216,33 @@ SketchSample<vec_t> ResizeableSketchColumn::sample() const {
122216 return {0 , FAIL};
123217}
124218
125- void ResizeableSketchColumn ::update (const vec_t update) {
219+ void ResizeableAlignedSketchColumn ::update (const vec_t update) {
126220 vec_hash_t checksum = Bucket_Boruvka::get_index_hash (update, seed);
127221 // TODO - remove magic number
128222 // TODO - get_index_depth needs to be fixed. hashes need to be longer
129223 // than 32 bits if we're not using the deep bucket buffer idea.
130224 col_hash_t depth = Bucket_Boruvka::get_index_depth (update, seed, col_idx, 60 );
131225 deterministic_bucket ^= {update, checksum};
132226
133- while (depth >= capacity) {
134- // first multple of 4 larger than or equal to depth
135- reallocate (capacity + 4 );
227+ if (depth >= capacity) {
228+ size_t new_capacity = ((depth >> 2 ) << 2 ) + 4 ;
229+ reallocate (new_capacity);
136230 }
137231 aligned_buckets[depth] ^= {update, checksum};
138232}
139233
140- void ResizeableSketchColumn ::merge (ResizeableSketchColumn &other) {
234+ void ResizeableAlignedSketchColumn ::merge (ResizeableAlignedSketchColumn &other) {
141235 deterministic_bucket ^= other.deterministic_bucket ;
142236 if (other.capacity > capacity) {
143237 reallocate (other.capacity );
144238 }
145- // auto for_vector_merge = hwy::Rebind<Bucket, uint32_t(aligned_buckets.get(), capacity);
146239 uint32_t *for_vector_merge = reinterpret_cast <uint32_t *>(aligned_buckets.get ());
147240 uint32_t *other_for_vector_merge = reinterpret_cast <uint32_t *>(other.aligned_buckets .get ());
148241 int num_vectors = other.capacity * (sizeof (Bucket) / sizeof (uint32_t ));
149242 hwy::HWY_NAMESPACE::simd_xor (for_vector_merge, other_for_vector_merge, num_vectors);
150243}
151244
152- uint8_t ResizeableSketchColumn ::get_depth () const {
245+ uint8_t ResizeableAlignedSketchColumn ::get_depth () const {
153246 // TODO - maybe rely on flag vectors
154247 for (size_t i = capacity; i > 0 ; --i) {
155248 if (!Bucket_Boruvka::is_empty (aligned_buckets[i - 1 ])) {
@@ -159,13 +252,17 @@ uint8_t ResizeableSketchColumn::get_depth() const {
159252 return 0 ;
160253}
161254
162-
163255uint64_t ResizeableSketchColumn::seed = 0 ;
164256uint64_t FixedSizeSketchColumn::seed = 0 ;
257+ uint64_t ResizeableAlignedSketchColumn::seed = 0 ;
258+
165259
166260
167261static_assert (SketchColumnConcept<FixedSizeSketchColumn, vec_t >,
168262 " FixedSizeSketchColumn does not satisfy SketchColumnConcept" );
169263
170264static_assert (SketchColumnConcept<ResizeableSketchColumn, vec_t >,
171- " ResizeableSketchColumn does not satisfy SketchColumnConcept" );
265+ " ResizeableSketchColumn does not satisfy SketchColumnConcept" );
266+
267+ static_assert (SketchColumnConcept<ResizeableAlignedSketchColumn, vec_t >,
268+ " ResizeableAlignedSketchColumn does not satisfy SketchColumnConcept" );
0 commit comments