@@ -58,72 +58,6 @@ class FixedSizeSketchColumn {
5858 }
5959};
6060
61- FixedSizeSketchColumn::FixedSizeSketchColumn (uint8_t capacity, uint16_t col_idx) :
62- capacity(capacity), col_idx(col_idx) {
63- buckets = std::make_unique<Bucket[]>(capacity);
64- // std::memset(buckets.get(), 0, capacity * sizeof(Bucket));
65- }
66-
67- FixedSizeSketchColumn::FixedSizeSketchColumn (const FixedSizeSketchColumn &other) :
68- capacity(other.capacity), col_idx(other.col_idx), deterministic_bucket(other.deterministic_bucket) {
69- buckets = std::make_unique<Bucket[]>(capacity);
70- std::memcpy (buckets.get (), other.buckets .get (), capacity * sizeof (Bucket));
71- }
72-
73- FixedSizeSketchColumn::~FixedSizeSketchColumn () {
74- // delete[] buckets;
75- }
76-
77- uint8_t FixedSizeSketchColumn::get_depth () const {
78- for (size_t i = capacity; i > 0 ; --i) {
79- if (!Bucket_Boruvka::is_empty (buckets[i - 1 ])) {
80- return i;
81- }
82- }
83- return 0 ;
84- }
85-
86- // TODO - implement actual deserialization
87- void FixedSizeSketchColumn::serialize (std::ostream &binary_out) const {
88- binary_out.write ((char *) buckets.get (), capacity * sizeof (Bucket));
89- binary_out.write ((char *) &deterministic_bucket, sizeof (Bucket));
90- binary_out.write ((char *) &capacity, sizeof (uint8_t ));
91- binary_out.write ((char *) &col_idx, sizeof (uint8_t ));
92- }
93-
94- SketchSample<vec_t > FixedSizeSketchColumn::sample () const {
95- if (Bucket_Boruvka::is_empty (deterministic_bucket)) {
96- return {0 , ZERO}; // the "first" bucket is deterministic so if all zero then no edges to return
97- }
98- for (size_t i = 0 ; i < capacity; ++i) {
99- if (Bucket_Boruvka::is_good (buckets[i], seed)) {
100- return {buckets[i].alpha , GOOD};
101- }
102- }
103- return {0 , FAIL};
104- }
105-
106- void FixedSizeSketchColumn::clear () {
107- std::memset (buckets.get (), 0 , capacity * sizeof (Bucket));
108- deterministic_bucket = {0 , 0 };
109- }
110-
111- void FixedSizeSketchColumn::merge (FixedSizeSketchColumn &other) {
112- for (size_t i = 0 ; i < capacity; ++i) {
113- buckets[i] ^= other.buckets [i];
114- }
115- deterministic_bucket ^= other.deterministic_bucket ;
116- }
117-
118- void FixedSizeSketchColumn::update (const vec_t update) {
119- vec_hash_t checksum = Bucket_Boruvka::get_index_hash (update, seed);
120- col_hash_t depth = Bucket_Boruvka::get_index_depth (update, seed, col_idx, capacity);
121- assert (depth < capacity);
122- buckets[depth] ^= {update, checksum};
123- deterministic_bucket ^= {update, checksum};
124- }
125-
126-
12761
12862class ResizeableSketchColumn {
12963private:
@@ -160,106 +94,3 @@ class ResizeableSketchColumn {
16094 void reallocate (uint8_t new_capacity);
16195};
16296
163- uint64_t ResizeableSketchColumn::seed = 0 ;
164- uint64_t FixedSizeSketchColumn::seed = 0 ;
165-
166-
167- ResizeableSketchColumn::ResizeableSketchColumn (uint8_t start_capacity, uint16_t col_idx) :
168- capacity(start_capacity), col_idx(col_idx) {
169-
170- // auto aligned_memptr = hwy::MakeUniqueAlignedArray<Bucket>(start_capacity);
171- aligned_buckets = hwy::AllocateAligned<Bucket>(start_capacity);
172- std::memset (aligned_buckets.get (), 0 , capacity * sizeof (Bucket));
173- }
174-
175- ResizeableSketchColumn::ResizeableSketchColumn (const ResizeableSketchColumn &other) :
176- capacity(other.capacity), col_idx(other.col_idx), deterministic_bucket(other.deterministic_bucket) {
177- aligned_buckets = hwy::AllocateAligned<Bucket>(capacity);
178- std::memcpy (aligned_buckets.get (), other.aligned_buckets .get (), capacity * sizeof (Bucket));
179- }
180-
181- ResizeableSketchColumn::~ResizeableSketchColumn () {
182- }
183-
184- /*
185- Note this DROPS the contents if allocated down too much.
186- */
187- void ResizeableSketchColumn::reallocate (uint8_t new_capacity) {
188- auto resize_capacity = std::max (new_capacity, capacity);
189- auto new_buckets = hwy::AllocateAligned<Bucket>(new_capacity);
190- std::memset (new_buckets.get () + capacity, 0 ,
191- (resize_capacity - capacity) * sizeof (Bucket));
192- std::memcpy (new_buckets.get (), aligned_buckets.get (),
193- resize_capacity * sizeof (Bucket));
194- aligned_buckets = std::move (new_buckets);
195- capacity = new_capacity;
196- }
197-
198- void ResizeableSketchColumn::clear () {
199- std::memset (aligned_buckets.get (), 0 , capacity * sizeof (Bucket));
200- deterministic_bucket = {0 , 0 };
201- }
202-
203- void ResizeableSketchColumn::serialize (std::ostream &binary_out) const {
204- binary_out.write ((char *) aligned_buckets.get (), capacity * sizeof (Bucket));
205- binary_out.write ((char *) &deterministic_bucket, sizeof (Bucket));
206- binary_out.write ((char *) &capacity, sizeof (uint8_t ));
207- binary_out.write ((char *) &col_idx, sizeof (uint8_t ));
208- }
209-
210- SketchSample<vec_t > ResizeableSketchColumn::sample () const {
211- if (Bucket_Boruvka::is_empty (deterministic_bucket)) {
212- return {0 , ZERO}; // the "first" bucket is deterministic so if all zero then no edges to return
213- }
214- for (size_t i = capacity; i > 0 ; --i) {
215- if (Bucket_Boruvka::is_good (aligned_buckets[i - 1 ], seed)) {
216- return {aligned_buckets[i - 1 ].alpha , GOOD};
217- }
218- }
219- return {0 , FAIL};
220- }
221-
222- void ResizeableSketchColumn::update (const vec_t update) {
223- vec_hash_t checksum = Bucket_Boruvka::get_index_hash (update, seed);
224- // TODO - remove magic number
225- // TODO - get_index_depth needs to be fixed. hashes need to be longer
226- // than 32 bits if we're not using the deep bucket buffer idea.
227- col_hash_t depth = Bucket_Boruvka::get_index_depth (update, seed, col_idx, 32 );
228- deterministic_bucket ^= {update, checksum};
229-
230- while (depth >= capacity) {
231- // first multple of 4 larger than or equal to depth
232- reallocate (capacity + 4 );
233- }
234- aligned_buckets[depth] ^= {update, checksum};
235- }
236-
237- void ResizeableSketchColumn::merge (ResizeableSketchColumn &other) {
238- deterministic_bucket ^= other.deterministic_bucket ;
239- if (other.capacity > capacity) {
240- reallocate (other.capacity );
241- }
242- // auto for_vector_merge = hwy::Rebind<Bucket, uint32_t(aligned_buckets.get(), capacity);
243- uint32_t *for_vector_merge = reinterpret_cast <uint32_t *>(aligned_buckets.get ());
244- uint32_t *other_for_vector_merge = reinterpret_cast <uint32_t *>(other.aligned_buckets .get ());
245- int num_vectors = other.capacity * (sizeof (Bucket) / sizeof (uint32_t ));
246- hwy::HWY_NAMESPACE::simd_xor (for_vector_merge, other_for_vector_merge, num_vectors);
247- }
248-
249- uint8_t ResizeableSketchColumn::get_depth () const {
250- // TODO - maybe rely on flag vectors
251- for (size_t i = capacity; i > 0 ; --i) {
252- if (!Bucket_Boruvka::is_empty (aligned_buckets[i - 1 ])) {
253- return i;
254- }
255- }
256- return 0 ;
257- }
258-
259-
260-
261- static_assert (SketchColumnConcept<FixedSizeSketchColumn, vec_t >,
262- " FixedSizeSketchColumn does not satisfy SketchColumnConcept" );
263-
264- static_assert (SketchColumnConcept<ResizeableSketchColumn, vec_t >,
265- " ResizeableSketchColumn does not satisfy SketchColumnConcept" );
0 commit comments