diff --git a/README.md b/README.md index 97f072f..cc2ca15 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,20 @@ about 0.0015%. The type is `binary_fuse16_t` and you may use it with functions such as `binary_fuse16_allocate`, `binary_fuse16_populate`, `binary_fuse8_contain` and `binary_fuse8_free`. -You may serialize the data as follows: +For serialization, there is a choice between an unpacked and a packed format. + +The unpacked format is roughly the same size as the in-core data, but uses the most +efficient memory copy operations. + +The packed format avoids storing zero bytes and relies on a bitset to locate them, so it +should be expected to be somewhat slower. The packed format might be smaller or larger. +It might be beneficial when using 16-bit binary fuse filters for users who need to save +every byte, and who do not care about the computational overhead. +When in doubt, prefer the regular (unpacked) format. + +The two formats use slightly different APIs. + +You may serialize and deserialize in unpacked format as follows: ```C size_t buffer_size = binary_fuse16_serialization_bytes(&filter); @@ -65,9 +78,34 @@ You may serialize the data as follows: free(buffer); ``` -The serialization does not handle endianess: it is expected that you will serialize -and deserialize on the little endian systems. (Big endian systems are vanishingly rare.) +This should be the default. + +To serialize and deserialize in packed format, use the `_pack_bytes()`, +`_pack()` and `_unpack()` functions. The latter two have an additional `size_t` +argument for the buffer length. `_pack()` can be used with a buffer of arbitrary +size; it returns the used space if the serialization fits into the buffer, or 0 +otherwise. Note that the packed format will be slower and may not save space, +although it is likely smaller on disk when using the 16-bit binary fuse filters.
+ +For example: + +```C + size_t buffer_size = binary_fuse16_pack_bytes(&filter); + char *buffer = (char*)malloc(buffer_size); + if (binary_fuse16_pack(&filter, buffer, buffer_size) != buffer_size) { + printf("pack failed\n"); + free(buffer); + return; + } + binary_fuse16_free(&filter); + if (! binary_fuse16_unpack(&filter, buffer, buffer_size)) { + printf("unpack failed\n"); + } + free(buffer); +``` +Either serialization does not handle endianness changes: it is expected that you +serialize and deserialize with equal byte order. ## C++ wrapper diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index a273c92..54d88fd 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -1,2 +1,5 @@ add_executable(bench bench.c) target_link_libraries(bench PUBLIC xor_singleheader) + +add_executable(spaceusage spaceusage.c) +target_link_libraries(spaceusage PUBLIC xor_singleheader) diff --git a/benchmarks/spaceusage.c b/benchmarks/spaceusage.c new file mode 100644 index 0000000..4d6867f --- /dev/null +++ b/benchmarks/spaceusage.c @@ -0,0 +1,145 @@ +#include "binaryfusefilter.h" +#include "xorfilter.h" +#include <stdio.h> +#include <stdlib.h> + +// Serialized size in bytes of one filter in the unpacked (standard) and packed formats. +typedef struct { + size_t standard; + size_t pack; +} sizes; + +// Build a binary_fuse16 filter over the keys 0..n-1 and report its serialized sizes. +sizes fuse16(size_t n) { + binary_fuse16_t filter = {0}; + if (! binary_fuse16_allocate(n, &filter)) { + printf("allocation failed\n"); + return (sizes) {0, 0}; + } + uint64_t* big_set = malloc(n * sizeof(uint64_t)); + if (big_set == NULL) { + printf("allocation failed\n"); + binary_fuse16_free(&filter); + return (sizes) {0, 0}; + } + for(size_t i = 0; i < n; i++) { + big_set[i] = i; + } + bool is_ok = binary_fuse16_populate(big_set, n, &filter); + if(! is_ok ) { + printf("populating failed\n"); + } + free(big_set); + sizes s = { + .standard = binary_fuse16_serialization_bytes(&filter), + .pack = binary_fuse16_pack_bytes(&filter) + }; + binary_fuse16_free(&filter); + return s; +} + +// Build a binary_fuse8 filter over the keys 0..n-1 and report its serialized sizes. +sizes fuse8(size_t n) { + binary_fuse8_t filter = {0}; + if (! 
binary_fuse8_allocate(n, &filter)) { + printf("allocation failed\n"); + return (sizes) {0, 0}; + } + uint64_t* big_set = malloc(n * sizeof(uint64_t)); + if (big_set == NULL) { + printf("allocation failed\n"); + binary_fuse8_free(&filter); + return (sizes) {0, 0}; + } + for(size_t i = 0; i < n; i++) { + big_set[i] = i; + } + bool is_ok = binary_fuse8_populate(big_set, n, &filter); + if(! is_ok ) { + printf("populating failed\n"); + } + free(big_set); + sizes s = { + .standard = binary_fuse8_serialization_bytes(&filter), + .pack = binary_fuse8_pack_bytes(&filter) + }; + binary_fuse8_free(&filter); + return s; +} + +// Build an xor16 filter over the keys 0..n-1 and report its serialized sizes. +sizes xor16(size_t n) { + xor16_t filter = {0}; + if (! xor16_allocate(n, &filter)) { + printf("allocation failed\n"); + return (sizes) {0, 0}; + } + uint64_t* big_set = malloc(n * sizeof(uint64_t)); + if (big_set == NULL) { + printf("allocation failed\n"); + xor16_free(&filter); + return (sizes) {0, 0}; + } + for(size_t i = 0; i < n; i++) { + big_set[i] = i; + } + bool is_ok = xor16_populate(big_set, n, &filter); + if(! is_ok ) { + printf("populating failed\n"); + } + free(big_set); + sizes s = { + .standard = xor16_serialization_bytes(&filter), + .pack = xor16_pack_bytes(&filter) + }; + xor16_free(&filter); + return s; +} + +// Build an xor8 filter over the keys 0..n-1 and report its serialized sizes. +sizes xor8(size_t n) { + xor8_t filter = {0}; + if (! xor8_allocate(n, &filter)) { + printf("allocation failed\n"); + return (sizes) {0, 0}; + } + uint64_t* big_set = malloc(n * sizeof(uint64_t)); + if (big_set == NULL) { + printf("allocation failed\n"); + xor8_free(&filter); + return (sizes) {0, 0}; + } + for(size_t i = 0; i < n; i++) { + big_set[i] = i; + } + bool is_ok = xor8_populate(big_set, n, &filter); + if(! 
is_ok ) { + printf("populating failed\n"); + } + free(big_set); + sizes s = { + .standard = xor8_serialization_bytes(&filter), + .pack = xor8_pack_bytes(&filter) + }; + xor8_free(&filter); + + return s; +} + +// Print bits per key for both formats of each filter type, doubling n from 10 up to 10000000. +int main() { + for (size_t n = 10; n <= 10000000; n *= 2) { + printf("%-10zu ", n); // Align number to 10 characters wide + sizes f16 = fuse16(n); + sizes f8 = fuse8(n); + sizes x16 = xor16(n); + sizes x8 = xor8(n); + + printf("fuse16: %5.2f %5.2f ", (double)f16.standard * 8.0 / n, (double)f16.pack * 8.0 / n); + printf("fuse8: %5.2f %5.2f ", (double)f8.standard * 8.0 / n, (double)f8.pack * 8.0 / n); + printf("xor16: %5.2f %5.2f ", (double)x16.standard * 8.0 / n, (double)x16.pack * 8.0 / n); + printf("xor8: %5.2f %5.2f ", (double)x8.standard * 8.0 / n, (double)x8.pack * 8.0 / n); + printf("\n"); + } + return EXIT_SUCCESS; +} diff --git a/include/binaryfusefilter.h b/include/binaryfusefilter.h index 7049f3f..14dbf4e 100644 --- a/include/binaryfusefilter.h +++ b/include/binaryfusefilter.h @@ -67,6 +67,7 @@ static inline uint64_t binary_fuse_rng_splitmix64(uint64_t *seed) { typedef struct binary_fuse8_s { uint64_t Seed; + uint32_t Size; uint32_t SegmentLength; uint32_t SegmentLengthMask; uint32_t SegmentCount; @@ -222,6 +223,7 @@ static inline double binary_fuse_calculate_size_factor(uint32_t arity, static inline bool binary_fuse8_allocate(uint32_t size, binary_fuse8_t *filter) { uint32_t arity = 3; + filter->Size = size; filter->SegmentLength = size == 0 ?
4 : binary_fuse_calculate_segment_length(arity, size); if (filter->SegmentLength > 262144) { filter->SegmentLength = 262144; @@ -258,6 +260,7 @@ static inline void binary_fuse8_free(binary_fuse8_t *filter) { free(filter->Fingerprints); filter->Fingerprints = NULL; filter->Seed = 0; + filter->Size = 0; filter->SegmentLength = 0; filter->SegmentLengthMask = 0; filter->SegmentCount = 0; @@ -459,6 +462,7 @@ static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size, typedef struct binary_fuse16_s { uint64_t Seed; + uint32_t Size; uint32_t SegmentLength; uint32_t SegmentLengthMask; uint32_t SegmentCount; @@ -512,6 +516,7 @@ static inline bool binary_fuse16_contain(uint64_t key, static inline bool binary_fuse16_allocate(uint32_t size, binary_fuse16_t *filter) { uint32_t arity = 3; + filter->Size = size; filter->SegmentLength = size == 0 ? 4 : binary_fuse_calculate_segment_length(arity, size); if (filter->SegmentLength > 262144) { filter->SegmentLength = 262144; @@ -548,6 +553,7 @@ static inline void binary_fuse16_free(binary_fuse16_t *filter) { free(filter->Fingerprints); filter->Fingerprints = NULL; filter->Seed = 0; + filter->Size = 0; filter->SegmentLength = 0; filter->SegmentLengthMask = 0; filter->SegmentCount = 0; @@ -858,4 +864,115 @@ static inline bool binary_fuse8_deserialize(binary_fuse8_t * filter, const char return true; } +// minimal bitfield implementation +#define XOR_bitf_w (sizeof(uint8_t) * 8) +#define XOR_bitf_sz(bits) (((bits) + XOR_bitf_w - 1) / XOR_bitf_w) +#define XOR_bitf_word(bit) ((bit) / XOR_bitf_w) +#define XOR_bitf_bit(bit) ((1U << ((bit) % XOR_bitf_w)) % 256) + +#define XOR_ser(buf, lim, src) do { \ + if (sizeof src > (size_t)((lim) - (buf))) \ + return (0); \ + memcpy(buf, &src, sizeof src); \ + buf += sizeof src; \ +} while (0) + +#define XOR_deser(dst, buf, lim) do { \ + if (sizeof dst > (size_t)((lim) - (buf))) \ + return (false); \ + memcpy(&dst, buf, sizeof dst); \ + buf += sizeof dst; \ +} while (0) + +// return required space for 
binary_fuse{8,16}_pack() +#define XOR_bytesf(fuse) \ +static inline size_t binary_ ## fuse ## _pack_bytes(const binary_ ## fuse ## _t *filter) \ +{ \ + size_t sz = 0; \ + sz += sizeof filter->Seed; \ + sz += sizeof filter->Size; \ + sz += XOR_bitf_sz(filter->ArrayLength); \ + for (size_t i = 0; i < filter->ArrayLength; i++) { \ + if (filter->Fingerprints[i] == 0) \ + continue; \ + sz += sizeof filter->Fingerprints[i]; \ + } \ + return (sz); \ +} + +// serialize as packed format, return size used or 0 for insufficient space +#define XOR_packf(fuse) \ +static inline size_t binary_ ## fuse ## _pack(const binary_ ## fuse ## _t *filter, char *buffer, size_t space) { \ + uint8_t *s = (uint8_t *)(void *)buffer; \ + uint8_t *buf = s, *e = buf + space; \ + \ + XOR_ser(buf, e, filter->Seed); \ + XOR_ser(buf, e, filter->Size); \ + size_t bsz = XOR_bitf_sz(filter->ArrayLength); \ + if (bsz > (size_t)(e - buf)) \ + return (0); \ + uint8_t *bitf = buf; \ + memset(bitf, 0, bsz); \ + buf += bsz; \ + \ + for (size_t i = 0; i < filter->ArrayLength; i++) { \ + if (filter->Fingerprints[i] == 0) \ + continue; \ + bitf[XOR_bitf_word(i)] |= XOR_bitf_bit(i); \ + XOR_ser(buf, e, filter->Fingerprints[i]); \ + } \ + return ((size_t)(buf - s)); \ +} + +// deserialize packed format, return false if the buffer is too short; *filter is overwritten without being freed first, and on failure it may still own an allocation that _free() releases +#define XOR_unpackf(fuse) \ +static inline bool binary_ ## fuse ## _unpack(binary_ ## fuse ## _t *filter, const char *buffer, size_t len) \ +{ \ + const uint8_t *s = (const uint8_t *)(const void *)buffer; \ + const uint8_t *buf = s, *e = buf + len; \ + bool r; \ + \ + uint64_t Seed; \ + uint32_t Size; \ + \ + memset(filter, 0, sizeof *filter); \ + XOR_deser(Seed, buf, e); \ + XOR_deser(Size, buf, e); \ + r = binary_ ## fuse ## _allocate(Size, filter); \ + if (! 
r) \ + return (r); \ + filter->Seed = Seed; \ + size_t bsz = XOR_bitf_sz(filter->ArrayLength); \ + if (bsz > (size_t)(e - buf)) \ + return (false); \ + const uint8_t *bitf = buf; \ + buf += bsz; \ + for (size_t i = 0; i < filter->ArrayLength; i++) { \ + if ((bitf[XOR_bitf_word(i)] & XOR_bitf_bit(i)) == 0) \ + continue; \ + XOR_deser(filter->Fingerprints[i], buf, e); \ + } \ + return (true); \ +} + +#define XOR_packers(fuse) \ +XOR_bytesf(fuse) \ +XOR_packf(fuse) \ +XOR_unpackf(fuse) \ + +XOR_packers(fuse8) +XOR_packers(fuse16) + +#undef XOR_packers +#undef XOR_bytesf +#undef XOR_packf +#undef XOR_unpackf + +#undef XOR_bitf_w +#undef XOR_bitf_sz +#undef XOR_bitf_word +#undef XOR_bitf_bit +#undef XOR_ser +#undef XOR_deser + #endif diff --git a/include/xorfilter.h b/include/xorfilter.h index d87ef65..a07d397 100644 --- a/include/xorfilter.h +++ b/include/xorfilter.h @@ -1349,5 +1349,115 @@ static inline bool xor8_deserialize(xor8_t * filter, const char *buffer) { return true; } +// minimal bitfield implementation +#define XOR_bitf_w (sizeof(uint8_t) * 8) +#define XOR_bitf_sz(bits) (((bits) + XOR_bitf_w - 1) / XOR_bitf_w) +#define XOR_bitf_word(bit) ((bit) / XOR_bitf_w) +#define XOR_bitf_bit(bit) ((1U << ((bit) % XOR_bitf_w)) % 256) + +#define XOR_ser(buf, lim, src) do { \ + if (sizeof src > (size_t)((lim) - (buf))) \ + return (0); \ + memcpy(buf, &src, sizeof src); \ + buf += sizeof src; \ +} while (0) + +#define XOR_deser(dst, buf, lim) do { \ + if (sizeof dst > (size_t)((lim) - (buf))) \ + return (false); \ + memcpy(&dst, buf, sizeof dst); \ + buf += sizeof dst; \ +} while (0) + +// return required space for xor{8,16}_pack() +#define XOR_bytesf(xbits) \ +static inline size_t xor ## xbits ## _pack_bytes(const xor ## xbits ## _t *filter) \ +{ \ + size_t sz = 0; \ + size_t capacity = (size_t)(3 * filter->blockLength); \ + sz += sizeof filter->seed; \ + sz += sizeof filter->blockLength; \ + sz += XOR_bitf_sz(capacity); \ + for (size_t i = 0; i < capacity; i++) { \ + if (filter->fingerprints[i] == 0) \ + continue; \ + sz += sizeof filter->fingerprints[i]; \ + } 
\ + return (sz); \ +} + +// serialize as packed format, return size used or 0 for insufficient space +#define XOR_packf(xbits) \ +static inline size_t xor ## xbits ## _pack(const xor ## xbits ## _t *filter, char *buffer, size_t space) { \ + uint8_t *s = (uint8_t *)(void *)buffer; \ + uint8_t *buf = s, *e = buf + space; \ + size_t capacity = (size_t)(3 * filter->blockLength); \ + \ + XOR_ser(buf, e, filter->seed); \ + XOR_ser(buf, e, filter->blockLength); \ + size_t bsz = XOR_bitf_sz(capacity); \ + if (bsz > (size_t)(e - buf)) \ + return (0); \ + uint8_t *bitf = buf; \ + memset(bitf, 0, bsz); \ + buf += bsz; \ + \ + for (size_t i = 0; i < capacity; i++) { \ + if (filter->fingerprints[i] == 0) \ + continue; \ + bitf[XOR_bitf_word(i)] |= XOR_bitf_bit(i); \ + XOR_ser(buf, e, filter->fingerprints[i]); \ + } \ + return ((size_t)(buf - s)); \ +} + +// deserialize packed format, return false if the buffer is too short or its block length would overflow the capacity computation; *filter is overwritten without being freed first +#define XOR_unpackf(xbits) \ +static inline bool xor ## xbits ## _unpack(xor ## xbits ## _t *filter, const char *buffer, size_t len) \ +{ \ + const uint8_t *s = (const uint8_t *)(const void *)buffer; \ + const uint8_t *buf = s, *e = buf + len; \ + \ + memset(filter, 0, sizeof *filter); \ + XOR_deser(filter->seed, buf, e); \ + XOR_deser(filter->blockLength, buf, e); \ + if (filter->blockLength > (SIZE_MAX - XOR_bitf_w) / 3) \ + return (false); \ + size_t capacity = (size_t)(3 * filter->blockLength); \ + size_t bsz = XOR_bitf_sz(capacity); \ + if (bsz > (size_t)(e - buf)) \ + return (false); \ + filter->fingerprints = (uint ## xbits ## _t *)calloc(capacity, sizeof filter->fingerprints[0]); \ + if (filter->fingerprints == NULL) \ + return (false); \ + const uint8_t *bitf = buf; \ + buf += bsz; \ + for (size_t i = 0; i < capacity; i++) { \ + if ((bitf[XOR_bitf_word(i)] & XOR_bitf_bit(i)) == 0) \ + continue; \ + XOR_deser(filter->fingerprints[i], buf, e); \ + } \ + return (true); \ +} + +#define XOR_packers(xbits) \ +XOR_bytesf(xbits) \ +XOR_packf(xbits) \ +XOR_unpackf(xbits) \ + +XOR_packers(8) +XOR_packers(16) + +#undef XOR_packers +#undef XOR_bytesf +#undef XOR_packf +#undef XOR_unpackf + +#undef XOR_bitf_w +#undef XOR_bitf_sz +#undef XOR_bitf_word +#undef XOR_bitf_bit +#undef XOR_ser
+#undef XOR_deser #endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e23be17..3000ca9 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -14,9 +14,9 @@ else() # *nix -Wall -Wextra -Wshadow -Wcast-qual -Wconversion -Wsign-conversion -Werror) if (NOT MINGW) # sanitizers are not supported under mingw - list(APPEND TEST_COMPILE_OPTIONS -fsanitize=address,undefined,leak) + list(APPEND TEST_COMPILE_OPTIONS -fsanitize=address,undefined) # sanitsizers need to be specified at link time as well - target_link_options(unit PRIVATE -fsanitize=address,leak,undefined) + target_link_options(unit PRIVATE -fsanitize=address,undefined) endif() endif() diff --git a/tests/unit.c b/tests/unit.c index f6dbe00..cd7ad0c 100644 --- a/tests/unit.c +++ b/tests/unit.c @@ -8,16 +8,24 @@ #define F1(t, a, rt, t1, p1) rt GFNAM(t, a)(t1 p1) { return FNAM(t, a)(p1); } #define F2(t, a, rt, t1, p1, t2, p2) rt GFNAM(t, a)(t1 p1, t2 p2) { return FNAM(t, a)(p1, p2); } #define F3(t, a, rt, t1, p1, t2, p2, t3, p3) rt GFNAM(t, a)(t1 p1, t2 p2, t3 p3) { return FNAM(t, a)(p1, p2, p3); } +// map 3-argument _gen to 2-argument, discarding last +#define F32(t, a, rt, t1, p1, t2, p2, t3, p3) rt GFNAM(t, a)(t1 p1, t2 p2, t3 p3) { (void)p3; return FNAM(t, a)(p1, p2); } +// void return, ignore return value +#define F3V(t, a, rt, t1, p1, t2, p2, t3, p3) rt GFNAM(t, a)(t1 p1, t2 p2, t3 p3) { (void)FNAM(t, a)(p1, p2, p3); } #define GEN_THUNKS(ftype) \ F2(ftype, allocate, bool, uint32_t, size, void*, filter) \ F1(ftype, free, void, void*, filter) \ F1(ftype, size_in_bytes, size_t, const void*, filter) \ F1(ftype, serialization_bytes, size_t, void*, filter) \ - F2(ftype, serialize, void, void*, filter, char*, buffer) \ - F2(ftype, deserialize, bool, void*, filter, const char*, buffer) \ + F32(ftype, serialize, void, void*, filter, char*, buffer, size_t, len) \ + F32(ftype, deserialize, bool, void*, filter, const char*, buffer, size_t, len) \ F3(ftype, populate, bool, uint64_t*, keys, 
uint32_t, size, void*, filter) \ - F2(ftype, contain, bool, uint64_t, key, const void*, filter) + F2(ftype, contain, bool, uint64_t, key, const void*, filter) \ + F1(ftype, pack_bytes, size_t, void*, filter) \ + F3V(ftype, pack, void, void*, filter, char*, buffer, size_t, len) \ + F3(ftype, unpack, bool, void*, filter, const char*, buffer, size_t, len) + GEN_THUNKS(xor8) GEN_THUNKS(xor16) @@ -32,8 +40,8 @@ bool test(size_t size, size_t repeated_size, void *filter, void (*free_filter)(void *filter), size_t (*size_in_bytes)(const void *filter), size_t (*serialization_bytes)(void *filter), - void (*serialize)(void *filter, char *buffer), - bool (*deserialize)(void *filter, const char *buffer), + void (*serialize)(void *filter, char *buffer, size_t len), + bool (*deserialize)(void *filter, const char *buffer, size_t len), bool (*populate)(uint64_t *keys, uint32_t size, void *filter), bool (*contain)(uint64_t key, const void *filter)) { allocate((uint32_t)size, filter); @@ -56,9 +64,9 @@ bool test(size_t size, size_t repeated_size, void *filter, size_t buffer_size = serialization_bytes(filter); char *buffer = (char*)malloc(buffer_size); - serialize(filter, buffer); + serialize(filter, buffer, buffer_size); free_filter(filter); - deserialize(filter, buffer); + deserialize(filter, buffer, buffer_size); free(buffer); for (size_t i = 0; i < size; i++) { if (!(contain)(big_set[i], filter)) { @@ -79,10 +87,14 @@ bool test(size_t size, size_t repeated_size, void *filter, } double fpp = (double)random_matches * 1.0 / (double)trials; printf(" fpp %3.5f (estimated) \n", fpp); - double bpe = (double)size_in_bytes(filter) * 8.0 / (double)size; - printf(" bits per entry %3.2f\n", bpe); - printf(" bits per entry %3.2f (theoretical lower bound)\n", - log(fpp)/log(2)); - printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2))); + size_t core_size = size_in_bytes(filter); + printf(" size in-core %zu wire %zu\n", core_size, buffer_size); + double cbpe = (double)core_size * 8.0 / 
(double)size; + double wbpe = (double)buffer_size * 8.0 / (double)size; + printf(" bits per entry in-core %3.2f wire %3.2f\n", cbpe, wbpe); + double bound = - log(fpp)/log(2); + printf(" bits per entry %3.2f (theoretical lower bound)\n", bound); + printf(" efficiency ratio in-core %3.3f wire %3.3f\n", cbpe/bound, wbpe/bound); free_filter(filter); free(big_set); return true; @@ -132,6 +144,35 @@ bool testxor16(size_t size) { } +bool testxor8pack(size_t size) { + printf("testing xor8 pack/unpack\n"); + xor8_t filter; + return test(size, 0, &filter, + xor8_allocate_gen, + xor8_free_gen, + xor8_size_in_bytes_gen, + xor8_pack_bytes_gen, + xor8_pack_gen, + xor8_unpack_gen, + xor8_populate_gen, + xor8_contain_gen); +} + +bool testxor16pack(size_t size) { + printf("testing xor16 pack/unpack\n"); + xor16_t filter; + return test(size, 0, &filter, + xor16_allocate_gen, + xor16_free_gen, + xor16_size_in_bytes_gen, + xor16_pack_bytes_gen, + xor16_pack_gen, + xor16_unpack_gen, + xor16_populate_gen, + xor16_contain_gen); +} + + bool testbufferedxor16(size_t size) { printf("testing buffered xor16\n"); @@ -161,8 +202,6 @@ bool testbinaryfuse8(size_t size, size_t repeated_size) { binary_fuse8_contain_gen); } - - bool testbinaryfuse16(size_t size, size_t repeated_size) { printf("testing binary fuse16 with size %zu and %zu duplicates\n", size, repeated_size); binary_fuse16_t filter; @@ -177,6 +216,35 @@ bool testbinaryfuse16(size_t size, size_t repeated_size) { binary_fuse16_contain_gen); } + +bool testbinaryfuse8pack(size_t size, size_t repeated_size) { + printf("testing binary fuse8 pack/unpack with size %zu and %zu duplicates\n", size, repeated_size); + binary_fuse8_t filter; + return test(size, repeated_size, &filter, + binary_fuse8_allocate_gen, + binary_fuse8_free_gen, + binary_fuse8_size_in_bytes_gen, + binary_fuse8_pack_bytes_gen, + binary_fuse8_pack_gen, + binary_fuse8_unpack_gen, + binary_fuse8_populate_gen, + binary_fuse8_contain_gen); +} + +bool testbinaryfuse16pack(size_t 
size, size_t repeated_size) { + printf("testing binary fuse16 pack/unpack with size %zu and %zu duplicates\n", size, repeated_size); + binary_fuse16_t filter; + return test(size, repeated_size, &filter, + binary_fuse16_allocate_gen, + binary_fuse16_free_gen, + binary_fuse16_size_in_bytes_gen, + binary_fuse16_pack_bytes_gen, + binary_fuse16_pack_gen, + binary_fuse16_unpack_gen, + binary_fuse16_populate_gen, + binary_fuse16_contain_gen); +} + void failure_rate_binary_fuse16() { printf("testing binary fuse16 for failure rate\n"); // we construct many 5000-long input cases and check the probability of failure. @@ -200,7 +268,34 @@ void failure_rate_binary_fuse16() { free(big_set); } +// test code from the example in the README +void readme_pack() { + binary_fuse16_t filter = {0}; + if (! binary_fuse16_allocate(64, &filter)) { + printf("allocation failed\n"); + return; + } + + // begin example snippet + size_t buffer_size = binary_fuse16_pack_bytes(&filter); + char *buffer = (char*)malloc(buffer_size); + if (binary_fuse16_pack(&filter, buffer, buffer_size) != buffer_size) { + printf("pack failed\n"); + free(buffer); + return; + } + binary_fuse16_free(&filter); + if (! 
binary_fuse16_unpack(&filter, buffer, buffer_size)) { + printf("unpack failed\n"); + } + free(buffer); + // end example snippet + + binary_fuse16_free(&filter); +} + int main() { + readme_pack(); failure_rate_binary_fuse16(); for(size_t size = 1000; size <= 1000000; size *= 300) { printf("== size = %zu \n", size); @@ -208,6 +303,10 @@ int main() { printf("\n"); if(!testbinaryfuse16(size, 0)) { abort(); } printf("\n"); + if(!testbinaryfuse8pack(size, 0)) { abort(); } + printf("\n"); + if(!testbinaryfuse16pack(size, 0)) { abort(); } + printf("\n"); if(!testbinaryfuse8(size, 10)) { abort(); } printf("\n"); if(!testbinaryfuse16(size, 10)) { abort(); } @@ -220,6 +319,10 @@ int main() { printf("\n"); if(!testxor16(size)) { abort(); } printf("\n"); + if(!testxor8pack(size)) { abort(); } + printf("\n"); + if(!testxor16pack(size)) { abort(); } + printf("\n"); printf("======\n"); }