Skip to content

Commit 4beeed0

Browse files
committed
Add unrolled bit-packing kernels for longs (widths 1..63)
Branch-free unrolled fastpackwithoutmask/fastunpack for all widths, mirroring the 32-bit BitPacking. Measured on Graviton (aarch64), Corretto 21, median over widths 1..63: pack 1.93x, unpack 2.66x.
1 parent 5b473c7 commit 4beeed0

3 files changed

Lines changed: 12775 additions & 16 deletions

File tree

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/**
2+
* This code is released under the
3+
* Apache License Version 2.0 http://www.apache.org/licenses/.
4+
*
5+
* (c) Daniel Lemire, http://lemire.me/en/
6+
*/
7+
package me.lemire.longcompression;
8+
9+
import java.text.DecimalFormat;
10+
import java.util.Random;
11+
12+
/**
13+
* Class used to benchmark the speed of long bit packing, comparing the
14+
* generic and unrolled {@link LongBitPacking} kernels. (For expert use.)
15+
*
16+
* @author Daniel Lemire
17+
*
18+
*/
19+
public class BenchmarkLongBitPacking {
20+
21+
private static void test(boolean verbose) {
22+
DecimalFormat dfspeed = new DecimalFormat("0");
23+
final int N = 64;
24+
final int times = 100000;
25+
Random r = new Random(0);
26+
long[] data = new long[N];
27+
long[] compressed = new long[N];
28+
long[] uncompressed = new long[N];
29+
for (int bit = 1; bit < 64; ++bit) {
30+
long mask = (1L << bit) - 1;
31+
long slowcomp = 0;
32+
long slowdecomp = 0;
33+
long fastcomp = 0;
34+
long fastdecomp = 0;
35+
for (int t = 0; t < times; ++t) {
36+
for (int k = 0; k < N; ++k) {
37+
data[k] = r.nextLong() & mask;
38+
}
39+
long time1 = System.nanoTime();
40+
LongBitPacking.slowpackwithoutmask(data, 0,
41+
compressed, 0, bit);
42+
long time2 = System.nanoTime();
43+
LongBitPacking.slowunpack(compressed, 0,
44+
uncompressed, 0, bit);
45+
long time3 = System.nanoTime();
46+
LongBitPacking.fastpackwithoutmask(data, 0,
47+
compressed, 0, bit);
48+
long time4 = System.nanoTime();
49+
LongBitPacking.fastunpack(compressed, 0,
50+
uncompressed, 0, bit);
51+
long time5 = System.nanoTime();
52+
slowcomp += time2 - time1;
53+
slowdecomp += time3 - time2;
54+
fastcomp += time4 - time3;
55+
fastdecomp += time5 - time4;
56+
}
57+
if (verbose)
58+
System.out.println("bit = "
59+
+ bit
60+
+ " slow comp. speed = "
61+
+ dfspeed.format(N * times * 1000.0
62+
/ (slowcomp))
63+
+ " slow decomp. speed = "
64+
+ dfspeed.format(N * times * 1000.0
65+
/ (slowdecomp))
66+
+ " unrolled comp. speed = "
67+
+ dfspeed.format(N * times * 1000.0
68+
/ (fastcomp))
69+
+ " unrolled decomp. speed = "
70+
+ dfspeed.format(N * times * 1000.0
71+
/ (fastdecomp)));
72+
}
73+
}
74+
75+
/**
76+
* Main method
77+
*
78+
* @param args
79+
* command-line arguments
80+
*/
81+
public static void main(String[] args) {
82+
System.out.println("Testing long packing (slow vs unrolled) ");
83+
test(false);
84+
test(true);
85+
}
86+
87+
}

0 commit comments

Comments
 (0)