Skip to content

Commit 8752c6a

Browse files
committed
update move_to_17_using_FFM with 6.1.X
1 parent d0575f7 commit 8752c6a

28 files changed

+554
-496
lines changed

src/main/java/org/apache/datasketches/frequencies/PreambleUtil.java

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,22 +32,16 @@
3232
* This class defines the preamble data structure and provides basic utilities for some of the key
3333
* fields.
3434
*
35-
* <p>
36-
* The intent of the design of this class was to isolate the detailed knowledge of the bit and byte
35+
* <p>The intent of the design of this class was to isolate the detailed knowledge of the bit and byte
3736
* layout of the serialized form of the sketches derived from the Sketch class into one place. This
3837
* allows the possibility of the introduction of different serialization schemes with minimal impact
39-
* on the rest of the library.
40-
* </p>
38+
* on the rest of the library.</p>
4139
*
42-
* <p>
43-
* MAP: Low significance bytes of this <i>long</i> data structure are on the right. However, the
40+
* <p>MAP: Low significance bytes of this <i>long</i> data structure are on the right. However, the
4441
* multi-byte integers (<i>int</i> and <i>long</i>) are stored in native byte order. The <i>byte</i>
45-
* values are treated as unsigned.
46-
* </p>
42+
* values are treated as unsigned.</p>
4743
*
48-
* <p>
49-
* An empty FrequentItems only requires 8 bytes. All others require 32 bytes of preamble.
50-
* </p>
44+
* <p>An empty FrequentItems only requires 8 bytes. All others require 32 bytes of preamble.</p>
5145
*
5246
* <pre>
5347
* * Long || Start Byte Adr:

src/main/java/org/apache/datasketches/quantiles/DoublesByteArrayImpl.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ static byte[] toByteArray(final DoublesSketch sketch, final boolean ordered, fin
5858
| (ordered ? ORDERED_FLAG_MASK : 0)
5959
| (compact ? (COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK) : 0);
6060

61-
if (empty && !sketch.hasMemory()) { //empty & has Memory
61+
if (empty && !sketch.hasMemory()) { //empty & !has Memory
6262
final byte[] outByteArr = new byte[Long.BYTES];
6363
final WritableMemory memOut = WritableMemory.writableWrap(outByteArr);
6464
final int preLongs = 1;
@@ -79,31 +79,31 @@ static byte[] toByteArray(final DoublesSketch sketch, final boolean ordered, fin
7979
*/
8080
private static byte[] convertToByteArray(final DoublesSketch sketch, final int flags,
8181
final boolean ordered, final boolean compact) {
82-
final int preLongs = 2;
83-
final int extra = 2; // extra space for min and max quantiles
84-
final int prePlusExtraBytes = (preLongs + extra) << 3;
85-
final int k = sketch.getK();
86-
final long n = sketch.getN();
87-
88-
// If not-compact, have accessor always report full levels. Then use level size to determine
89-
// whether to copy data out.
90-
final DoublesSketchAccessor dsa = DoublesSketchAccessor.wrap(sketch, !compact);
82+
final int preLongs = sketch.isEmpty() ? 1 : 2;
9183

9284
final int outBytes = (compact ? sketch.getCurrentCompactSerializedSizeBytes()
9385
: sketch.getCurrentUpdatableSerializedSizeBytes());
9486

9587
final byte[] outByteArr = new byte[outBytes];
9688
final WritableMemory memOut = WritableMemory.writableWrap(outByteArr);
9789

98-
//insert preamble-0, N, min, max
90+
//insert pre0
91+
final int k = sketch.getK();
9992
insertPre0(memOut, preLongs, flags, k);
10093
if (sketch.isEmpty()) { return outByteArr; }
10194

95+
//insert N, min, max
96+
final long n = sketch.getN();
10297
insertN(memOut, n);
10398
insertMinDouble(memOut, sketch.isEmpty() ? Double.NaN : sketch.getMinItem());
10499
insertMaxDouble(memOut, sketch.isEmpty() ? Double.NaN : sketch.getMaxItem());
105100

106-
long memOffsetBytes = prePlusExtraBytes;
101+
// If not-compact, have accessor always report full levels. Then use level size to determine
102+
// whether to copy data out.
103+
final DoublesSketchAccessor dsa = DoublesSketchAccessor.wrap(sketch, !compact);
104+
105+
final int minAndMax = 2; // extra space for min and max quantiles
106+
long memOffsetBytes = (preLongs + minAndMax) << 3;
107107

108108
// might need to sort base buffer but don't want to change input sketch
109109
final int bbCnt = computeBaseBufferItems(k, n);

src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.apache.datasketches.memory.WritableBuffer;
3333
import org.apache.datasketches.memory.WritableMemory;
3434
import org.apache.datasketches.quantilescommon.QuantilesAPI;
35+
import org.apache.datasketches.quantilescommon.QuantilesUtil;
3536

3637
/**
3738
* t-Digest for estimating quantiles and ranks.
@@ -125,7 +126,7 @@ public void merge(final TDigestDouble other) {
125126
/**
126127
* Process buffered values and merge centroids if needed
127128
*/
128-
public void compress() {
129+
private void compress() {
129130
if (numBuffered_ == 0) { return; }
130131
final int num = numBuffered_ + numCentroids_;
131132
final double[] values = new double[num];
@@ -277,6 +278,51 @@ public double getQuantile(final double rank) {
277278
return weightedAverage(centroidWeights_[numCentroids_ - 1], w1, maxValue_, w2);
278279
}
279280

281+
/**
282+
* Returns an approximation to the Probability Mass Function (PMF) of the input stream
283+
* given a set of split points.
284+
*
285+
* @param splitPoints an array of <i>m</i> unique, monotonically increasing values
286+
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins).
287+
*
288+
* @return an array of m+1 doubles each of which is an approximation
289+
* to the fraction of the input stream values (the mass) that fall into one of those intervals.
290+
* @throws SketchesStateException if sketch is empty.
291+
*/
292+
public double[] getPMF(final double[] splitPoints) {
293+
final double[] buckets = getCDF(splitPoints);
294+
for (int i = buckets.length; i-- > 1; ) {
295+
buckets[i] -= buckets[i - 1];
296+
}
297+
return buckets;
298+
}
299+
300+
/**
301+
* Returns an approximation to the Cumulative Distribution Function (CDF), which is the
302+
* cumulative analog of the PMF, of the input stream given a set of split points.
303+
*
304+
* @param splitPoints an array of <i>m</i> unique, monotonically increasing values
305+
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
306+
*
307+
* @return an array of m+1 doubles, which are a consecutive approximation to the CDF
308+
* of the input stream given the splitPoints. The value at array position j of the returned
309+
* CDF array is the sum of the returned values in positions 0 through j of the returned PMF
310+
* array. This can be viewed as an array of ranks of the given split points plus one more value
311+
* that is always 1.
312+
* @throws SketchesStateException if sketch is empty.
313+
*/
314+
public double[] getCDF(final double[] splitPoints) {
315+
if (isEmpty()) { throw new SketchesStateException(QuantilesAPI.EMPTY_MSG); }
316+
QuantilesUtil.checkDoublesSplitPointsOrder(splitPoints);
317+
final int len = splitPoints.length + 1;
318+
final double[] ranks = new double[len];
319+
for (int i = 0; i < len - 1; i++) {
320+
ranks[i] = getRank(splitPoints[i]);
321+
}
322+
ranks[len - 1] = 1.0;
323+
return ranks;
324+
}
325+
280326
/**
281327
* Computes size needed to serialize the current state.
282328
* @return size in bytes needed to serialize this tdigest

src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ public void basicWritableWrapTest() {
139139
@Test
140140
public void countWritableWrappedBitsWhenDirty() {
141141
// like basicOperationTest but with setBit which does
142-
// not neecssarily track numBitsSet_
142+
// not necessarily track numBitsSet_
143143
final HeapBitArray hba = new HeapBitArray(128);
144144
assertFalse(hba.getAndSetBit(1));
145145
assertFalse(hba.getAndSetBit(2));

src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131

3232
import org.apache.datasketches.memory.DefaultMemoryRequestServer;
3333
import org.apache.datasketches.memory.Memory;
34-
//import org.apache.datasketches.memory.WritableHandle;
3534
import org.apache.datasketches.memory.WritableMemory;
3635

3736
/**
@@ -74,8 +73,6 @@ private static void promotions(int lgConfigK, int n, TgtHllType tgtHllType, bool
7473
byte[] barr1;
7574
WritableMemory wmem;
7675
try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(bytes)).scope()) {
77-
//byte[] byteArr = new byte[bytes];
78-
//WritableMemory wmem = WritableMemory.wrap(byteArr);
7976
hllSketch = new HllSketch(lgConfigK, tgtHllType, wmem);
8077
assertTrue(hllSketch.isEmpty());
8178

src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,6 @@ public void checkCorruptMemoryInput() {
109109
HllSketch sk = new HllSketch(12);
110110
byte[] memObj = sk.toCompactByteArray();
111111
WritableMemory wmem = WritableMemory.writableWrap(memObj);
112-
//long memAdd = wmem.getCumulativeOffset(0);
113112
HllSketch bad;
114113

115114
//checkFamily
@@ -148,7 +147,6 @@ public void checkCorruptMemoryInput() {
148147
for (int i = 1; i <= 15; i++) { sk.update(i); }
149148
memObj = sk.toCompactByteArray();
150149
wmem = WritableMemory.writableWrap(memObj);
151-
//memAdd = wmem.getCumulativeOffset(0);
152150

153151
//check wrong PreInts and SET
154152
try {
@@ -162,7 +160,6 @@ public void checkCorruptMemoryInput() {
162160
for (int i = 15; i <= 1000; i++) { sk.update(i); }
163161
memObj = sk.toCompactByteArray();
164162
wmem = WritableMemory.writableWrap(memObj);
165-
//memAdd = wmem.getCumulativeOffset(0);
166163

167164
//check wrong PreInts and HLL
168165
try {
@@ -179,7 +176,6 @@ public void checkExtractFlags() {
179176
int bytes = HllSketch.getMaxUpdatableSerializationBytes(4, TgtHllType.HLL_4);
180177
WritableMemory wmem = WritableMemory.allocate(bytes);
181178
Object memObj = wmem.getArray();
182-
//long memAdd = wmem.getCumulativeOffset(0L);
183179
HllSketch sk = new HllSketch(4, TgtHllType.HLL_4, wmem);
184180
int flags = extractFlags(wmem);
185181
assertEquals(flags, EMPTY_FLAG_MASK);

src/test/java/org/apache/datasketches/kll/KllCrossLanguageTest.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,16 @@ public void generateKllFloatsSketchBinaries() throws IOException {
6767
}
6868
}
6969

70+
@Test(groups = {GENERATE_JAVA_FILES})
71+
public void generateKllLongsSketchBinaries() throws IOException {
72+
final int[] nArr = {0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000};
73+
for (int n: nArr) {
74+
final KllLongsSketch sk = KllLongsSketch.newHeapInstance();
75+
for (int i = 1; i <= n; i++) { sk.update(i); }
76+
Files.newOutputStream(javaPath.resolve("kll_long_n" + n + "_java.sk")).write(sk.toByteArray());
77+
}
78+
}
79+
7080
@Test(groups = {GENERATE_JAVA_FILES})
7181
public void generateKllItemsSketchBinaries() throws IOException {
7282
final int[] nArr = {0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000};

src/test/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketchTest.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -110,13 +110,13 @@ public void checkDirectCompactGetDoubleItemsArray() {
110110

111111
KllDoublesSketch sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray()));
112112
double[] itemsArr = sk2.getDoubleItemsArray();
113-
for (int i = 0; i < 20; i++) { assertEquals(itemsArr[i], 0F); }
113+
for (int i = 0; i < 20; i++) { assertEquals(itemsArr[i], 0.0); }
114114

115115
sk.update(1);
116116
sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray()));
117117
itemsArr = sk2.getDoubleItemsArray();
118-
for (int i = 0; i < 19; i++) { assertEquals(itemsArr[i], 0F); }
119-
assertEquals(itemsArr[19], 1F);
118+
for (int i = 0; i < 19; i++) { assertEquals(itemsArr[i], 0.0); }
119+
assertEquals(itemsArr[19], 1.0);
120120

121121
for (int i = 2; i <= 21; i++) { sk.update(i); }
122122
sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray()));
@@ -169,12 +169,12 @@ public void checkMinAndMax() {
169169
try { sk2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {}
170170
sk.update(1);
171171
sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray()));
172-
assertEquals(sk2.getMaxItem(),1.0F);
173-
assertEquals(sk2.getMinItem(),1.0F);
172+
assertEquals(sk2.getMaxItem(),1.0);
173+
assertEquals(sk2.getMinItem(),1.0);
174174
for (int i = 2; i <= 21; i++) { sk.update(i); }
175175
sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray()));
176-
assertEquals(sk2.getMaxItem(),21.0F);
177-
assertEquals(sk2.getMinItem(),1.0F);
176+
assertEquals(sk2.getMaxItem(),21.0);
177+
assertEquals(sk2.getMinItem(),1.0);
178178
}
179179

180180
@Test

src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ public void oneItemSketch() {
4141
sketch.update(0);
4242
QuantilesDoublesSketchIterator it = sketch.iterator();
4343
Assert.assertTrue(it.next());
44-
Assert.assertEquals(it.getQuantile(), 0f);
44+
Assert.assertEquals(it.getQuantile(), 0);
4545
Assert.assertEquals(it.getWeight(), 1);
4646
Assert.assertFalse(it.next());
4747
}

src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -189,11 +189,11 @@ public void mergeLowerK() {
189189
sketch2.update(2 * n - i - 1);
190190
}
191191

192-
assertEquals(sketch1.getMinItem(), 0.0f);
193-
assertEquals(sketch1.getMaxItem(), n - 1f);
192+
assertEquals(sketch1.getMinItem(), 0.0);
193+
assertEquals(sketch1.getMaxItem(), n - 1.0);
194194

195195
assertEquals(sketch2.getMinItem(), n);
196-
assertEquals(sketch2.getMaxItem(), 2f * n - 1f);
196+
assertEquals(sketch2.getMaxItem(), 2.0 * n - 1.0);
197197

198198
assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false));
199199
assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true));
@@ -613,7 +613,7 @@ public void checkWritableWrapOfCompactForm() {
613613
public void checkReadOnlyExceptions() {
614614
int k = 20;
615615
double[] dblArr = new double[0];
616-
double dblV = 1.0f;
616+
double dblV = 1.0;
617617
int idx = 1;
618618
boolean bool = true;
619619
KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(k);

0 commit comments

Comments
 (0)