A Bloom filter is a data structure that can be used for probabilistic - * set membership.
+ * A Bloom filter is a data structure that can be used for probabilistic + * set membership. * *When querying a Bloom filter, there are no false negatives. Specifically: * When querying an item that has already been inserted to the filter, the filter will diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java index f865a3350..ee17a9918 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java +++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java @@ -25,8 +25,8 @@ import org.apache.datasketches.memory.WritableMemory; /** - *
This class provides methods to help estimate the correct parameters when - * creating a Bloom filter, and methods to create the filter using those values.
+ * This class provides methods to help estimate the correct parameters when + * creating a Bloom filter, and methods to create the filter using those values. * * * ** MAP: Low significance bytes of this long data structure are on the right. However, the diff --git a/src/main/java/org/apache/datasketches/hash/MurmurHash3.java b/src/main/java/org/apache/datasketches/hash/MurmurHash3.java index 9fb2ab948..a708e0077 100644 --- a/src/main/java/org/apache/datasketches/hash/MurmurHash3.java +++ b/src/main/java/org/apache/datasketches/hash/MurmurHash3.java @@ -29,10 +29,8 @@ import org.apache.datasketches.memory.Memory; /** - *
* The MurmurHash3 is a fast, non-cryptographic, 128-bit hash function that has * excellent avalanche and 2-way bit independence properties. - *
* ** Austin Appleby's C++ diff --git a/src/main/java/org/apache/datasketches/hash/package-info.java b/src/main/java/org/apache/datasketches/hash/package-info.java index 2d97afeb6..5744b2776 100644 --- a/src/main/java/org/apache/datasketches/hash/package-info.java +++ b/src/main/java/org/apache/datasketches/hash/package-info.java @@ -18,12 +18,11 @@ */ /** - *
The hash package contains a high-performing and extended Java implementations + * The hash package contains high-performing and extended Java implementations * of Austin Appleby's 128-bit MurmurHash3 hash function originally coded in C. * This core MurmurHash3.java class is used throughout many of the sketch classes for consistency * and as long as the user specifies the same seed will result in coordinated hash operations. * This package also contains an adaptor class that extends the basic class with more functions * commonly associated with hashing. - *
*/ package org.apache.datasketches.hash; diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java index 67035b45f..acbecdf07 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java @@ -312,6 +312,7 @@ private static void randomlyHalveUpDoubles(final double[] buf, final int start, /** * Compression algorithm used to merge higher levels. + * *Here is what we do for each level:
*The parameter k will not change.
*/ @Override diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java index 50cadeb3e..69045f78c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java @@ -312,6 +312,7 @@ private static void randomlyHalveUpFloats(final float[] buf, final int start, fi /** * Compression algorithm used to merge higher levels. + * *Here is what we do for each level:
*The parameter k will not change.
*/ @Override diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java index 00b8c7d35..dd309ba3a 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java @@ -346,6 +346,7 @@ staticHere is what we do for each level:
*Here is what we do for each level:
*The parameter k will not change.
*/ @Override diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java index 03f84116a..f75d3dfe5 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java @@ -506,6 +506,7 @@ public QuantilesDoublesSketchIterator iterator() { /** * {@inheritDoc} + * *The parameter k will not change.
*/ @Override diff --git a/src/main/java/org/apache/datasketches/quantiles/package-info.java b/src/main/java/org/apache/datasketches/quantiles/package-info.java index c44e2e316..8767757b7 100644 --- a/src/main/java/org/apache/datasketches/quantiles/package-info.java +++ b/src/main/java/org/apache/datasketches/quantiles/package-info.java @@ -18,9 +18,8 @@ */ /** - *The quantiles package contains stochastic streaming algorithms that enable single-pass + * The quantiles package contains stochastic streaming algorithms that enable single-pass * analysis of the distribution of a stream of quantiles. - *
* * @see org.apache.datasketches.quantiles.DoublesSketch * @see org.apache.datasketches.quantiles.ItemsSketch diff --git a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java index 98616661f..1427f6279 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java @@ -38,7 +38,7 @@ public interface DoublesSortedView extends SortedView { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + **The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.
@@ -55,7 +55,7 @@ public interface DoublesSortedView extends SortedView { *
It is not recommended to include either the minimum or maximum items of the input stream.
- * + * * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -100,7 +100,7 @@ default double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit) * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + **Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.
* @@ -124,7 +124,7 @@ default double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit) *
It is not recommended to include either the minimum or maximum items of the input stream.
- * + * * @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java index 0a0c54b5a..eec699d94 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java @@ -38,7 +38,7 @@ public interface FloatsSortedView extends SortedView { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + **The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.
@@ -55,7 +55,7 @@ public interface FloatsSortedView extends SortedView { *
It is not recommended to include either the minimum or maximum items of the input stream.
- * + * * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -100,7 +100,7 @@ default double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit) * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + **Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.
* @@ -124,7 +124,7 @@ default double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit) *
It is not recommended to include either the minimum or maximum items of the input stream.
- * + * * @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericInequalitySearch.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericInequalitySearch.java index e650fef9b..7b9d6d665 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/GenericInequalitySearch.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericInequalitySearch.java @@ -41,7 +41,7 @@ *Given a sorted array of values arr[] and a search key value v, the algorithms for * the searching criteria are given with each enum criterion.
* - * @see + * @see * Sketching Quantiles and Ranks Tutorial * @author Lee Rhodes */ diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java index 0d27ce78c..f0dc81151 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java @@ -47,7 +47,7 @@ public interface GenericSortedView** @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -116,7 +116,7 @@ default double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria sear * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.
@@ -64,7 +64,7 @@ public interface GenericSortedViewextends PartitioningFeature , SketchPar * * * It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
** @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java b/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java index 51b013573..21af2b531 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java @@ -40,7 +40,7 @@ *Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.
* @@ -140,7 +140,7 @@ default double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria sear * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
Given a sorted array of values arr[] and a search key value v, the algorithms for * the searching criteria are given with each enum criterion.
* - * @see + * @see * Sketching Quantiles and Ranks Tutorial * @author Lee Rhodes */ diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java index 4823edd1d..e7e3521c7 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java @@ -38,7 +38,7 @@ public interface LongsSortedView extends SortedView { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *** @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -100,7 +100,7 @@ default double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.
@@ -55,7 +55,7 @@ public interface LongsSortedView extends SortedView { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
** @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java index 8ea3c3415..e7b9e6ef6 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java @@ -22,7 +22,7 @@ /** * These search criteria are used by the KLL, REQ and Classic Quantiles sketches in the DataSketches library. * - * @see + * @see * Sketching Quantiles and Ranks Tutorial * * @author Lee Rhodes diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java index b70843bb4..a7af4cf77 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java @@ -20,12 +20,12 @@ package org.apache.datasketches.quantilescommon; /** - *Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.
* @@ -124,7 +124,7 @@ default double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit) { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
This is a stochastic streaming sketch that enables near-real time analysis of the + * This is a stochastic streaming sketch that enables near-real time analysis of the * approximate distribution of items from a very large stream in a single pass, requiring only * that the items are comparable. * The analysis is obtained using the getQuantile() function or the * inverse functions getRank(), getPMF() (the Probability Mass Function), and getCDF() - * (the Cumulative Distribution Function).
+ * (the Cumulative Distribution Function). * *Given an input stream of N items, the natural rank of any specific * item is defined as its index (1 to N) in the hypothetical sorted stream of all @@ -194,7 +194,7 @@ *
[*] Note that obtaining epsilon may require using a similar function but with more parameters * based on the specific sketch implementation.
* - * @see + * @see * Sketching Quantiles and Ranks, Tutorial * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria * diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java index e8e5310f5..659528191 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java @@ -21,6 +21,8 @@ import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import org.apache.datasketches.common.SketchesArgumentException; + /** * The Quantiles API for item type double. * @see QuantilesAPI @@ -33,7 +35,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * This is equivalent to {@link #getCDF(double[], QuantileSearchCriteria) getCDF(splitPoints, INCLUSIVE)} * @param splitPoints an array of m unique, monotonically increasing items. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ default double[] getCDF(double[] splitPoints) { return getCDF(splitPoints, INCLUSIVE); @@ -50,7 +52,7 @@ default double[] getCDF(double[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *** @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. 
*/ double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit); @@ -79,7 +81,7 @@ default double[] getCDF(double[] splitPoints) { * item returned by getQuantile(1.0). * * @return the maximum item of the stream - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ double getMaxItem(); @@ -88,7 +90,7 @@ default double[] getCDF(double[] splitPoints) { * item returned by getQuantile(0.0). * * @return the minimum item of the stream - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ double getMinItem(); @@ -96,7 +98,7 @@ default double[] getCDF(double[] splitPoints) { * This is equivalent to {@link #getPMF(double[], QuantileSearchCriteria) getPMF(splitPoints, INCLUSIVE)} * @param splitPoints an array of m unique, monotonically increasing items. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ default double[] getPMF(double[] splitPoints) { return getPMF(splitPoints, INCLUSIVE); @@ -113,7 +115,7 @@ default double[] getPMF(double[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.
@@ -67,10 +69,10 @@ default double[] getCDF(double[] splitPoints) { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
** @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ double[] getPMF(double[] splitPoints, QuantileSearchCriteria searchCrit); @@ -148,7 +150,7 @@ default double[] getPMF(double[] splitPoints) { * This is equivalent to {@link #getQuantile(double, QuantileSearchCriteria) getQuantile(rank, INCLUSIVE)} * @param rank the given normalized rank, a double in the range [0.0, 1.0]. * @return the approximate quantile given the normalized rank. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ default double getQuantile(double rank) { return getQuantile(rank, INCLUSIVE); @@ -163,7 +165,7 @@ default double getQuantile(double rank) { * If EXCLUSIVE, the given rank includes all quantiles < * the quantile directly corresponding to the given rank. * @return the approximate quantile given the normalized rank. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria */ double getQuantile(double rank, QuantileSearchCriteria searchCrit); @@ -180,7 +182,7 @@ default double getQuantile(double rank) { * @param rank the given normalized rank * @return the lower bound of the quantile confidence interval in which the quantile of the * given rank exists. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ double getQuantileLowerBound(double rank); @@ -196,7 +198,7 @@ default double getQuantile(double rank) { * @param rank the given normalized rank * @return the upper bound of the quantile confidence interval in which the true quantile of the * given rank exists. - * @throws IllegalArgumentException if sketch is empty. 
+ * @throws SketchesArgumentException if sketch is empty. */ double getQuantileUpperBound(double rank); @@ -205,7 +207,7 @@ default double getQuantile(double rank) { * @param ranks the given array of normalized ranks, each of which must be * in the interval [0.0,1.0]. * @return an array of quantiles corresponding to the given array of normalized ranks. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ default double[] getQuantiles(double[] ranks) { return getQuantiles(ranks, INCLUSIVE); @@ -219,7 +221,7 @@ default double[] getQuantiles(double[] ranks) { * @param searchCrit if INCLUSIVE, the given ranks include all quantiles ≤ * the quantile directly corresponding to each rank. * @return an array of quantiles corresponding to the given array of normalized ranks. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria */ double[] getQuantiles(double[] ranks, QuantileSearchCriteria searchCrit); @@ -228,7 +230,7 @@ default double[] getQuantiles(double[] ranks) { * This is equivalent to {@link #getRank(double, QuantileSearchCriteria) getRank(quantile, INCLUSIVE)} * @param quantile the given quantile * @return the normalized rank corresponding to the given quantile - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ default double getRank(double quantile) { return getRank(quantile, INCLUSIVE); @@ -240,7 +242,7 @@ default double getRank(double quantile) { * @param quantile the given quantile * @param searchCrit if INCLUSIVE the given quantile is included into the rank. * @return the normalized rank corresponding to the given quantile - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. 
* @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria */ double getRank(double quantile, QuantileSearchCriteria searchCrit); @@ -249,7 +251,7 @@ default double getRank(double quantile) { * This is equivalent to {@link #getRanks(double[], QuantileSearchCriteria) getRanks(quantiles, INCLUSIVE)} * @param quantiles the given array of quantiles * @return an array of normalized ranks corresponding to the given array of quantiles. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. */ default double[] getRanks(double[] quantiles) { return getRanks(quantiles, INCLUSIVE); @@ -262,7 +264,7 @@ default double[] getRanks(double[] quantiles) { * @param quantiles the given array of quantiles * @param searchCrit if INCLUSIVE, the given quantiles include the rank directly corresponding to each quantile. * @return an array of normalized ranks corresponding to the given array of quantiles. - * @throws IllegalArgumentException if sketch is empty. + * @throws SketchesArgumentException if sketch is empty. * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria */ double[] getRanks(double[] quantiles, QuantileSearchCriteria searchCrit); diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java index 986780444..8b8a91bdd 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java @@ -49,7 +49,7 @@ default double[] getCDF(float[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.
* @@ -137,10 +139,10 @@ default double[] getPMF(double[] splitPoints) { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
** @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -112,7 +112,7 @@ default double[] getPMF(float[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.
@@ -66,7 +66,7 @@ default double[] getCDF(float[] splitPoints) { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
** @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java index 459e58cdd..bc0881282 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java @@ -53,7 +53,7 @@ default double[] getCDF(T[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.
* @@ -136,7 +136,7 @@ default double[] getPMF(float[] splitPoints) { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
** @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -132,7 +132,7 @@ default double[] getPMF(T[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.
@@ -70,7 +70,7 @@ default double[] getCDF(T[] splitPoints) { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
** @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java index 2b542a68f..fb1ca5817 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java @@ -50,7 +50,7 @@ default double[] getCDF(long[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.
* @@ -156,7 +156,7 @@ default double[] getPMF(T[] splitPoints) { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
** @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -113,7 +113,7 @@ default double[] getPMF(long[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.
@@ -67,7 +67,7 @@ default double[] getCDF(long[] splitPoints) { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
** @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java index e587cd633..99de621d9 100644 --- a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java +++ b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java @@ -28,7 +28,7 @@ * This abstract class provides a single place to define and document the public API * for the Relative Error Quantiles Sketch. * - * @see + * @see * Sketching Quantiles and Ranks Tutorial * * @author Lee Rhodes @@ -160,6 +160,7 @@ public boolean isReadOnly() { /** * {@inheritDoc} + * *Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.
* @@ -137,7 +137,7 @@ default double[] getPMF(long[] splitPoints) { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
The parameters k, highRankAccuracy, and reqDebug will not change.
*/ @Override diff --git a/src/main/java/org/apache/datasketches/req/ReqSerDe.java b/src/main/java/org/apache/datasketches/req/ReqSerDe.java index 52b1371a9..952749deb 100644 --- a/src/main/java/org/apache/datasketches/req/ReqSerDe.java +++ b/src/main/java/org/apache/datasketches/req/ReqSerDe.java @@ -110,7 +110,8 @@ * 0 || (empty)| 0 | K | Flags |FamID=17| SerVer | PreInts = 2 | * *- ** - *Flags:
+ * + * Flags: * Bit 0 : Endianness, reserved * Bit 1 : ReadOnly, reserved * Bit 2 : Empty diff --git a/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java b/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java index c3ef33957..b58317a9a 100644 --- a/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java +++ b/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java @@ -132,10 +132,9 @@ public static ReservoirLongsUnion heapify(final Memory srcMem) { /** * Union the given sketch. - *- * This method can be repeatedly called. If the given sketch is null it is interpreted as an empty - * sketch. - *
+ * + *This method can be repeatedly called. If the given sketch is null it is interpreted as an empty + * sketch.
* * @param sketchIn The incoming sketch. */ diff --git a/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java b/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java index e6f361955..e12d31aa9 100644 --- a/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java +++ b/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java @@ -25,6 +25,7 @@ /** * This class provides a compact representation of reservoir size by encoding it into a * fixed-point 16-bit value. + * *The value itself is a fractional power of 2, with 5 bits of exponent and 11 bits of * mantissa. The exponent allows a choice of anywhere from 0-30, and there are 2048 possible * reservoir size values within each octave. Because reservoir size must be an integer, this diff --git a/src/main/java/org/apache/datasketches/sampling/package-info.java b/src/main/java/org/apache/datasketches/sampling/package-info.java index edfaa20a8..bbe446914 100644 --- a/src/main/java/org/apache/datasketches/sampling/package-info.java +++ b/src/main/java/org/apache/datasketches/sampling/package-info.java @@ -18,8 +18,8 @@ */ /** - *
This package is dedicated to streaming algorithms that enable fixed size, uniform sampling of - * weighted and unweighted items from a stream.
+ * This package is dedicated to streaming algorithms that enable fixed size, uniform sampling of + * weighted and unweighted items from a stream. * *These sketches are mergeable and can be serialized and deserialized to/from a compact * form.
diff --git a/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java b/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java index 1e3408511..478b80124 100644 --- a/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java +++ b/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java @@ -32,6 +32,7 @@ import org.apache.datasketches.memory.WritableBuffer; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.quantilescommon.QuantilesAPI; +import org.apache.datasketches.quantilescommon.QuantilesUtil; /** * t-Digest for estimating quantiles and ranks. @@ -125,6 +126,7 @@ public void merge(final TDigestDouble other) { /** * Process buffered values and merge centroids if needed */ + // this method will become private in the next major version public void compress() { if (numBuffered_ == 0) { return; } final int num = numBuffered_ + numCentroids_; @@ -277,6 +279,51 @@ public double getQuantile(final double rank) { return weightedAverage(centroidWeights_[numCentroids_ - 1], w1, maxValue_, w2); } + /** + * Returns an approximation to the Probability Mass Function (PMF) of the input stream + * given a set of split points. + * + * @param splitPoints an array of m unique, monotonically increasing values + * that divide the input domain into m+1 consecutive disjoint intervals (bins). + * + * @return an array of m+1 doubles each of which is an approximation + * to the fraction of the input stream values (the mass) that fall into one of those intervals. + * @throws SketchesStateException if sketch is empty. + */ + public double[] getPMF(final double[] splitPoints) { + final double[] buckets = getCDF(splitPoints); + for (int i = buckets.length; i-- > 1; ) { + buckets[i] -= buckets[i - 1]; + } + return buckets; + } + + /** + * Returns an approximation to the Cumulative Distribution Function (CDF), which is the + * cumulative analog of the PMF, of the input stream given a set of split points. 
+ * + * @param splitPoints an array of m unique, monotonically increasing values + * that divide the input domain into m+1 consecutive disjoint intervals. + * + * @return an array of m+1 doubles, which are a consecutive approximation to the CDF + * of the input stream given the splitPoints. The value at array position j of the returned + * CDF array is the sum of the returned values in positions 0 through j of the returned PMF + * array. This can be viewed as array of ranks of the given split points plus one more value + * that is always 1. + * @throws SketchesStateException if sketch is empty. + */ + public double[] getCDF(final double[] splitPoints) { + if (isEmpty()) { throw new SketchesStateException(QuantilesAPI.EMPTY_MSG); } + QuantilesUtil.checkDoublesSplitPointsOrder(splitPoints); + final int len = splitPoints.length + 1; + final double[] ranks = new double[len]; + for (int i = 0; i < len - 1; i++) { + ranks[i] = getRank(splitPoints[i]); + } + ranks[len - 1] = 1.0; + return ranks; + } + /** * Computes size needed to serialize the current state. * @return size in bytes needed to serialize this tdigest diff --git a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java index 562be982c..e1d9262e6 100644 --- a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java @@ -37,6 +37,7 @@ /** * This class defines the preamble data structure and provides basic utilities for some of the key * fields. + * *The intent of the design of this class was to isolate the detailed knowledge of the bit and * byte layout of the serialized form of the sketches derived from the Sketch class into one place. * This allows the possibility of the introduction of different serialization @@ -126,7 +127,7 @@ * 3 ||----------------------Start of Hash Table of longs---------------------------------| *
Union objects require 32 bytes of preamble plus a non-compact array of longs representing a + *
Union objects require 32 bytes of preamble plus a non-compact array of longs representing a * hash table.
* *
diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java
index 05d6bbb2c..89618bc23 100644
--- a/src/main/java/org/apache/datasketches/theta/Sketch.java
+++ b/src/main/java/org/apache/datasketches/theta/Sketch.java
@@ -309,7 +309,7 @@ public static int getMaxCompactSketchBytes(final int numberOfEntries) {
* log_base2 of the number of nominal entries, which is a power of 2.
* @param lgNomEntries Nominal Entries
* @return the maximum number of storage bytes required for a CompactSketch with the given
- * nomEntries.
+ * lgNomEntries.
*/
public static int getCompactSketchMaxBytes(final int lgNomEntries) {
return (int)((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD
diff --git a/src/main/java/org/apache/datasketches/theta/Sketches.java b/src/main/java/org/apache/datasketches/theta/Sketches.java
index dc20072dc..2e7fa0915 100644
--- a/src/main/java/org/apache/datasketches/theta/Sketches.java
+++ b/src/main/java/org/apache/datasketches/theta/Sketches.java
@@ -91,14 +91,14 @@ public static int getMaxCompactSketchBytes(final int numberOfEntries) {
/**
* Returns the maximum number of storage bytes required for a CompactSketch given the configured
- * number of nominal entries (power of 2).
- * @param nomEntries Nominal Entries
+ * log_base2 of the number of nominal entries, which is a power of 2.
+ * @param lgNomEntries log_base2 of the number of nominal entries
* @return the maximum number of storage bytes required for a CompactSketch with the given
- * nomEntries.
+ * lgNomEntries.
* @see Sketch#getCompactSketchMaxBytes(int)
*/
- public static int getCompactSketchMaxBytes(final int nomEntries) {
- return Sketch.getCompactSketchMaxBytes(nomEntries);
+ public static int getCompactSketchMaxBytes(final int lgNomEntries) {
+ return Sketch.getCompactSketchMaxBytes(lgNomEntries);
}
/**
diff --git a/src/main/java/org/apache/datasketches/theta/UnionImpl.java b/src/main/java/org/apache/datasketches/theta/UnionImpl.java
index be2980801..bac05de74 100644
--- a/src/main/java/org/apache/datasketches/theta/UnionImpl.java
+++ b/src/main/java/org/apache/datasketches/theta/UnionImpl.java
@@ -333,7 +333,7 @@ public void union(final Sketch sketchIn) {
if (sketchIn.isOrdered() && (sketchIn instanceof CompactSketch)) { //Use early stop
//Ordered, thus compact
if (sketchIn.hasMemory()) {
- final Memory skMem = ((CompactSketch) sketchIn).getMemory();
+ final Memory skMem = sketchIn.getMemory();
final int preambleLongs = skMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F;
for (int i = 0; i < curCountIn; i++ ) {
final int offsetBytes = preambleLongs + i << 3;
diff --git a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java
index 882c5e2e9..cb6854b02 100644
--- a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java
@@ -343,6 +343,7 @@ public UpdateReturnState update(final long[] data) {
/**
* All potential updates converge here.
+ *
* Don't ever call this unless you really know what you are doing!
*
* @param hash the given input hash value. A hash of zero or Long.MAX_VALUE is ignored.
diff --git a/src/main/java/org/apache/datasketches/tuple/Union.java b/src/main/java/org/apache/datasketches/tuple/Union.java
index 653312fa0..acefa2ab5 100644
--- a/src/main/java/org/apache/datasketches/tuple/Union.java
+++ b/src/main/java/org/apache/datasketches/tuple/Union.java
@@ -100,8 +100,7 @@ public CompactSketch union(final Sketch tupleSketch,
/**
* Performs a stateful union of the internal set with the given tupleSketch.
* @param tupleSketch input tuple sketch to merge with the internal set.
- *
- * Nulls and empty sketches are ignored.
+ * Nulls and empty sketches are ignored.
*/
public void union(final Sketch tupleSketch) {
if (tupleSketch == null || tupleSketch.isEmpty()) { return; }
diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java
index e7abae0d4..a54c11afc 100644
--- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java
+++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java
@@ -41,24 +41,28 @@ public enum Mode {
/**
* The aggregation mode is the summation function.
+ *
* New retained value = previous retained value + incoming value
*/
Sum,
/**
* The aggregation mode is the minimum function.
+ *
* New retained value = min(previous retained value, incoming value)
*/
Min,
/**
* The aggregation mode is the maximum function.
+ *
* New retained value = max(previous retained value, incoming value)
*/
Max,
/**
* The aggregation mode is always one.
+ *
* New retained value = 1.0
*/
AlwaysOne
diff --git a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java
index 4c04fa2c2..72695355e 100644
--- a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java
+++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java
@@ -41,24 +41,28 @@ public enum Mode {
/**
* The aggregation mode is the summation function.
+ *
* New retained value = previous retained value + incoming value
*/
Sum,
/**
* The aggregation mode is the minimum function.
+ *
* New retained value = min(previous retained value, incoming value)
*/
Min,
/**
* The aggregation mode is the maximum function.
+ *
* New retained value = max(previous retained value, incoming value)
*/
Max,
/**
* The aggregation mode is always one.
+ *
* New retained value = 1
*/
AlwaysOne
diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java
index 91d4eade4..52f827149 100644
--- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java
@@ -30,6 +30,7 @@
/**
* Direct Compact Sketch of type ArrayOfDoubles.
+ *
* This implementation uses data in a given Memory that is owned and managed by the caller.
* This Memory can be off-heap, which if managed properly will greatly reduce the need for
* the JVM to perform garbage collection.
diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java
index 3dd019d74..7c1b1bf07 100644
--- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java
+++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java
@@ -23,6 +23,7 @@
/**
* Direct Intersection operation for tuple sketches of type ArrayOfDoubles.
+ *
* This implementation uses data in a given Memory that is owned and managed by the caller.
* This Memory can be off-heap, which if managed properly will greatly reduce the need for
* the JVM to perform garbage collection.
@@ -43,7 +44,7 @@ final class DirectArrayOfDoublesIntersection extends ArrayOfDoublesIntersection
}
@Override
- protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues,
+ protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues,
final long seed) {
return new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed, mem_);
}
diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java
index 1b4e86904..ae1aa3dc0 100644
--- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java
@@ -33,6 +33,7 @@
/**
* Direct QuickSelect tuple sketch of type ArrayOfDoubles.
+ *
* This implementation uses data in a given Memory that is owned and managed by the caller.
* This Memory can be off-heap, which if managed properly will greatly reduce the need for
* the JVM to perform garbage collection.
diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java
index 15503fc04..dcdab1313 100644
--- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java
+++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java
@@ -23,6 +23,7 @@
/**
* Iterator over the off-heap, Direct tuple sketch of type ArrayOfDoubles (compact or hash table).
+ *
* This implementation uses data in a given Memory that is owned and managed by the caller.
* This Memory can be off-heap, which if managed properly will greatly reduce the need for
* the JVM to perform garbage collection.
diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java
index 00310f534..734019632 100644
--- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java
+++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java
@@ -27,6 +27,7 @@
/**
* Direct Union operation for tuple sketches of type ArrayOfDoubles.
+ *
* This implementation uses data in a given Memory that is owned and managed by the caller.
* This Memory can be off-heap, which if managed properly will greatly reduce the need for
* the JVM to perform garbage collection.
diff --git a/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java b/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java
index 71774783c..f7249c918 100644
--- a/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java
+++ b/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java
@@ -43,13 +43,16 @@
public class DirectAuxHashMapTest {
@Test
- public void checkGrow() {
+ public void checkGrow() { //It is very rare, but this forces an HLL_4 to exceed its computed memory size.
int lgConfigK = 4;
TgtHllType tgtHllType = TgtHllType.HLL_4;
int n = 8; //put lgConfigK == 4 into HLL mode
int bytes = HllSketch.getMaxUpdatableSerializationBytes(lgConfigK, tgtHllType);
HllSketch hllSketch;
WritableMemory wmem = WritableMemory.allocateDirect(bytes, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer());
+ WritableMemory wmemCopy = wmem; //second reference to the same Memory object, not a data copy
+ assertTrue(wmemCopy.isDirect()); //original copy of wmem is off-heap
+ assertTrue(wmemCopy.isAlive()); //original copy of wmem is Alive
hllSketch = new HllSketch(lgConfigK, tgtHllType, wmem);
for (int i = 0; i < n; i++) {
hllSketch.update(i);
@@ -88,6 +91,8 @@ public void checkGrow() {
assertEquals(dha.getAuxHashMap().getAuxCount(), 4);
assertTrue(hllSketch.isMemory());
assertFalse(hllSketch.isOffHeap());
+ assertTrue(wmemCopy.isDirect()); //original copy of wmem was off-heap and still is
+ assertFalse(wmemCopy.isAlive()); //original copy of wmem has been closed
}
@Test
diff --git a/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java b/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java
index 2ec2fbe27..cfa14b60c 100644
--- a/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java
@@ -48,6 +48,7 @@ public void checkLimitedMemoryScenarios() { //Requesting application
//########## Owning Implementation
// This part would actually be part of the Memory owning implementation so it is faked here
WritableMemory wmem = WritableMemory.allocateDirect(initBytes, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer());
+ WritableMemory wmemCopy = wmem;
println("Initial mem size: " + wmem.getCapacity());
//########## Receiving Application
@@ -70,6 +71,8 @@ public void checkLimitedMemoryScenarios() { //Requesting application
// so the the wmem reference is invalid. Use the sketch to get the last memory reference.
WritableMemory lastMem = usk1.getMemory();
println("Final mem size: " + usk1.getMemory().getCapacity());
+ assertTrue(wmemCopy.isDirect());
+ assertFalse(wmemCopy.isAlive());
}
@Test
@@ -79,6 +82,7 @@ public void checkGrowBaseBuf() {
final int initBytes = (4 + (u / 2)) << 3; // not enough to hold everything
WritableMemory wmem = WritableMemory.allocateDirect(initBytes, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer());
+ WritableMemory wmemCopy = wmem;
println("Initial mem size: " + wmem.getCapacity());
final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem);
for (int i = 1; i <= u; i++) {
@@ -88,6 +92,8 @@ public void checkGrowBaseBuf() {
println("curCombBufItemCap: " + currentSpace);
assertEquals(currentSpace, 2 * k);
println("last Mem Cap: " + usk1.getMemory().getCapacity());
+ assertTrue(wmemCopy.isDirect());
+ assertFalse(wmemCopy.isAlive());
}
@Test
@@ -97,6 +103,7 @@ public void checkGrowCombBuf() {
final int initBytes = ((2 * k) + 4) << 3; //just room for BB
WritableMemory wmem = WritableMemory.allocateDirect(initBytes, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer());
+ WritableMemory wmemCopy = wmem;
println("Initial mem size: " + wmem.getCapacity());
final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem);
for (int i = 1; i <= u; i++) {
@@ -108,6 +115,8 @@ public void checkGrowCombBuf() {
final int newSpace = usk1.getCombinedBufferItemCapacity();
println("newCombBurItemCap: " + newSpace);
assertEquals(newCB.length, 3 * k);
+ assertTrue(wmemCopy.isDirect());
+ assertFalse(wmemCopy.isAlive());
}
@Test
@@ -119,6 +128,7 @@ public void checkGrowFromWrappedEmptySketch() {
final Memory origSketchMem = Memory.wrap(usk1.toByteArray());
WritableMemory wmem = WritableMemory.allocateDirect(initBytes, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer());
+ WritableMemory wmemCopy = wmem;
origSketchMem.copyTo(0, wmem, 0, initBytes);
UpdateDoublesSketch usk2 = DirectUpdateDoublesSketch.wrapInstance(wmem);
assertTrue(wmem.isSameResource(usk2.getMemory()));
@@ -135,6 +145,8 @@ public void checkGrowFromWrappedEmptySketch() {
final int expectedSize = COMBINED_BUFFER + ((2 * k) << 3);
assertEquals(mem2.getCapacity(), expectedSize);
+ assertTrue(wmemCopy.isDirect());
+ assertFalse(wmemCopy.isAlive());
}
@Test
diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java
index cf8e142b7..fd1b7a72a 100644
--- a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java
@@ -140,18 +140,22 @@ public void checkEmptyExceptions() {
@Test
public void directSketchShouldMoveOntoHeapEventually() {
WritableMemory wmem = WritableMemory.allocateDirect(1000, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer());
+ WritableMemory wmemCopy = wmem;
UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem);
Assert.assertTrue(sketch.isSameResource(wmem));
for (int i = 0; i < 1000; i++) {
sketch.update(i);
}
println(sketch.toString());
+ assertTrue(wmemCopy.isDirect());
+ assertFalse(wmemCopy.isAlive());
}
@Test
public void directSketchShouldMoveOntoHeapEventually2() {
int i = 0;
WritableMemory wmem = WritableMemory.allocateDirect(50, ByteOrder.LITTLE_ENDIAN, new DefaultMemoryRequestServer());
+ WritableMemory wmemCopy = wmem;
UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem);
Assert.assertTrue(sketch.isSameResource(wmem));
for (; i < 1000; i++) {
@@ -163,6 +167,8 @@ public void directSketchShouldMoveOntoHeapEventually2() {
}
}
assertFalse(wmem.isAlive());
+ assertTrue(wmemCopy.isDirect());
+ assertFalse(wmemCopy.isAlive());
}
@Test
diff --git a/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java b/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java
index db043cff6..55baa83e7 100644
--- a/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java
+++ b/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java
@@ -41,6 +41,8 @@ public void empty() {
assertThrows(SketchesStateException.class, () -> td.getMaxValue());
assertThrows(SketchesStateException.class, () -> td.getRank(0));
assertThrows(SketchesStateException.class, () -> td.getQuantile(0.5));
+ assertThrows(SketchesStateException.class, () -> td.getPMF(new double[]{0}));
+ assertThrows(SketchesStateException.class, () -> td.getCDF(new double[]{0}));
}
@Test
@@ -65,9 +67,6 @@ public void manyValues() {
final TDigestDouble td = new TDigestDouble();
final int n = 10000;
for (int i = 0; i < n; i++) td.update(i);
-// System.out.println(td.toString(true));
-// td.compress();
-// System.out.println(td.toString(true));
assertFalse(td.isEmpty());
assertEquals(td.getTotalWeight(), n);
assertEquals(td.getMinValue(), 0);
@@ -82,6 +81,14 @@ public void manyValues() {
assertEquals(td.getQuantile(0.9), n * 0.9, n * 0.9 * 0.01);
assertEquals(td.getQuantile(0.95), n * 0.95, n * 0.95 * 0.01);
assertEquals(td.getQuantile(1), n - 1);
+ final double[] pmf = td.getPMF(new double[] {n / 2});
+ assertEquals(pmf.length, 2);
+ assertEquals(pmf[0], 0.5, 0.0001);
+ assertEquals(pmf[1], 0.5, 0.0001);
+ final double[] cdf = td.getCDF(new double[] {n / 2});
+ assertEquals(cdf.length, 2);
+ assertEquals(cdf[0], 0.5, 0.0001);
+ assertEquals(cdf[1], 1.0);
}
@Test
diff --git a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java
index 9b129daaf..2aa195a0c 100644
--- a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java
@@ -781,13 +781,16 @@ public void checkMoveAndResize() {
int u = 2 * k;
int bytes = Sketches.getMaxUpdateSketchBytes(k);
WritableMemory wmem = WritableMemory.allocateDirect(bytes/2); //will request more memory
+ WritableMemory wmemCopy = wmem;
UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(wmem);
assertTrue(sketch.isSameResource(wmem));
for (int i = 0; i < u; i++) { sketch.update(i); }
Memory mem = sketch.getMemory();
assertTrue(mem.isAlive());
assertFalse(mem.isDirect()); //now on heap.
+ assertTrue(wmemCopy.isDirect()); //original copy
assertFalse(wmem.isAlive()); //wmem closed by MemoryRequestServer
+ assertFalse(wmemCopy.isAlive()); //original copy closed
}
@Test
@@ -796,6 +799,7 @@ public void checkReadOnlyRebuildResize() {
int u = 2 * k;
int bytes = Sketches.getMaxUpdateSketchBytes(k);
WritableMemory wmem = WritableMemory.allocateDirect(bytes/2); //will request more memory
+ WritableMemory wmemCopy = wmem;
UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(wmem);
for (int i = 0; i < u; i++) { sketch.update(i); }
double est1 = sketch.getEstimate();
@@ -808,6 +812,8 @@ public void checkReadOnlyRebuildResize() {
assertTrue(mem2.isAlive());
assertFalse(mem2.isDirect()); //now on heap
assertFalse(wmem.isAlive()); //wmem closed by MemoryRequestServer
+ assertTrue(wmemCopy.isDirect());
+ assertFalse(wmemCopy.isAlive());
try {
roSketch.rebuild();
fail();
diff --git a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java
index 2fbd7bdcf..4bade62f7 100644
--- a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java
+++ b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java
@@ -193,6 +193,7 @@ public void checkMoveAndResizeOffHeap() {
final int bytes = Sketches.getMaxUpdateSketchBytes(k);
WritableMemory wmem = WritableMemory.allocateDirect(bytes / 2); //not really used, except as a reference.
WritableMemory wmem2 = WritableMemory.allocateDirect(bytes / 2); //too small, forces new allocation on heap
+ WritableMemory wmem2Copy = wmem2;
final UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(wmem);
assertTrue(sketch.isSameResource(wmem)); //also testing the isSameResource function
@@ -206,6 +207,8 @@ public void checkMoveAndResizeOffHeap() {
assertFalse(union2.isSameResource(wmem2)); //obviously not
wmem.close(); //empty, but we must close it anyway.
assertFalse(wmem2.isAlive());//previously closed via the DefaultMemoryRequestServer.
+ assertTrue(wmem2Copy.isDirect());
+ assertFalse(wmem2Copy.isAlive());
}
@Test
diff --git a/tools/FindBugsExcludeFilter.xml b/tools/FindBugsExcludeFilter.xml
index 4f62a5cd6..d47e78f09 100644
--- a/tools/FindBugsExcludeFilter.xml
+++ b/tools/FindBugsExcludeFilter.xml
@@ -41,6 +41,7 @@ under the License.
+
diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml
index 873a878a0..4e2471d91 100644
--- a/tools/SketchesCheckstyle.xml
+++ b/tools/SketchesCheckstyle.xml
@@ -230,7 +230,7 @@ under the License.
-
+