Skip to content

Commit b235526

Browse files
Merge pull request #650 from apache/union_mem
union(mem) delegates to union(sketch)
2 parents 861462f + f338133 commit b235526

File tree

1 file changed: 2 additions (+2) and 104 deletions (-104).

src/main/java/org/apache/datasketches/theta/UnionImpl.java

Lines changed: 2 additions & 104 deletions
Original file line number | Diff line number | Diff line change
@@ -20,28 +20,17 @@
2020
package org.apache.datasketches.theta;
2121

2222
import static java.lang.Math.min;
23-
import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK;
24-
import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK;
2523
import static org.apache.datasketches.theta.PreambleUtil.UNION_THETA_LONG;
2624
import static org.apache.datasketches.theta.PreambleUtil.clearEmpty;
27-
import static org.apache.datasketches.theta.PreambleUtil.extractCurCount;
2825
import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID;
29-
import static org.apache.datasketches.theta.PreambleUtil.extractFlags;
30-
import static org.apache.datasketches.theta.PreambleUtil.extractLgArrLongs;
31-
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
32-
import static org.apache.datasketches.theta.PreambleUtil.extractSeedHash;
33-
import static org.apache.datasketches.theta.PreambleUtil.extractSerVer;
34-
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
3526
import static org.apache.datasketches.theta.PreambleUtil.extractUnionThetaLong;
3627
import static org.apache.datasketches.theta.PreambleUtil.insertUnionThetaLong;
37-
import static org.apache.datasketches.theta.SingleItemSketch.otherCheckForSingleItem;
3828
import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros;
3929

4030
import java.nio.ByteBuffer;
4131

4232
import org.apache.datasketches.common.Family;
4333
import org.apache.datasketches.common.ResizeFactor;
44-
import org.apache.datasketches.common.SketchesArgumentException;
4534
import org.apache.datasketches.memory.Memory;
4635
import org.apache.datasketches.memory.MemoryRequestServer;
4736
import org.apache.datasketches.memory.WritableMemory;
@@ -347,99 +336,8 @@ public void union(final Sketch sketchIn) {
347336

348337
@Override
349338
public void union(final Memory skMem) {
350-
if (skMem == null) { return; }
351-
final int cap = (int) skMem.getCapacity();
352-
if (cap < 16) { return; } //empty or garbage
353-
final int serVer = extractSerVer(skMem);
354-
final int fam = extractFamilyID(skMem);
355-
356-
if (serVer == 4) { // compressed ordered compact
357-
ThetaUtil.checkSeedHashes(expectedSeedHash_, (short) extractSeedHash(skMem));
358-
union(CompactSketch.wrap(skMem));
359-
return;
360-
}
361-
if (serVer == 3) { //The OpenSource sketches (Aug 4, 2015) starts with serVer = 3
362-
if (fam < 1 || fam > 3) {
363-
throw new SketchesArgumentException(
364-
"Family must be Alpha, QuickSelect, or Compact: " + Family.idToFamily(fam));
365-
}
366-
processVer3(skMem);
367-
return;
368-
}
369-
if (serVer == 2) { //older Sketch, which is compact and ordered
370-
ThetaUtil.checkSeedHashes(expectedSeedHash_, (short)extractSeedHash(skMem));
371-
union(ForwardCompatibility.heapify2to3(skMem, expectedSeedHash_));
372-
return;
373-
}
374-
if (serVer == 1) { //much older Sketch, which is compact and ordered, no seedHash
375-
union(ForwardCompatibility.heapify1to3(skMem, expectedSeedHash_));
376-
return;
377-
}
378-
throw new SketchesArgumentException("SerVer is unknown: " + serVer);
379-
}
380-
381-
//Has seedHash, p, could have 0 entries & theta < 1.0,
382-
//could be unordered, ordered, compact, or not compact,
383-
//could be Alpha, QuickSelect, or Compact.
384-
private void processVer3(final Memory skMem) {
385-
final int preLongs = extractPreLongs(skMem);
386-
387-
if (preLongs == 1) {
388-
if (otherCheckForSingleItem(skMem)) {
389-
final long hash = skMem.getLong(8);
390-
gadget_.hashUpdate(hash);
391-
return;
392-
}
393-
return; //empty
394-
}
395-
ThetaUtil.checkSeedHashes(expectedSeedHash_, (short)extractSeedHash(skMem));
396-
final int curCountIn;
397-
final long thetaLongIn;
398-
399-
if (preLongs == 2) { //exact mode
400-
curCountIn = extractCurCount(skMem);
401-
if (curCountIn == 0) { return; } //should be > 0, but if it is 0 return empty anyway.
402-
thetaLongIn = Long.MAX_VALUE;
403-
}
404-
405-
else { //prelongs == 3
406-
//curCount may be 0 (e.g., from intersection); but sketch cannot be empty.
407-
curCountIn = extractCurCount(skMem);
408-
thetaLongIn = extractThetaLong(skMem);
409-
}
410-
411-
unionThetaLong_ = min(min(unionThetaLong_, thetaLongIn), gadget_.getThetaLong()); //theta rule
412-
unionEmpty_ = false;
413-
final int flags = extractFlags(skMem);
414-
final boolean ordered = (flags & ORDERED_FLAG_MASK) != 0;
415-
if (ordered) { //must be compact
416-
417-
for (int i = 0; i < curCountIn; i++ ) {
418-
final int offsetBytes = preLongs + i << 3;
419-
final long hashIn = skMem.getLong(offsetBytes);
420-
if (hashIn >= unionThetaLong_) { break; } // "early stop"
421-
gadget_.hashUpdate(hashIn); //backdoor update, hash function is bypassed
422-
}
423-
}
424-
425-
else { //not-ordered, could be compact or hash-table form
426-
final boolean compact = (flags & COMPACT_FLAG_MASK) != 0;
427-
final int size = compact ? curCountIn : 1 << extractLgArrLongs(skMem);
428-
429-
for (int i = 0; i < size; i++ ) {
430-
final int offsetBytes = preLongs + i << 3;
431-
final long hashIn = skMem.getLong(offsetBytes);
432-
if (hashIn <= 0L || hashIn >= unionThetaLong_) { continue; }
433-
gadget_.hashUpdate(hashIn); //backdoor update, hash function is bypassed
434-
}
435-
}
436-
437-
unionThetaLong_ = min(unionThetaLong_, gadget_.getThetaLong()); //sync thetaLongs
438-
439-
if (gadget_.hasMemory()) {
440-
final WritableMemory wmem = (WritableMemory)gadget_.getMemory();
441-
PreambleUtil.insertUnionThetaLong(wmem, unionThetaLong_);
442-
PreambleUtil.clearEmpty(wmem);
339+
if (skMem != null) {
340+
union(Sketch.wrap(skMem));
443341
}
444342
}
445343

0 commit comments

Comments
 (0)