Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ Improvements

Optimizations
---------------------
(No changes)
* GITHUB#16268: Use the doc-values skip index to skip per-doc value lookups in LongRangeFacetCutter. (Jakub Slowinski)

Bug Fixes
---------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ interface IntervalTracker extends OrdinalIterator {
/** Clear recorded information on this tracker. */
void clear();

/**
 * Restart reading from the first recorded ordinal, so that a {@link #freeze() frozen} tracker can
 * be replayed without recording its data again.
 */
void rewind();

/** Check if any data for the interval has been recorded. */
boolean get(int index);

Expand Down Expand Up @@ -71,6 +76,12 @@ public void clear() {
intervalsWithHit = 0;
}

/**
 * Resets {@code trackerState} and {@code bitFrom} to zero. Nothing else on the tracker is
 * modified; in particular {@code intervalsWithHit} is not reset, unlike in {@link #clear()}.
 */
@Override
public void rewind() {
  trackerState = 0;
  bitFrom = 0;
}

@Override
public boolean get(int index) {
return tracker.get(index);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
import org.apache.lucene.facet.MultiLongValues;
import org.apache.lucene.facet.MultiLongValuesSource;
import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.sandbox.facet.cutters.FacetCutter;
import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter;
import org.apache.lucene.search.LongValues;
Expand All @@ -42,6 +46,10 @@ public abstract class LongRangeFacetCutter implements FacetCutter {

// TODO: refactor - weird that we have both multi and single here.
final LongValuesSource singleValues;

// Field name whose skip index is used on the single-valued path, or null when faceting a source.
final String skipField;

final LongRangeAndPos[] sortedRanges;

final int requestedRangeCount;
Expand All @@ -62,32 +70,51 @@ static LongRangeFacetCutter createSingleOrMultiValued(
MultiLongValuesSource longValuesSource,
LongValuesSource singleLongValuesSource,
LongRange[] longRanges) {
return createSingleOrMultiValued(longValuesSource, singleLongValuesSource, longRanges, null);
}

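/**
 * Same as the three-argument overload, but uses the skip index of {@code skipField} on the
 * single-valued path. {@code skipField} may be {@code null}, in which case no skip index is used.
 */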
static LongRangeFacetCutter createSingleOrMultiValued(
MultiLongValuesSource longValuesSource,
LongValuesSource singleLongValuesSource,
LongRange[] longRanges,
String skipField) {
if (areOverlappingRanges(longRanges)) {
return new OverlappingLongRangeFacetCutter(
longValuesSource, singleLongValuesSource, longRanges);
longValuesSource, singleLongValuesSource, longRanges, skipField);
}
return new NonOverlappingLongRangeFacetCutter(
longValuesSource, singleLongValuesSource, longRanges);
longValuesSource, singleLongValuesSource, longRanges, skipField);
}

public static LongRangeFacetCutter create(
MultiLongValuesSource longValuesSource, LongRange[] longRanges) {
return createSingleOrMultiValued(longValuesSource, null, longRanges);
return createSingleOrMultiValued(longValuesSource, null, longRanges, null);
}

/**
 * Create {@link FacetCutter} for a long field by name, using its skip index when present.
 *
 * @param field name of the long field to facet on; also passed on as the skip field
 * @param longRanges requested ranges
 * @return a cutter built by {@code createSingleOrMultiValued}
 */
public static LongRangeFacetCutter create(String field, LongRange[] longRanges) {
  final MultiLongValuesSource multiValued = MultiLongValuesSource.fromLongField(field);
  // Deliberately no single-valued source: the skip path reads the field directly, and a
  // multi-valued segment must fall back to the multi-valued leaf cutter.
  final LongValuesSource noSingleValuedSource = null;
  return createSingleOrMultiValued(multiValued, noSingleValuedSource, longRanges, field);
}

// caller handles conversion of Doubles and DoubleRange to Long and LongRange
// ranges need not be sorted
LongRangeFacetCutter(
MultiLongValuesSource longValuesSource,
LongValuesSource singleLongValuesSource,
LongRange[] longRanges) {
LongRange[] longRanges,
String skipField) {
super();
valuesSource = longValuesSource;
if (singleLongValuesSource != null) {
singleValues = singleLongValuesSource;
} else {
singleValues = MultiLongValuesSource.unwrapSingleton(valuesSource);
}
this.skipField = skipField;

sortedRanges = new LongRangeAndPos[longRanges.length];
requestedRangeCount = longRanges.length;
Expand Down Expand Up @@ -124,6 +151,32 @@ public static LongRangeFacetCutter create(
*/
abstract List<InclusiveRange> buildElementaryIntervals();

/**
 * Opens {@link #skipField} as single-valued {@link LongValues}, reading the doc values directly
 * so that the field's skip index can be used alongside them.
 *
 * @param context the segment to read from
 * @return an adapter over the field's values, or {@code null} when no skip field is configured or
 *     the segment's values cannot be unwrapped to a single-valued form
 * @throws IOException if the doc values cannot be read
 */
final LongValues singleValuedSkipField(LeafReaderContext context) throws IOException {
  if (skipField != null) {
    final NumericDocValues singleton =
        DocValues.unwrapSingleton(DocValues.getSortedNumeric(context.reader(), skipField));
    if (singleton != null) {
      // Thin adapter: both calls are forwarded unchanged to the unwrapped doc values.
      return new LongValues() {
        @Override
        public boolean advanceExact(int doc) throws IOException {
          return singleton.advanceExact(doc);
        }

        @Override
        public long longValue() throws IOException {
          return singleton.longValue();
        }
      };
    }
  }
  return null;
}

private static boolean areOverlappingRanges(LongRange[] ranges) {
if (ranges.length == 0) {
return false;
Expand Down Expand Up @@ -252,29 +305,98 @@ abstract static class LongRangeSingleValuedLeafFacetCutter implements LeafFacetC

IntervalTracker requestedIntervalTracker;

private final DocValuesSkipper skipper;

// advanceSkipper's decisions for the current block; the fields below hold while doc <=
// upToInclusive, after which it runs again for the next block.
private int upToInclusive = -1;
// Whether every value in the block maps to the single interval upToIntervalOrd.
private boolean upToSameInterval;
// Whether every doc in the block has a value.
private boolean upToDense;
private int upToIntervalOrd;

// Interval of the previous doc with a value, for replaying the tracker on a repeat.
private int previousIntervalOrd = -1;

/** Creates a leaf cutter without a skip index by delegating with a {@code null} skipper. */
LongRangeSingleValuedLeafFacetCutter(LongValues longValues, long[] boundaries, int[] pos) {
this(longValues, boundaries, pos, null);
}

/**
 * Stores the value source, boundaries, positions and optional skipper for this leaf.
 *
 * @param longValues per-document values consulted by {@code advanceExact}
 * @param boundaries stored as given
 * @param pos stored as given
 * @param skipper skip index for the field, or {@code null} to look up every document's value
 */
LongRangeSingleValuedLeafFacetCutter(
    LongValues longValues, long[] boundaries, int[] pos, DocValuesSkipper skipper) {
  this.skipper = skipper;
  this.pos = pos;
  this.boundaries = boundaries;
  this.longValues = longValues;
}

@Override
public boolean advanceExact(int doc) throws IOException {
if (longValues.advanceExact(doc) == false) {
return false;
if (skipper != null && doc > upToInclusive) {
advanceSkipper(doc);
}
if (requestedIntervalTracker != null) {
requestedIntervalTracker.clear();

int intervalOrd;
if (upToSameInterval) {
// Reuse the cached ordinal, skipping the binary search. A dense block also skips the value
// lookup, a sparse one still needs advanceExact to know whether this doc has a value.
if (upToDense == false && longValues.advanceExact(doc) == false) {
return false;
}
intervalOrd = upToIntervalOrd;
} else if (longValues.advanceExact(doc)) {
intervalOrd = processValue(longValues.longValue());
} else {
return false;
}
elementaryIntervalOrd = processValue(longValues.longValue());
maybeRollUp(requestedIntervalTracker);

elementaryIntervalOrd = intervalOrd;
if (requestedIntervalTracker != null) {
requestedIntervalTracker.freeze();
if (skipper != null && intervalOrd == previousIntervalOrd) {
// Same interval as the previous doc, so replay its frozen rollup instead of rebuilding.
requestedIntervalTracker.rewind();
} else {
requestedIntervalTracker.clear();
maybeRollUp(requestedIntervalTracker);
requestedIntervalTracker.freeze();
previousIntervalOrd = intervalOrd;
}
}

return true;
}

/**
 * Positions the skipper on the block containing {@code doc} and records what is known about that
 * block in {@code upToInclusive}, {@code upToSameInterval}, {@code upToIntervalOrd} and {@code
 * upToDense}.
 *
 * @param doc the document being advanced to
 * @throws IOException if the skipper cannot be advanced
 */
private void advanceSkipper(int doc) throws IOException {
  if (skipper.maxDocID(0) < doc) {
    skipper.advance(doc);
  }
  upToSameInterval = false;

  final int blockStart = skipper.minDocID(0);
  if (blockStart > doc) {
    // Corner case: doc has no value and sits between two blocks of the skip index. Use per-doc
    // lookups until the next block starts.
    upToInclusive = blockStart - 1;
    return;
  }

  upToInclusive = skipper.maxDocID(0);
  final int levelCount = skipper.numLevels();
  // Walk up the levels for as long as a level's whole value range lands in one interval; the
  // highest such level wins.
  for (int level = 0; level < levelCount; level++) {
    // Long fields store raw values, so the skipper's min/max map straight into boundary space.
    final int lowInterval = processValue(skipper.minValue(level));
    final int highInterval = processValue(skipper.maxValue(level));
    if (lowInterval != highInterval) {
      return;
    }
    final int lastDoc = skipper.maxDocID(level);
    upToInclusive = lastDoc;
    upToSameInterval = true;
    upToIntervalOrd = lowInterval;
    // Dense means every doc ID in [minDocID, maxDocID] at this level has a value.
    final int docSpan = lastDoc - skipper.minDocID(level) + 1;
    upToDense = skipper.docCount(level) == docSpan;
  }
}

// Returns the value of the interval that v belongs to, or lastIntervalSeen
// if no processing is done.
private int processValue(long v) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.facet.MultiLongValues;
import org.apache.lucene.facet.MultiLongValuesSource;
import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter;
import org.apache.lucene.search.LongValues;
Expand All @@ -32,8 +33,9 @@ class NonOverlappingLongRangeFacetCutter extends LongRangeFacetCutter {
NonOverlappingLongRangeFacetCutter(
MultiLongValuesSource longValuesSource,
LongValuesSource singleLongValuesSource,
LongRange[] longRanges) {
super(longValuesSource, singleLongValuesSource, longRanges);
LongRange[] longRanges,
String skipField) {
super(longValuesSource, singleLongValuesSource, longRanges, skipField);
}

/**
Expand Down Expand Up @@ -68,6 +70,12 @@ List<InclusiveRange> buildElementaryIntervals() {

@Override
public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException {
LongValues skipFieldValues = singleValuedSkipField(context);
if (skipFieldValues != null) {
DocValuesSkipper skipper = context.reader().getDocValuesSkipper(skipField);
return new NonOverlappingLongRangeSingleValueLeafFacetCutter(
skipFieldValues, boundaries, pos, skipper);
}
if (singleValues != null) {
LongValues values = singleValues.getValues(context, null);
return new NonOverlappingLongRangeSingleValueLeafFacetCutter(values, boundaries, pos);
Expand Down Expand Up @@ -112,6 +120,11 @@ static class NonOverlappingLongRangeSingleValueLeafFacetCutter
super(longValues, boundaries, pos);
}

/** Variant that additionally passes a {@link DocValuesSkipper} through to the superclass. */
NonOverlappingLongRangeSingleValueLeafFacetCutter(
LongValues longValues, long[] boundaries, int[] pos, DocValuesSkipper skipper) {
super(longValues, boundaries, pos, skipper);
}

@Override
public int nextOrd() throws IOException {
if (elementaryIntervalOrd == NO_MORE_ORDS) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.lucene.facet.MultiLongValues;
import org.apache.lucene.facet.MultiLongValuesSource;
import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.internal.hppc.IntCursor;
import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter;
Expand All @@ -43,8 +44,9 @@ class OverlappingLongRangeFacetCutter extends LongRangeFacetCutter {
OverlappingLongRangeFacetCutter(
MultiLongValuesSource longValuesSource,
LongValuesSource singleLongValuesSource,
LongRange[] longRanges) {
super(longValuesSource, singleLongValuesSource, longRanges);
LongRange[] longRanges,
String skipField) {
super(longValuesSource, singleLongValuesSource, longRanges, skipField);

// Build binary tree on top of intervals:
root = split(0, elementaryIntervals.size(), elementaryIntervals);
Expand Down Expand Up @@ -147,6 +149,12 @@ private static LongRangeNode split(int start, int end, List<InclusiveRange> elem

@Override
public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException {
LongValues skipFieldValues = singleValuedSkipField(context);
if (skipFieldValues != null) {
DocValuesSkipper skipper = context.reader().getDocValuesSkipper(skipField);
return new OverlappingSingleValuedRangeLeafFacetCutter(
skipFieldValues, boundaries, pos, requestedRangeCount, root, skipper);
}
if (singleValues != null) {
LongValues values = singleValues.getValues(context, null);
return new OverlappingSingleValuedRangeLeafFacetCutter(
Expand Down Expand Up @@ -233,6 +241,18 @@ static class OverlappingSingleValuedRangeLeafFacetCutter
this.elementaryIntervalRoot = elementaryIntervalRoot;
}

/**
 * Variant that passes a {@link DocValuesSkipper} through to the superclass, then sets up a
 * {@link IntervalTracker.MultiIntervalTracker} sized by {@code requestedRangeCount} and stores
 * the elementary interval root.
 */
OverlappingSingleValuedRangeLeafFacetCutter(
    LongValues longValues,
    long[] boundaries,
    int[] pos,
    int requestedRangeCount,
    LongRangeNode elementaryIntervalRoot,
    DocValuesSkipper skipper) {
  super(longValues, boundaries, pos, skipper);
  this.elementaryIntervalRoot = elementaryIntervalRoot;
  this.requestedIntervalTracker = new IntervalTracker.MultiIntervalTracker(requestedRangeCount);
}

@Override
void maybeRollUp(IntervalTracker rollUpInto) {
// TODO: for single valued we can rollup after collecting all documents, e.g. in reduce
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ private RangeFacetBuilderFactory() {}

/** Request long range facets for numeric field by name. */
public static CommonFacetBuilder forLongRanges(String field, LongRange... ranges) {
return forLongRanges(field, MultiLongValuesSource.fromLongField(field), ranges);
return new CommonFacetBuilder(
field, LongRangeFacetCutter.create(field, ranges), new RangeOrdToLabel(ranges))
.withSortByOrdinal();
}

/**
Expand Down
Loading
Loading