From 83fa675489961ea58c9188da3905bc8fa68080a4 Mon Sep 17 00:00:00 2001 From: slow-J <32519034+slow-J@users.noreply.github.com> Date: Tue, 16 Jun 2026 14:43:09 +0100 Subject: [PATCH 1/3] Use the doc-values skip index to skip per-doc value lookups in LongRangeFacetCutter --- lucene/CHANGES.txt | 2 + .../cutters/ranges/LongRangeFacetCutter.java | 138 ++++++++++++++- .../NonOverlappingLongRangeFacetCutter.java | 18 +- .../OverlappingLongRangeFacetCutter.java | 25 ++- .../facet/utils/RangeFacetBuilderFactory.java | 5 +- .../lucene/sandbox/facet/TestRangeFacet.java | 163 ++++++++++++++++++ 6 files changed, 340 insertions(+), 11 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 42a6e8073576..aac109bb5799 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -568,6 +568,8 @@ Optimizations * GITHUB#16228: Reuse scratch int[] for ordinal translation. (Tim Brooks) +* GITHUB#16268: Use the doc-values skip index to skip per-doc value lookups for dense blocks in LongRangeFacetCutter. (Jakub Slowinski) + Bug Fixes --------------------- * GITHUB#15754: Fix HTMLStripCharFilter to prevent tags from incorrectly consuming subsequent diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java index b9518bfca154..14a7942e54db 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java @@ -23,6 +23,11 @@ import org.apache.lucene.facet.MultiLongValues; import org.apache.lucene.facet.MultiLongValuesSource; import org.apache.lucene.facet.range.LongRange; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValuesSkipper; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.sandbox.facet.cutters.FacetCutter; import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter; import org.apache.lucene.search.LongValues; @@ -42,6 +47,10 @@ public abstract class LongRangeFacetCutter implements FacetCutter { // TODO: refactor - weird that we have both multi and single here. final LongValuesSource singleValues; + + // Field to read a DocValuesSkipper from on the single-valued path, or null when disabled. + final String skipField; + final LongRangeAndPos[] sortedRanges; final int requestedRangeCount; @@ -62,17 +71,34 @@ static LongRangeFacetCutter createSingleOrMultiValued( MultiLongValuesSource longValuesSource, LongValuesSource singleLongValuesSource, LongRange[] longRanges) { + return createSingleOrMultiValued(longValuesSource, singleLongValuesSource, longRanges, null); + } + + /** Same as above, but uses the {@code skipField} skip index on the single-valued path. */ + static LongRangeFacetCutter createSingleOrMultiValued( + MultiLongValuesSource longValuesSource, + LongValuesSource singleLongValuesSource, + LongRange[] longRanges, + String skipField) { if (areOverlappingRanges(longRanges)) { return new OverlappingLongRangeFacetCutter( - longValuesSource, singleLongValuesSource, longRanges); + longValuesSource, singleLongValuesSource, longRanges, skipField); } return new NonOverlappingLongRangeFacetCutter( - longValuesSource, singleLongValuesSource, longRanges); + longValuesSource, singleLongValuesSource, longRanges, skipField); } public static LongRangeFacetCutter create( MultiLongValuesSource longValuesSource, LongRange[] longRanges) { - return createSingleOrMultiValued(longValuesSource, null, longRanges); + return createSingleOrMultiValued(longValuesSource, null, longRanges, null); + } + + /** Create {@link FacetCutter} for a long field by name, using its skip index when present. */ + public static LongRangeFacetCutter create(String field, LongRange[] longRanges) { + // Leave the single-valued source null. The skip path reads the field directly, and a + // multi-valued segment must fall back to the multi-valued leaf cutter. + return createSingleOrMultiValued( + MultiLongValuesSource.fromLongField(field), null, longRanges, field); } // caller handles conversion of Doubles and DoubleRange to Long and LongRange @@ -80,7 +106,8 @@ public static LongRangeFacetCutter create( LongRangeFacetCutter( MultiLongValuesSource longValuesSource, LongValuesSource singleLongValuesSource, - LongRange[] longRanges) { + LongRange[] longRanges, + String skipField) { super(); valuesSource = longValuesSource; if (singleLongValuesSource != null) { @@ -88,6 +115,7 @@ public static LongRangeFacetCutter create( } else { singleValues = MultiLongValuesSource.unwrapSingleton(valuesSource); } + this.skipField = skipField; sortedRanges = new LongRangeAndPos[longRanges.length]; requestedRangeCount = longRanges.length; @@ -124,6 +152,39 @@ public static LongRangeFacetCutter create( */ abstract List buildElementaryIntervals(); + /** + * Returns the {@link DocValuesSkipper} for {@link #skipField} in this segment. Null when: no skip + * field is configured, the field has no skip index, or some doc in this segment has more than one + * value. + */ + final DocValuesSkipper maybeSkipper(LeafReaderContext context) throws IOException { + if (skipField == null) { + return null; + } + SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), skipField); + if (DocValues.unwrapSingleton(sortedNumeric) == null) { + return null; + } + return context.reader().getDocValuesSkipper(skipField); + } + + /** Single-valued {@link LongValues} for {@link #skipField} in this segment. */ + final LongValues skipFieldValues(LeafReaderContext context) throws IOException { + NumericDocValues values = + DocValues.unwrapSingleton(DocValues.getSortedNumeric(context.reader(), skipField)); + return new LongValues() { + @Override + public long longValue() throws IOException { + return values.longValue(); + } + + @Override + public boolean advanceExact(int doc) throws IOException { + return values.advanceExact(doc); + } + }; + } + private static boolean areOverlappingRanges(LongRange[] ranges) { if (ranges.length == 0) { return false; @@ -252,21 +313,52 @@ abstract static class LongRangeSingleValuedLeafFacetCutter implements LeafFacetC IntervalTracker requestedIntervalTracker; + // Skip index for the faceted field, or null when disabled. + private final DocValuesSkipper skipper; + + // Cached decision from advanceSkipper, valid for every doc up to (and including) upToInclusive: + // when upToSameInterval is true, all those docs map to elementary interval upToIntervalOrd. + private int upToInclusive = -1; + private boolean upToSameInterval; + private int upToIntervalOrd; + LongRangeSingleValuedLeafFacetCutter(LongValues longValues, long[] boundaries, int[] pos) { + this(longValues, boundaries, pos, null); + } + + LongRangeSingleValuedLeafFacetCutter( + LongValues longValues, long[] boundaries, int[] pos, DocValuesSkipper skipper) { this.longValues = longValues; this.boundaries = boundaries; this.pos = pos; + this.skipper = skipper; + // The skip path counts a dense block as one value per doc, so it's single-valued only. + assert skipper == null || skipper.maxValueCount() <= 1 + : "skip-index fast path requires a single-valued field, got maxValueCount=" + + skipper.maxValueCount(); } @Override public boolean advanceExact(int doc) throws IOException { - if (longValues.advanceExact(doc) == false) { + if (skipper != null && doc > upToInclusive) { + advanceSkipper(doc); + } + + int intervalOrd; + if (upToSameInterval) { + // We are inside a dense skip block that maps entirely to one elementary interval, so reuse + // the cached ordinal and skip the per-doc value lookup and binary search. + intervalOrd = upToIntervalOrd; + } else if (longValues.advanceExact(doc)) { + intervalOrd = processValue(longValues.longValue()); + } else { return false; } + if (requestedIntervalTracker != null) { requestedIntervalTracker.clear(); } - elementaryIntervalOrd = processValue(longValues.longValue()); + elementaryIntervalOrd = intervalOrd; maybeRollUp(requestedIntervalTracker); if (requestedIntervalTracker != null) { requestedIntervalTracker.freeze(); @@ -275,6 +367,40 @@ public boolean advanceExact(int doc) throws IOException { return true; } + /** Mirrors {@code HistogramCollector#advanceSkipper}. */ + private void advanceSkipper(int doc) throws IOException { + if (doc > skipper.maxDocID(0)) { + skipper.advance(doc); + } + upToSameInterval = false; + + if (skipper.minDocID(0) > doc) { + // Corner case which happens if doc doesn't have a value and is between two intervals of the + // skip index. Fall back to per-doc lookups until the next block. + upToInclusive = skipper.minDocID(0) - 1; + return; + } + + upToInclusive = skipper.maxDocID(0); + // Now find the highest level where all docs have a value and map to the same interval. + for (int level = 0; level < skipper.numLevels(); ++level) { + int totalDocsAtLevel = skipper.maxDocID(level) - skipper.minDocID(level) + 1; + if (skipper.docCount(level) != totalDocsAtLevel) { + // Some docs at this level have no value, so we can't resolve the whole block at once. + break; + } + // Long fields store raw values, the skipper's min/max map straight into the boundary space. + int minInterval = processValue(skipper.minValue(level)); + int maxInterval = processValue(skipper.maxValue(level)); + if (minInterval != maxInterval) { + break; + } + upToInclusive = skipper.maxDocID(level); + upToSameInterval = true; + upToIntervalOrd = minInterval; + } + } + // Returns the value of the interval v belongs or lastIntervalSeen // if no processing is done, it returns the lastIntervalSeen private int processValue(long v) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/NonOverlappingLongRangeFacetCutter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/NonOverlappingLongRangeFacetCutter.java index 3d657a96570d..4bb68dea11b1 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/NonOverlappingLongRangeFacetCutter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/NonOverlappingLongRangeFacetCutter.java @@ -22,6 +22,7 @@ import org.apache.lucene.facet.MultiLongValues; import org.apache.lucene.facet.MultiLongValuesSource; import org.apache.lucene.facet.range.LongRange; +import org.apache.lucene.index.DocValuesSkipper; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter; import org.apache.lucene.search.LongValues; @@ -32,8 +33,9 @@ class NonOverlappingLongRangeFacetCutter extends LongRangeFacetCutter { NonOverlappingLongRangeFacetCutter( MultiLongValuesSource longValuesSource, LongValuesSource singleLongValuesSource, - LongRange[] longRanges) { - super(longValuesSource, singleLongValuesSource, longRanges); + LongRange[] longRanges, + String skipField) { + super(longValuesSource, singleLongValuesSource, longRanges, skipField); } /** @@ -68,6 +70,13 @@ List buildElementaryIntervals() { @Override public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException { + // Use the skip index when we can, otherwise fall back to the value source. + DocValuesSkipper skipper = maybeSkipper(context); + if (skipper != null) { + LongValues values = skipFieldValues(context); + return new NonOverlappingLongRangeSingleValueLeafFacetCutter( + values, boundaries, pos, skipper); + } if (singleValues != null) { LongValues values = singleValues.getValues(context, null); return new NonOverlappingLongRangeSingleValueLeafFacetCutter(values, boundaries, pos); @@ -112,6 +121,11 @@ static class NonOverlappingLongRangeSingleValueLeafFacetCutter super(longValues, boundaries, pos); } + NonOverlappingLongRangeSingleValueLeafFacetCutter( + LongValues longValues, long[] boundaries, int[] pos, DocValuesSkipper skipper) { + super(longValues, boundaries, pos, skipper); + } + @Override public int nextOrd() throws IOException { if (elementaryIntervalOrd == NO_MORE_ORDS) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java index 58586db892f7..2eff548329ae 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java @@ -25,6 +25,7 @@ import org.apache.lucene.facet.MultiLongValues; import org.apache.lucene.facet.MultiLongValuesSource; import org.apache.lucene.facet.range.LongRange; +import org.apache.lucene.index.DocValuesSkipper; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.internal.hppc.IntCursor; import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter; @@ -43,8 +44,9 @@ class OverlappingLongRangeFacetCutter extends LongRangeFacetCutter { OverlappingLongRangeFacetCutter( MultiLongValuesSource longValuesSource, LongValuesSource singleLongValuesSource, - LongRange[] longRanges) { - super(longValuesSource, singleLongValuesSource, longRanges); + LongRange[] longRanges, + String skipField) { + super(longValuesSource, singleLongValuesSource, longRanges, skipField); // Build binary tree on top of intervals: root = split(0, elementaryIntervals.size(), elementaryIntervals); @@ -147,6 +149,13 @@ private static LongRangeNode split(int start, int end, List elem @Override public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException { + // Use the skip index when we can, otherwise fall back to the value source. + DocValuesSkipper skipper = maybeSkipper(context); + if (skipper != null) { + LongValues values = skipFieldValues(context); + return new OverlappingSingleValuedRangeLeafFacetCutter( + values, boundaries, pos, requestedRangeCount, root, skipper); + } if (singleValues != null) { LongValues values = singleValues.getValues(context, null); return new OverlappingSingleValuedRangeLeafFacetCutter( @@ -233,6 +242,18 @@ static class OverlappingSingleValuedRangeLeafFacetCutter this.elementaryIntervalRoot = elementaryIntervalRoot; } + OverlappingSingleValuedRangeLeafFacetCutter( + LongValues longValues, + long[] boundaries, + int[] pos, + int requestedRangeCount, + LongRangeNode elementaryIntervalRoot, + DocValuesSkipper skipper) { + super(longValues, boundaries, pos, skipper); + requestedIntervalTracker = new IntervalTracker.MultiIntervalTracker(requestedRangeCount); + this.elementaryIntervalRoot = elementaryIntervalRoot; + } + @Override void maybeRollUp(IntervalTracker rollUpInto) { // TODO: for single valued we can rollup after collecting all documents, e.g. in reduce diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/RangeFacetBuilderFactory.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/RangeFacetBuilderFactory.java index 8d69acfdc336..4caccc4b07e4 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/RangeFacetBuilderFactory.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/RangeFacetBuilderFactory.java @@ -35,7 +35,10 @@ private RangeFacetBuilderFactory() {} /** Request long range facets for numeric field by name. */ public static CommonFacetBuilder forLongRanges(String field, LongRange... ranges) { - return forLongRanges(field, MultiLongValuesSource.fromLongField(field), ranges); + // Pass the field by name so we can use its skip index when present. + return new CommonFacetBuilder( + field, LongRangeFacetCutter.create(field, ranges), new RangeOrdToLabel(ranges)) + .withSortByOrdinal(); } /** diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java index 739f77fe37c5..7deb908cb93d 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java @@ -21,6 +21,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomNumbers; import java.io.IOException; import java.util.List; +import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleDocValuesField; import org.apache.lucene.document.DoublePoint; @@ -57,6 +58,8 @@ import org.apache.lucene.search.LongValuesSource; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiCollectorManager; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.search.DummyTotalHitCountCollector; @@ -893,6 +896,166 @@ public void testRandomLongsSingleValued() throws Exception { IOUtils.close(r, dir); } + public void testSkipIndexEquivalenceLong() throws Exception { + // mode 1 sorts by the field and mode 2 also shrinks the skip interval, so the blocks are dense + // enough for the fast path to fire. + for (int mode = 0; mode < 3; mode++) { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + if (mode >= 1) { + iwc.setIndexSort(new Sort(new SortField("field", SortField.Type.LONG))); + } + if (mode == 2) { + iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Lucene90DocValuesFormat(4))); + } + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + int numDocs = atLeast(1000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + long v = TestUtil.nextLong(random(), -100, 100); + doc.add(NumericDocValuesField.indexedField("field", v)); + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "mode=" + mode); + + w.close(); + IOUtils.close(dir); + } + } + + public void testSkipIndexEquivalenceExtremeValues() throws Exception { + // Index sorted with extreme values mixed in, so some skip blocks carry Long.MIN/MAX_VALUE as + // their min/max bounds and advanceSkipper's processValue is exercised on those bounds. + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setIndexSort(new Sort(new SortField("field", SortField.Type.LONG))); + iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Lucene90DocValuesFormat(4))); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + int numDocs = atLeast(1000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + long v = + switch (random().nextInt(4)) { + case 0 -> Long.MIN_VALUE; + case 1 -> Long.MAX_VALUE; + default -> TestUtil.nextLong(random(), -100, 100); + }; + doc.add(NumericDocValuesField.indexedField("field", v)); + doc.add(new LongPoint("point", v)); + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "extreme"); + + w.close(); + IOUtils.close(dir); + } + + public void testSkipIndexEquivalenceSparse() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setIndexSort(new Sort(new SortField("field", SortField.Type.LONG, false))); + iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Lucene90DocValuesFormat(4))); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + int numDocs = atLeast(1000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + // Leave roughly a third of the docs without a value so skip blocks aren't dense. + if (random().nextInt(3) != 0) { + doc.add( + NumericDocValuesField.indexedField("field", TestUtil.nextLong(random(), -100, 100))); + } + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "sparse"); + + w.close(); + IOUtils.close(dir); + } + + public void testSkipIndexEquivalenceMultiValued() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + int numDocs = atLeast(500); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + int numVals = TestUtil.nextInt(random(), 1, 5); + for (int j = 0; j < numVals; j++) { + doc.add(new SortedNumericDocValuesField("field", TestUtil.nextLong(random(), -100, 100))); + } + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "multi-valued"); + + w.close(); + IOUtils.close(dir); + } + + // Asserts faceting "field" by name (skip index when available) matches faceting via a + // MultiLongValuesSource (no skip index), over random range sets including extreme bounds. + private void assertSkipIndexEquivalence(RandomIndexWriter w, String desc) throws IOException { + IndexReader r = w.getReader(); + try { + IndexSearcher s = newSearcher(r, false); + + int numIters = atLeast(10); + for (int iter = 0; iter < numIters; iter++) { + int numRange = TestUtil.nextInt(random(), 0, 20); + LongRange[] ranges = new LongRange[numRange]; + for (int rangeID = 0; rangeID < numRange; rangeID++) { + long min; + long max; + if (random().nextInt(20) == 0) { + // Occasionally use extreme bounds to exercise the boundary edges of processValue. + min = random().nextBoolean() ? Long.MIN_VALUE : TestUtil.nextLong(random(), -120, 120); + max = random().nextBoolean() ? Long.MAX_VALUE : TestUtil.nextLong(random(), -120, 120); + } else { + min = TestUtil.nextLong(random(), -120, 120); + max = TestUtil.nextLong(random(), -120, 120); + } + if (min > max) { + long x = min; + min = max; + max = x; + } + ranges[rangeID] = new LongRange("r" + rangeID, min, true, max, true); + } + OrdToLabel ordToLabel = new RangeOrdToLabel(ranges); + + // value-source path, no skipper. + CountFacetRecorder baselineRecorder = new CountFacetRecorder(); + s.search( + MatchAllDocsQuery.INSTANCE, + new FacetFieldCollectorManager<>( + LongRangeFacetCutter.create(MultiLongValuesSource.fromLongField("field"), ranges), + baselineRecorder)); + String baseline = + getAllSortByOrd(getRangeOrdinals(ranges), baselineRecorder, "field", ordToLabel) + .toString(); + + // by-field cutter, uses the skip index. + CountFacetRecorder skipRecorder = new CountFacetRecorder(); + s.search( + MatchAllDocsQuery.INSTANCE, + new FacetFieldCollectorManager<>( + LongRangeFacetCutter.create("field", ranges), skipRecorder)); + String withSkip = + getAllSortByOrd(getRangeOrdinals(ranges), skipRecorder, "field", ordToLabel).toString(); + + assertEquals(desc + " iter=" + iter, baseline, withSkip); + } + } finally { + IOUtils.close(r); + } + } + public void testRandomLongsMultiValued() throws Exception { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir); From c39f9caaebb4fb72f4c68a8eec10e1919b60b8cc Mon Sep 17 00:00:00 2001 From: slow-J <32519034+slow-J@users.noreply.github.com> Date: Fri, 19 Jun 2026 15:03:43 +0100 Subject: [PATCH 2/3] Remove redundant assertion --- .../sandbox/facet/cutters/ranges/LongRangeFacetCutter.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java index 14a7942e54db..1720ca502deb 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java @@ -332,10 +332,6 @@ abstract static class LongRangeSingleValuedLeafFacetCutter implements LeafFacetC this.boundaries = boundaries; this.pos = pos; this.skipper = skipper; - // The skip path counts a dense block as one value per doc, so it's single-valued only. - assert skipper == null || skipper.maxValueCount() <= 1 - : "skip-index fast path requires a single-valued field, got maxValueCount=" - + skipper.maxValueCount(); } @Override From 7db2833b5fc8a3ceca2e98ac246394e7b6e30dc4 Mon Sep 17 00:00:00 2001 From: slow-J <32519034+slow-J@users.noreply.github.com> Date: Tue, 23 Jun 2026 08:26:06 +0100 Subject: [PATCH 3/3] Extend the skip-index fast path to non-dense blocks and reuse the interval tracker --- lucene/CHANGES.txt | 4 +- .../facet/cutters/ranges/IntervalTracker.java | 11 ++++ .../cutters/ranges/LongRangeFacetCutter.java | 66 +++++++++---------- .../NonOverlappingLongRangeFacetCutter.java | 9 ++- .../OverlappingLongRangeFacetCutter.java | 9 ++- .../facet/utils/RangeFacetBuilderFactory.java | 1 - .../lucene/sandbox/facet/TestRangeFacet.java | 40 ++++++++++- 7 files changed, 90 insertions(+), 50 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index aac109bb5799..c88ea8b5e8b5 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -291,7 +291,7 @@ Improvements Optimizations --------------------- -(No changes) +* GITHUB#16268: Use the doc-values skip index to skip per-doc value lookups in LongRangeFacetCutter. (Jakub Slowinski) Bug Fixes --------------------- @@ -568,8 +568,6 @@ Optimizations * GITHUB#16228: Reuse scratch int[] for ordinal translation. (Tim Brooks) -* GITHUB#16268: Use the doc-values skip index to skip per-doc value lookups for dense blocks in LongRangeFacetCutter. (Jakub Slowinski) - Bug Fixes --------------------- * GITHUB#15754: Fix HTMLStripCharFilter to prevent tags from incorrectly consuming subsequent diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/IntervalTracker.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/IntervalTracker.java index f3b11f56296f..f36c18bd54c8 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/IntervalTracker.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/IntervalTracker.java @@ -36,6 +36,11 @@ interface IntervalTracker extends OrdinalIterator { /** clear recorded information on this tracker. * */ void clear(); + /** + * restart reading from the first recorded ordinal, to replay a {@link #freeze() frozen} tracker + */ + void rewind(); + /** check if any data for the interval has been recorded * */ boolean get(int index); @@ -71,6 +76,12 @@ public void clear() { intervalsWithHit = 0; } + @Override + public void rewind() { + bitFrom = 0; + trackerState = 0; + } + @Override public boolean get(int index) { return tracker.get(index); diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java index 1720ca502deb..b4618feff5ce 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/LongRangeFacetCutter.java @@ -27,7 +27,6 @@ import org.apache.lucene.index.DocValuesSkipper; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.sandbox.facet.cutters.FacetCutter; import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter; import org.apache.lucene.search.LongValues; @@ -48,7 +47,7 @@ public abstract class LongRangeFacetCutter implements FacetCutter { // TODO: refactor - weird that we have both multi and single here. final LongValuesSource singleValues; - // Field to read a DocValuesSkipper from on the single-valued path, or null when disabled. + // Field name whose skip index is used on the single-valued path, or null when faceting a source. final String skipField; final LongRangeAndPos[] sortedRanges; @@ -153,25 +152,18 @@ public static LongRangeFacetCutter create(String field, LongRange[] longRanges) abstract List buildElementaryIntervals(); /** - * Returns the {@link DocValuesSkipper} for {@link #skipField} in this segment. Null when: no skip - * field is configured, the field has no skip index, or some doc in this segment has more than one - * value. + * Single-valued {@link LongValues} read directly from {@link #skipField} so its skip index can be + * used, or null when there is no skip field or the segment is multi-valued. */ - final DocValuesSkipper maybeSkipper(LeafReaderContext context) throws IOException { + final LongValues singleValuedSkipField(LeafReaderContext context) throws IOException { if (skipField == null) { return null; } - SortedNumericDocValues sortedNumeric = DocValues.getSortedNumeric(context.reader(), skipField); - if (DocValues.unwrapSingleton(sortedNumeric) == null) { - return null; - } - return context.reader().getDocValuesSkipper(skipField); - } - - /** Single-valued {@link LongValues} for {@link #skipField} in this segment. */ - final LongValues skipFieldValues(LeafReaderContext context) throws IOException { NumericDocValues values = DocValues.unwrapSingleton(DocValues.getSortedNumeric(context.reader(), skipField)); + if (values == null) { + return null; + } return new LongValues() { @Override public long longValue() throws IOException { @@ -313,15 +305,20 @@ abstract static class LongRangeSingleValuedLeafFacetCutter implements LeafFacetC IntervalTracker requestedIntervalTracker; - // Skip index for the faceted field, or null when disabled. private final DocValuesSkipper skipper; - // Cached decision from advanceSkipper, valid for every doc up to (and including) upToInclusive: - // when upToSameInterval is true, all those docs map to elementary interval upToIntervalOrd. + // advanceSkipper's decisions for the current block; the fields below hold while doc <= + // upToInclusive, after which it runs again for the next block. private int upToInclusive = -1; + // Whether every value in the block maps to the single interval upToIntervalOrd. private boolean upToSameInterval; + // Whether every doc in the block has a value. + private boolean upToDense; private int upToIntervalOrd; + // Interval of the previous doc with a value, for replaying the tracker on a repeat. + private int previousIntervalOrd = -1; + LongRangeSingleValuedLeafFacetCutter(LongValues longValues, long[] boundaries, int[] pos) { this(longValues, boundaries, pos, null); } @@ -342,8 +339,11 @@ public boolean advanceExact(int doc) throws IOException { int intervalOrd; if (upToSameInterval) { - // We are inside a dense skip block that maps entirely to one elementary interval, so reuse - // the cached ordinal and skip the per-doc value lookup and binary search. + // Reuse the cached ordinal, skipping the binary search. A dense block also skips the value + // lookup, a sparse one still needs advanceExact to know whether this doc has a value. + if (upToDense == false && longValues.advanceExact(doc) == false) { + return false; + } intervalOrd = upToIntervalOrd; } else if (longValues.advanceExact(doc)) { intervalOrd = processValue(longValues.longValue()); @@ -351,19 +351,22 @@ public boolean advanceExact(int doc) throws IOException { return false; } - if (requestedIntervalTracker != null) { - requestedIntervalTracker.clear(); - } elementaryIntervalOrd = intervalOrd; - maybeRollUp(requestedIntervalTracker); if (requestedIntervalTracker != null) { - requestedIntervalTracker.freeze(); + if (skipper != null && intervalOrd == previousIntervalOrd) { + // Same interval as the previous doc, so replay its frozen rollup instead of rebuilding. + requestedIntervalTracker.rewind(); + } else { + requestedIntervalTracker.clear(); + maybeRollUp(requestedIntervalTracker); + requestedIntervalTracker.freeze(); + previousIntervalOrd = intervalOrd; + } } return true; } - /** Mirrors {@code HistogramCollector#advanceSkipper}. */ private void advanceSkipper(int doc) throws IOException { if (doc > skipper.maxDocID(0)) { skipper.advance(doc); @@ -378,14 +381,9 @@ private void advanceSkipper(int doc) throws IOException { } upToInclusive = skipper.maxDocID(0); - // Now find the highest level where all docs have a value and map to the same interval. + // Climb to the highest level that still maps to a single interval. for (int level = 0; level < skipper.numLevels(); ++level) { - int totalDocsAtLevel = skipper.maxDocID(level) - skipper.minDocID(level) + 1; - if (skipper.docCount(level) != totalDocsAtLevel) { - // Some docs at this level have no value, so we can't resolve the whole block at once. - break; - } - // Long fields store raw values, the skipper's min/max map straight into the boundary space. + // Long fields store raw values, skipper's min/max maps straight into the boundary space. int minInterval = processValue(skipper.minValue(level)); int maxInterval = processValue(skipper.maxValue(level)); if (minInterval != maxInterval) { @@ -394,6 +392,8 @@ private void advanceSkipper(int doc) throws IOException { upToInclusive = skipper.maxDocID(level); upToSameInterval = true; upToIntervalOrd = minInterval; + int totalDocsAtLevel = skipper.maxDocID(level) - skipper.minDocID(level) + 1; + upToDense = skipper.docCount(level) == totalDocsAtLevel; } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/NonOverlappingLongRangeFacetCutter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/NonOverlappingLongRangeFacetCutter.java index 4bb68dea11b1..598d083b5c8a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/NonOverlappingLongRangeFacetCutter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/NonOverlappingLongRangeFacetCutter.java @@ -70,12 +70,11 @@ List buildElementaryIntervals() { @Override public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException { - // Use the skip index when we can, otherwise fall back to the value source. - DocValuesSkipper skipper = maybeSkipper(context); - if (skipper != null) { - LongValues values = skipFieldValues(context); + LongValues skipFieldValues = singleValuedSkipField(context); + if (skipFieldValues != null) { + DocValuesSkipper skipper = context.reader().getDocValuesSkipper(skipField); return new NonOverlappingLongRangeSingleValueLeafFacetCutter( - values, boundaries, pos, skipper); + skipFieldValues, boundaries, pos, skipper); } if (singleValues != null) { LongValues values = singleValues.getValues(context, null); diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java index 2eff548329ae..c25023ae661b 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/cutters/ranges/OverlappingLongRangeFacetCutter.java @@ -149,12 +149,11 @@ private static LongRangeNode split(int start, int end, List elem @Override public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException { - // Use the skip index when we can, otherwise fall back to the value source. - DocValuesSkipper skipper = maybeSkipper(context); - if (skipper != null) { - LongValues values = skipFieldValues(context); + LongValues skipFieldValues = singleValuedSkipField(context); + if (skipFieldValues != null) { + DocValuesSkipper skipper = context.reader().getDocValuesSkipper(skipField); return new OverlappingSingleValuedRangeLeafFacetCutter( - values, boundaries, pos, requestedRangeCount, root, skipper); + skipFieldValues, boundaries, pos, requestedRangeCount, root, skipper); } if (singleValues != null) { LongValues values = singleValues.getValues(context, null); diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/RangeFacetBuilderFactory.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/RangeFacetBuilderFactory.java index 4caccc4b07e4..05ab7a315c55 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/RangeFacetBuilderFactory.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/RangeFacetBuilderFactory.java @@ -35,7 +35,6 @@ private RangeFacetBuilderFactory() {} /** Request long range facets for numeric field by name. */ public static CommonFacetBuilder forLongRanges(String field, LongRange... ranges) { - // Pass the field by name so we can use its skip index when present. return new CommonFacetBuilder( field, LongRangeFacetCutter.create(field, ranges), new RangeOrdToLabel(ranges)) .withSortByOrdinal(); diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java index 7deb908cb93d..5eabe94d03f6 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/TestRangeFacet.java @@ -944,7 +944,6 @@ public void testSkipIndexEquivalenceExtremeValues() throws Exception { default -> TestUtil.nextLong(random(), -100, 100); }; doc.add(NumericDocValuesField.indexedField("field", v)); - doc.add(new LongPoint("point", v)); w.addDocument(doc); } @@ -998,8 +997,43 @@ public void testSkipIndexEquivalenceMultiValued() throws Exception { IOUtils.close(dir); } - // Asserts faceting "field" by name (skip index when available) matches faceting via a - // MultiLongValuesSource (no skip index), over random range sets including extreme bounds. + public void testSkipIndexEquivalenceFewValues() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(); + iwc.setIndexSort(new Sort(new SortField("field", SortField.Type.LONG, false))); + iwc.setCodec(TestUtil.alwaysDocValuesFormat(new Lucene90DocValuesFormat(4))); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + + int numDocs = atLeast(1000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(NumericDocValuesField.indexedField("field", TestUtil.nextLong(random(), 0, 5))); + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "few-values"); + + w.close(); + IOUtils.close(dir); + } + + public void testSingleValuedNoSkipIndex() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + + int numDocs = atLeast(1000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(new NumericDocValuesField("field", TestUtil.nextLong(random(), -100, 100))); + w.addDocument(doc); + } + + assertSkipIndexEquivalence(w, "single-valued-no-skip"); + + w.close(); + IOUtils.close(dir); + } + private void assertSkipIndexEquivalence(RandomIndexWriter w, String desc) throws IOException { IndexReader r = w.getReader(); try {