diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 80ba771596a9..3f04b8428d65 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -100,6 +100,9 @@ API Changes New Features --------------------- +* GITHUB#15794: Add DocValuesSkipper metadata for the maximum number of values + on any document in a field. (Prithvi S) + * GITHUB#15505: Upgrade snowball to 2d2e312df56f2ede014a4ffb3e91e6dea43c24be. New stemmer: PolishStemmer (and PolishSnowballAnalyzer in the stempel package) (Justas Sakalauskas, Dawid Weiss) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java index a2c5befd1787..e8538a7f537b 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java @@ -22,6 +22,7 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXLENGTH; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXVALUE; +import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXVALUECOUNT; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MINVALUE; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.NUMVALUES; import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORDPATTERN; @@ -72,6 +73,7 @@ static class OneField { long origin; long minValue; long maxValue; + int maxValueCount; long numValues; } @@ -123,8 +125,19 @@ assert startsWith(DOCCOUNT) : "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext; field.docCount = Integer.parseInt(stripPrefix(DOCCOUNT)); - if (dvType == DocValuesType.NUMERIC) { + readLine(); + if (startsWith(MAXVALUECOUNT)) { + 
field.maxValueCount = Integer.parseInt(stripPrefix(MAXVALUECOUNT)); readLine(); + } else if (field.docCount == 0) { + field.maxValueCount = 0; + } else if (dvType == DocValuesType.NUMERIC || dvType == DocValuesType.SORTED) { + field.maxValueCount = 1; + } else { + field.maxValueCount = -1; + } + + if (dvType == DocValuesType.NUMERIC) { assert startsWith(ORIGIN) : "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext; field.origin = Long.parseLong(stripPrefix(ORIGIN)); @@ -134,7 +147,6 @@ assert startsWith(ORIGIN) field.dataStartFilePointer = data.getFilePointer(); data.seek(data.getFilePointer() + (1 + field.pattern.length() + 2) * (long) maxDoc); } else if (dvType == DocValuesType.BINARY || dvType == DocValuesType.SORTED_NUMERIC) { - readLine(); assert startsWith(MAXLENGTH); field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH)); readLine(); @@ -145,7 +157,6 @@ assert startsWith(ORIGIN) data.getFilePointer() + (9 + field.pattern.length() + field.maxLength + 2) * (long) maxDoc); } else if (dvType == DocValuesType.SORTED || dvType == DocValuesType.SORTED_SET) { - readLine(); assert startsWith(NUMVALUES); field.numValues = Long.parseLong(stripPrefix(NUMVALUES)); readLine(); @@ -898,6 +909,11 @@ public int docCount() { return field.docCount; } + @Override + public int maxValueCount() { + return field.maxValueCount; + } + @Override public int minDocID(int level) { if (doc == -1) { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java index 61e0d58501e0..aee1d0cb3df7 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java @@ -51,6 +51,7 @@ class SimpleTextDocValuesWriter extends DocValuesConsumer { static final BytesRef MINVALUE = new BytesRef(" minvalue "); 
static final BytesRef MAXVALUE = new BytesRef(" maxvalue "); + static final BytesRef MAXVALUECOUNT = new BytesRef(" maxvaluecount "); static final BytesRef PATTERN = new BytesRef(" pattern "); // used for bytes @@ -115,6 +116,10 @@ public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) SimpleTextUtil.write(data, Integer.toString(numValues), scratch); SimpleTextUtil.writeNewline(data); + SimpleTextUtil.write(data, MAXVALUECOUNT); + SimpleTextUtil.write(data, Integer.toString(numValues == 0 ? 0 : 1), scratch); + SimpleTextUtil.writeNewline(data); + if (numValues != numDocs) { minValue = Math.min(minValue, 0); maxValue = Math.max(maxValue, 0); @@ -185,6 +190,11 @@ public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) th private void doAddBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + doAddBinaryField(field, valuesProducer, -1); + } + + private void doAddBinaryField( + FieldInfo field, DocValuesProducer valuesProducer, int maxValueCount) throws IOException { int maxLength = 0; BinaryDocValues values = valuesProducer.getBinary(field); int docCount = 0; @@ -197,6 +207,13 @@ private void doAddBinaryField(FieldInfo field, DocValuesProducer valuesProducer) SimpleTextUtil.write(data, Integer.toString(docCount), scratch); SimpleTextUtil.writeNewline(data); + SimpleTextUtil.write(data, MAXVALUECOUNT); + SimpleTextUtil.write( + data, + Integer.toString(maxValueCount == -1 ? (docCount == 0 ? 0 : 1) : maxValueCount), + scratch); + SimpleTextUtil.writeNewline(data); + // write maxLength SimpleTextUtil.write(data, MAXLENGTH); SimpleTextUtil.write(data, Integer.toString(maxLength), scratch); @@ -265,6 +282,10 @@ public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) th SimpleTextUtil.write(data, Integer.toString(docCount), scratch); SimpleTextUtil.writeNewline(data); + SimpleTextUtil.write(data, MAXVALUECOUNT); + SimpleTextUtil.write(data, Integer.toString(docCount == 0 ? 
0 : 1), scratch); + SimpleTextUtil.writeNewline(data); + int valueCount = 0; int maxLength = -1; TermsEnum terms = valuesProducer.getSorted(field).termsEnum(); @@ -358,9 +379,12 @@ public void addSortedNumericField(FieldInfo field, final DocValuesProducer value long minValue = Long.MAX_VALUE; long maxValue = Long.MIN_VALUE; + int maxValueCount = 0; SortedNumericDocValues values = valuesProducer.getSortedNumeric(field); for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { - for (int i = 0; i < values.docValueCount(); ++i) { + int valueCount = values.docValueCount(); + maxValueCount = Math.max(maxValueCount, valueCount); + for (int i = 0; i < valueCount; ++i) { long v = values.nextValue(); minValue = Math.min(minValue, v); maxValue = Math.max(maxValue, v); @@ -440,7 +464,8 @@ public BytesRef binaryValue() throws IOException { } }; } - }); + }, + maxValueCount); } @Override @@ -451,14 +476,20 @@ public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) writeFieldEntry(field, DocValuesType.SORTED_SET); int docCount = 0; + int maxValueCount = 0; SortedSetDocValues values = valuesProducer.getSortedSet(field); for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { ++docCount; + maxValueCount = Math.max(maxValueCount, values.docValueCount()); } SimpleTextUtil.write(data, DOCCOUNT); SimpleTextUtil.write(data, Integer.toString(docCount), scratch); SimpleTextUtil.writeNewline(data); + SimpleTextUtil.write(data, MAXVALUECOUNT); + SimpleTextUtil.write(data, Integer.toString(maxValueCount), scratch); + SimpleTextUtil.writeNewline(data); + long valueCount = 0; int maxLength = 0; TermsEnum terms = valuesProducer.getSortedSet(field).termsEnum(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java index 833acb26a493..208e5a81ae93 100644 --- 
a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java @@ -266,14 +266,17 @@ private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer) long globalMaxValue = Long.MIN_VALUE; long globalMinValue = Long.MAX_VALUE; int globalDocCount = 0; + int globalMaxValueCount = 0; int maxDocId = -1; final List accumulators = new ArrayList<>(); SkipAccumulator accumulator = null; final int maxAccumulators = 1 << (SKIP_INDEX_LEVEL_SHIFT * (SKIP_INDEX_MAX_LEVEL - 1)); for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + final int valueCount = values.docValueCount(); final long firstValue = values.nextValue(); + globalMaxValueCount = Math.max(globalMaxValueCount, valueCount); if (accumulator != null - && accumulator.isDone(skipIndexIntervalSize, values.docValueCount(), firstValue, doc)) { + && accumulator.isDone(skipIndexIntervalSize, valueCount, firstValue, doc)) { globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue); globalMinValue = Math.min(globalMinValue, accumulator.minValue); globalDocCount += accumulator.docCount; @@ -290,7 +293,7 @@ private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer) } accumulator.nextDoc(doc); accumulator.accumulate(firstValue); - for (int i = 1, end = values.docValueCount(); i < end; ++i) { + for (int i = 1; i < valueCount; ++i) { accumulator.accumulate(values.nextValue()); } } @@ -310,6 +313,7 @@ private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer) assert globalDocCount <= maxDocId + 1; meta.writeInt(globalDocCount); meta.writeInt(maxDocId); + meta.writeInt(globalMaxValueCount); } private void writeLevels(List accumulators) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesFormat.java 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesFormat.java index cd4bb7ef076c..22cd7581f1db 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesFormat.java @@ -189,7 +189,8 @@ public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOExcepti static final String SKIP_INDEX_EXTENSION = "dvs"; static final int VERSION_START = 0; static final int VERSION_SKIPPER_SEPARATE_FILE = 1; - static final int VERSION_CURRENT = VERSION_SKIPPER_SEPARATE_FILE; + static final int VERSION_SKIPPER_MAX_VALUE_COUNT = 2; + static final int VERSION_CURRENT = VERSION_SKIPPER_MAX_VALUE_COUNT; // indicates docvalues type static final byte NUMERIC = 0; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java index 7c5f03373526..876ca5741050 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java @@ -108,6 +108,10 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { readFields(in, state.fieldInfos); + if (version < Lucene90DocValuesFormat.VERSION_SKIPPER_MAX_VALUE_COUNT) { + inferMaxValueCounts(state.fieldInfos); + } + } catch (Throwable exception) { priorE = exception; } finally { @@ -216,6 +220,55 @@ public DocValuesProducer getMergeInstance() { true); } + private void inferMaxValueCounts(FieldInfos fieldInfos) { + for (var cursor : skippers) { + DocValuesSkipperEntry entry = cursor.value; + if (entry.maxValueCount == -1 && entry.docCount != 0) { + int fieldNumber = cursor.key; + FieldInfo info = fieldInfos.fieldInfo(fieldNumber); + int inferredMaxValueCount = -1; + if (info != null) { + switch (info.getDocValuesType()) { + case NUMERIC, SORTED -> 
inferredMaxValueCount = 1; + case SORTED_NUMERIC -> { + SortedNumericEntry sne = sortedNumerics.get(fieldNumber); + if (sne != null && sne.numValues == sne.numDocsWithField) { + inferredMaxValueCount = 1; + } + } + case SORTED_SET -> { + SortedSetEntry sse = sortedSets.get(fieldNumber); + if (sse != null) { + if (sse.singleValueEntry != null) { + inferredMaxValueCount = 1; + } else if (sse.ordsEntry != null + && sse.ordsEntry.numValues == sse.ordsEntry.numDocsWithField) { + inferredMaxValueCount = 1; + } + } + } + // $CASES-OMITTED$ + default -> { + // leave as -1 + } + } + } + if (inferredMaxValueCount != -1) { + skippers.put( + fieldNumber, + new DocValuesSkipperEntry( + entry.offset, + entry.length, + entry.minValue, + entry.maxValue, + entry.docCount, + entry.maxDocId, + inferredMaxValueCount)); + } + } + } + } + private void readFields(IndexInput meta, FieldInfos infos) throws IOException { for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { FieldInfo info = infos.fieldInfo(fieldNumber); @@ -255,8 +308,15 @@ private DocValuesSkipperEntry readDocValueSkipperMeta(IndexInput meta) throws IO long minValue = meta.readLong(); int docCount = meta.readInt(); int maxDocID = meta.readInt(); + final int maxValueCount; + if (version >= Lucene90DocValuesFormat.VERSION_SKIPPER_MAX_VALUE_COUNT) { + maxValueCount = meta.readInt(); + } else { + maxValueCount = docCount == 0 ? 
0 : -1; + } - return new DocValuesSkipperEntry(offset, length, minValue, maxValue, docCount, maxDocID); + return new DocValuesSkipperEntry( + offset, length, minValue, maxValue, docCount, maxDocID, maxValueCount); } private void readNumeric(IndexInput meta, NumericEntry entry) throws IOException { @@ -389,7 +449,13 @@ public void close() throws IOException { } private record DocValuesSkipperEntry( - long offset, long length, long minValue, long maxValue, int docCount, int maxDocId) {} + long offset, + long length, + long minValue, + long maxValue, + int docCount, + int maxDocId, + int maxValueCount) {} private static class NumericEntry { long[] table; @@ -2004,6 +2070,11 @@ public long maxValue() { public int docCount() { return entry.docCount; } + + @Override + public int maxValueCount() { + return entry.maxValueCount; + } }; } } diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index e9c46b1c48e1..570adb9738da 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -3624,6 +3624,20 @@ private static void checkDocValueSkipper(FieldInfo fi, DocValuesSkipper skipper) + " > " + skipper.maxValue()); } + if (skipper.maxValueCount() < -1) { + throw new CheckIndexException( + "skipper dv iterator for field: " + + fieldName + + " reports invalid maxValueCount, got " + + skipper.maxValueCount()); + } + if (skipper.docCount() == 0 && skipper.maxValueCount() != 0) { + throw new CheckIndexException( + "skipper dv iterator for field: " + + fieldName + + " reports maxValueCount for an empty field, got " + + skipper.maxValueCount()); + } int docCount = 0; int doc; while (true) { diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValuesSkipper.java b/lucene/core/src/java/org/apache/lucene/index/DocValuesSkipper.java index 79364b001736..0dd105d6b465 100644 --- 
a/lucene/core/src/java/org/apache/lucene/index/DocValuesSkipper.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocValuesSkipper.java @@ -99,6 +99,18 @@ public abstract class DocValuesSkipper { /** Return the global number of documents with a value for the field. */ public abstract int docCount(); + /** + * Return the global maximum number of values that any single document has for the field. Returns + * {@code -1} if the exact value is unavailable (e.g., the segment was written by an older codec + * that did not persist this metadata and it could not be inferred from other metadata). + * + * <p>This returns {@code 0} if {@link #docCount()} is {@code 0}. A field is known to be + * single-valued if this method returns {@code 1}. + */ + public int maxValueCount() { + return docCount() == 0 ? 0 : -1; + } + /** * Advance this skipper so that all levels intersects the range given by {@code minValue} and * {@code maxValue}. If there are no intersecting levels, the skipper is exhausted. diff --git a/lucene/core/src/test/org/apache/lucene/search/BaseDocValuesSkipperTests.java b/lucene/core/src/test/org/apache/lucene/search/BaseDocValuesSkipperTests.java index cfcd465d6427..12a4f2d757a2 100644 --- a/lucene/core/src/test/org/apache/lucene/search/BaseDocValuesSkipperTests.java +++ b/lucene/core/src/test/org/apache/lucene/search/BaseDocValuesSkipperTests.java @@ -186,6 +186,11 @@ public long maxValue() { public int docCount() { return 1024 + 1024 / 2; } + + @Override + public int maxValueCount() { + return 1; + } }; } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java index 8964507556b9..e4ec9c1145bd 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java @@ -1405,6 +1405,12 @@ public int minDocID(int level) { return minDocID; } + @Override + public int maxValueCount() { + assertThread("Doc values skipper", creationThread); + return in.maxValueCount(); + } + @Override public int maxDocID(int level) { assertThread("Doc values skipper", creationThread); diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseDocValuesFormatTestCase.java index 12ccecfaeaf7..f689a7bd6911 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseDocValuesFormatTestCase.java +++
b/lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseDocValuesFormatTestCase.java @@ -107,6 +107,7 @@ public void testSortedMergeAwayAllValuesWithSkipper() throws IOException { DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field"); assertEquals(0, skipper.docCount()); + assertEquals(0, skipper.maxValueCount()); skipper.advance(0); assertEquals(NO_MORE_DOCS, skipper.minDocID(0)); @@ -145,6 +146,7 @@ public void testSortedSetMergeAwayAllValuesWithSkipper() throws IOException { DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field"); assertEquals(0, skipper.docCount()); + assertEquals(0, skipper.maxValueCount()); skipper.advance(0); assertEquals(NO_MORE_DOCS, skipper.minDocID(0)); @@ -183,6 +185,7 @@ public void testNumberMergeAwayAllValuesWithSkipper() throws IOException { DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field"); assertEquals(0, skipper.docCount()); + assertEquals(0, skipper.maxValueCount()); skipper.advance(0); assertEquals(NO_MORE_DOCS, skipper.minDocID(0)); @@ -216,6 +219,7 @@ public void testSortedNumberMergeAwayAllValuesWithSkipper() throws IOException { DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field"); assertEquals(0, skipper.docCount()); + assertEquals(0, skipper.maxValueCount()); skipper.advance(0); assertEquals(NO_MORE_DOCS, skipper.minDocID(0)); @@ -251,6 +255,7 @@ public void testSortedMergeAwayAllValuesLargeSegmentWithSkipper() throws IOExcep DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field"); assertEquals(0, skipper.docCount()); + assertEquals(0, skipper.maxValueCount()); skipper.advance(0); assertEquals(NO_MORE_DOCS, skipper.minDocID(0)); @@ -291,6 +296,7 @@ public void testSortedSetMergeAwayAllValuesLargeSegmentWithSkipper() throws IOEx DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field"); assertEquals(0, skipper.docCount()); + assertEquals(0, 
skipper.maxValueCount()); skipper.advance(0); assertEquals(NO_MORE_DOCS, skipper.minDocID(0)); @@ -331,6 +337,7 @@ public void testNumericMergeAwayAllValuesLargeSegmentWithSkipper() throws IOExce DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field"); assertEquals(0, skipper.docCount()); + assertEquals(0, skipper.maxValueCount()); skipper.advance(0); assertEquals(NO_MORE_DOCS, skipper.minDocID(0)); @@ -367,6 +374,7 @@ public void testSortedNumericMergeAwayAllValuesLargeSegmentWithSkipper() throws DocValuesSkipper skipper = getOnlyLeafReader(ireader).getDocValuesSkipper("field"); assertEquals(0, skipper.docCount()); + assertEquals(0, skipper.maxValueCount()); skipper.advance(0); assertEquals(NO_MORE_DOCS, skipper.minDocID(0)); @@ -374,6 +382,41 @@ public void testSortedNumericMergeAwayAllValuesLargeSegmentWithSkipper() throws directory.close(); } + public void testMaxValueCountWithSkipper() throws IOException { + Directory directory = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), directory); + + Document doc = new Document(); + doc.add(NumericDocValuesField.indexedField("numeric", 1)); + doc.add(SortedDocValuesField.indexedField("sorted", newBytesRef("a"))); + doc.add(SortedNumericDocValuesField.indexedField("sorted_numeric", 1)); + doc.add(SortedSetDocValuesField.indexedField("sorted_set", newBytesRef("a"))); + writer.addDocument(doc); + + doc = new Document(); + doc.add(NumericDocValuesField.indexedField("numeric", 2)); + doc.add(SortedDocValuesField.indexedField("sorted", newBytesRef("b"))); + doc.add(SortedNumericDocValuesField.indexedField("sorted_numeric", 1)); + doc.add(SortedNumericDocValuesField.indexedField("sorted_numeric", 2)); + doc.add(SortedNumericDocValuesField.indexedField("sorted_numeric", 3)); + doc.add(SortedSetDocValuesField.indexedField("sorted_set", newBytesRef("a"))); + doc.add(SortedSetDocValuesField.indexedField("sorted_set", newBytesRef("b"))); + writer.addDocument(doc); + + 
writer.forceMerge(1); + DirectoryReader reader = writer.getReader(); + writer.close(); + + LeafReader leafReader = getOnlyLeafReader(reader); + assertEquals(1, leafReader.getDocValuesSkipper("numeric").maxValueCount()); + assertEquals(1, leafReader.getDocValuesSkipper("sorted").maxValueCount()); + assertEquals(3, leafReader.getDocValuesSkipper("sorted_numeric").maxValueCount()); + assertEquals(2, leafReader.getDocValuesSkipper("sorted_set").maxValueCount()); + + reader.close(); + directory.close(); + } + public void testNumericDocValuesWithSkipperSmall() throws Exception { doTestNumericDocValuesWithSkipper(random().nextInt(1, 1000)); } @@ -425,6 +468,11 @@ public long minValue() throws IOException { public int docID() { return numericDocValues.docID(); } + + @Override + public int docValueCount() { + return 1; + } }; } @@ -509,6 +557,11 @@ public long minValue() { public int docID() { return sortedNumericDocValues.docID(); } + + @Override + public int docValueCount() { + return sortedNumericDocValues.docValueCount(); + } }; } @@ -570,6 +623,11 @@ public long minValue() throws IOException { public int docID() { return sortedDocValues.docID(); } + + @Override + public int docValueCount() { + return 1; + } }; } @@ -655,6 +713,11 @@ public long minValue() { public int docID() { return sortedSetDocValues.docID(); } + + @Override + public int docValueCount() { + return sortedSetDocValues.docValueCount(); + } }; } @@ -731,6 +794,7 @@ private int assertDocValuesSkipSequential(DocValuesWrapper iterator, DocValuesSk iterator.advance(0); int docCount = 0; + int maxValueCount = 0; while (true) { int previousMaxDoc = skipper.maxDocID(0); skipper.advance(previousMaxDoc + 1); @@ -757,6 +821,7 @@ private int assertDocValuesSkipSequential(DocValuesWrapper iterator, DocValuesSk maxDoc = Math.max(maxDoc, iterator.docID()); minVal = Math.min(minVal, iterator.minValue()); maxVal = Math.max(maxVal, iterator.maxValue()); + maxValueCount = Math.max(maxValueCount, 
iterator.docValueCount()); iterator.advance(iterator.docID() + 1); } if (skipperHasAccurateDocBounds()) { @@ -788,6 +853,7 @@ private int assertDocValuesSkipSequential(DocValuesWrapper iterator, DocValuesSk } assertEquals(docCount, skipper.docCount()); + assertEquals(maxValueCount, skipper.maxValueCount()); return docCount; } @@ -836,6 +902,8 @@ private interface DocValuesWrapper { long minValue() throws IOException; int docID(); + + int docValueCount(); } public void testMismatchedFields() throws Exception {