From 36aec7baad3427c4580f8a8903170efc22d3dc9e Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Thu, 19 Feb 2026 22:27:38 -0800 Subject: [PATCH 1/7] Adding configuration files to gitignore Signed-off-by: Ankit Jain --- .gitignore | 4 ++++ .vscode/extensions.json | 10 ---------- .vscode/settings.json | 13 ------------- 3 files changed, 4 insertions(+), 23 deletions(-) delete mode 100644 .vscode/extensions.json delete mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index 5acb86f4a8d8..99fbb00205c7 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,10 @@ gradle/wrapper/gradle-wrapper.jar /.classpath /.settings/ +# Kiro +.vscode +.kiro + # Eclipse Gradle oddity **/bin/default/ diff --git a/.vscode/extensions.json b/.vscode/extensions.json deleted file mode 100644 index 5371eaa8630d..000000000000 --- a/.vscode/extensions.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "recommendations": [ - "redhat.java", - "editorconfig.editorconfig" - ], - "unwantedRecommendations": [ - "vscjava.vscode-java-pack", - "vscjava.vscode-gradle" - ] -} diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 1aab8989122c..000000000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "java.server.launchMode": "Standard", - "java.import.gradle.enabled": false, - "java.import.maven.enabled": false, - "java.jdt.ls.lombokSupport.enabled": false, - "java.compile.nullAnalysis.mode": "disabled", - "java.completion.maxResults": 500, - "java.completion.favoriteStaticMembers": [ "none.*" ], - "java.inlayHints.parameterNames.enabled": "all", - "java.inlayHints.parameterTypes.enabled": true, - "java.inlayHints.variableTypes.enabled": true, - "redhat.telemetry.enabled": false -} From 4addc3707b2990a3d9abf8402f5c7d40fff126aa Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Thu, 19 Feb 2026 22:57:07 -0800 Subject: [PATCH 2/7] Adding logic for sum and value count in DocValuesSkipper Signed-off-by: Ankit Jain --- 
.../simpletext/SimpleTextDocValuesReader.java | 33 +++++++++++ .../lucene90/Lucene90DocValuesConsumer.java | 57 +++++++++++++++++++ .../lucene90/Lucene90DocValuesFormat.java | 3 +- .../lucene90/Lucene90DocValuesProducer.java | 52 ++++++++++++++++- .../apache/lucene/index/DocValuesSkipper.java | 50 ++++++++++++++++ .../search/BaseDocValuesSkipperTests.java | 30 ++++++++++ .../tests/index/AssertingLeafReader.java | 42 ++++++++++++++ 7 files changed, 264 insertions(+), 3 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java index 6ecd8b5a08ed..a2954d661120 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java @@ -898,6 +898,39 @@ public int docCount() { return field.docCount; } + @Override + public long sumLow(int level) { + return sumLow(); + } + + @Override + public long sumHigh(int level) { + return sumHigh(); + } + + @Override + public long valueCount(int level) { + return valueCount(); + } + + @Override + public long sumLow() { + // SimpleText doesn't store pre-aggregated sums + return 0; + } + + @Override + public long sumHigh() { + // SimpleText doesn't store pre-aggregated sums + return 0; + } + + @Override + public long valueCount() { + // SimpleText doesn't store pre-aggregated value counts + return 0; + } + @Override public int minDocID(int level) { if (doc == -1) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java index 8223320dfcc7..4fe1935d8b58 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java +++ 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java @@ -194,12 +194,18 @@ private static class SkipAccumulator { int docCount; long minValue; long maxValue; + long sumHigh; + long sumLow; + long valueCount; SkipAccumulator(int docID) { minDocID = docID; minValue = Long.MAX_VALUE; maxValue = Long.MIN_VALUE; docCount = 0; + sumHigh = 0; + sumLow = 0; + valueCount = 0; } boolean isDone(int skipIndexIntervalSize, int valueCount, long nextValue, int nextDoc) { @@ -219,6 +225,20 @@ boolean isDone(int skipIndexIntervalSize, int valueCount, long nextValue, int ne void accumulate(long value) { minValue = Math.min(minValue, value); maxValue = Math.max(maxValue, value); + // 128-bit addition: add a signed long to (sumHigh, sumLow) + long newLow = sumLow + value; + // Detect carry/borrow using unsigned overflow detection + if (value >= 0) { + if (Long.compareUnsigned(newLow, sumLow) < 0) { + sumHigh++; + } + } else { + if (Long.compareUnsigned(newLow, sumLow) >= 0) { + sumHigh--; + } + } + sumLow = newLow; + valueCount++; } void accumulate(SkipAccumulator other) { @@ -227,6 +247,16 @@ void accumulate(SkipAccumulator other) { minValue = Math.min(minValue, other.minValue); maxValue = Math.max(maxValue, other.maxValue); docCount += other.docCount; + // 128-bit addition: add (other.sumHigh, other.sumLow) to (sumHigh, sumLow) + long newLow = sumLow + other.sumLow; + if (Long.compareUnsigned(newLow, sumLow) < 0 + || Long.compareUnsigned(newLow, other.sumLow) < 0) { + // unsigned overflow means carry + sumHigh++; + } + sumLow = newLow; + sumHigh += other.sumHigh; + valueCount += other.valueCount; } void nextDoc(int docID) { @@ -251,6 +281,9 @@ private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer) long globalMaxValue = Long.MIN_VALUE; long globalMinValue = Long.MAX_VALUE; int globalDocCount = 0; + long globalSumHigh = 0; + long globalSumLow = 0; + long globalValueCount = 0; int maxDocId = -1; final List accumulators = new 
ArrayList<>(); SkipAccumulator accumulator = null; @@ -262,6 +295,15 @@ private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer) globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue); globalMinValue = Math.min(globalMinValue, accumulator.minValue); globalDocCount += accumulator.docCount; + // 128-bit addition for global sum + long newLow = globalSumLow + accumulator.sumLow; + if (Long.compareUnsigned(newLow, globalSumLow) < 0 + || Long.compareUnsigned(newLow, accumulator.sumLow) < 0) { + globalSumHigh++; + } + globalSumLow = newLow; + globalSumHigh += accumulator.sumHigh; + globalValueCount += accumulator.valueCount; maxDocId = accumulator.maxDocID; accumulator = null; if (accumulators.size() == maxAccumulators) { @@ -284,6 +326,15 @@ private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer) globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue); globalMinValue = Math.min(globalMinValue, accumulator.minValue); globalDocCount += accumulator.docCount; + // 128-bit addition for global sum + long newLow = globalSumLow + accumulator.sumLow; + if (Long.compareUnsigned(newLow, globalSumLow) < 0 + || Long.compareUnsigned(newLow, accumulator.sumLow) < 0) { + globalSumHigh++; + } + globalSumLow = newLow; + globalSumHigh += accumulator.sumHigh; + globalValueCount += accumulator.valueCount; maxDocId = accumulator.maxDocID; writeLevels(accumulators); } @@ -295,6 +346,9 @@ private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer) assert globalDocCount <= maxDocId + 1; meta.writeInt(globalDocCount); meta.writeInt(maxDocId); + meta.writeLong(globalSumHigh); + meta.writeLong(globalSumLow); + meta.writeLong(globalValueCount); } private void writeLevels(List accumulators) throws IOException { @@ -319,6 +373,9 @@ private void writeLevels(List accumulators) throws IOException data.writeLong(accumulator.maxValue); data.writeLong(accumulator.minValue); data.writeInt(accumulator.docCount); + 
data.writeLong(accumulator.sumHigh); + data.writeLong(accumulator.sumLow); + data.writeLong(accumulator.valueCount); } } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesFormat.java index 922bc2481c99..1f3cd3a8f4a9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesFormat.java @@ -201,7 +201,8 @@ public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOExcepti // * 16 bytes: min / max value, // * 8 bytes: min / max docID // * 4 bytes: number of documents - private static final long SKIP_INDEX_INTERVAL_BYTES = 29L; + // * 24 bytes: sum (high + low) and value count + private static final long SKIP_INDEX_INTERVAL_BYTES = 53L; // number of intervals represented as a shift to create a new level, this is 1 << 3 == 8 // intervals. static final int SKIP_INDEX_LEVEL_SHIFT = 3; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java index ea748fbc8078..a88e448a3011 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java @@ -219,8 +219,12 @@ private DocValuesSkipperEntry readDocValueSkipperMeta(IndexInput meta) throws IO long minValue = meta.readLong(); int docCount = meta.readInt(); int maxDocID = meta.readInt(); + long sumHigh = meta.readLong(); + long sumLow = meta.readLong(); + long valueCount = meta.readLong(); - return new DocValuesSkipperEntry(offset, length, minValue, maxValue, docCount, maxDocID); + return new DocValuesSkipperEntry( + offset, length, minValue, maxValue, docCount, maxDocID, sumHigh, sumLow, valueCount); } private void 
readNumeric(IndexInput meta, NumericEntry entry) throws IOException { @@ -353,7 +357,15 @@ public void close() throws IOException { } private record DocValuesSkipperEntry( - long offset, long length, long minValue, long maxValue, int docCount, int maxDocId) {} + long offset, + long length, + long minValue, + long maxValue, + int docCount, + int maxDocId, + long sumHigh, + long sumLow, + long valueCount) {} private static class NumericEntry { long[] table; @@ -1884,6 +1896,9 @@ public DocValuesSkipper getSkipper(FieldInfo field) throws IOException { final long[] minValue = new long[SKIP_INDEX_MAX_LEVEL]; final long[] maxValue = new long[SKIP_INDEX_MAX_LEVEL]; final int[] docCount = new int[SKIP_INDEX_MAX_LEVEL]; + final long[] sumHigh = new long[SKIP_INDEX_MAX_LEVEL]; + final long[] sumLow = new long[SKIP_INDEX_MAX_LEVEL]; + final long[] valueCount = new long[SKIP_INDEX_MAX_LEVEL]; int levels = 1; @Override @@ -1913,6 +1928,9 @@ public void advance(int target) throws IOException { maxValue[level] = input.readLong(); minValue[level] = input.readLong(); docCount[level] = input.readInt(); + sumHigh[level] = input.readLong(); + sumLow[level] = input.readLong(); + valueCount[level] = input.readLong(); } if (valid) { // adjust levels @@ -1955,6 +1973,21 @@ public int docCount(int level) { return docCount[level]; } + @Override + public long sumLow(int level) { + return sumLow[level]; + } + + @Override + public long sumHigh(int level) { + return sumHigh[level]; + } + + @Override + public long valueCount(int level) { + return valueCount[level]; + } + @Override public long minValue() { return entry.minValue; @@ -1969,6 +2002,21 @@ public long maxValue() { public int docCount() { return entry.docCount; } + + @Override + public long sumLow() { + return entry.sumLow; + } + + @Override + public long sumHigh() { + return entry.sumHigh; + } + + @Override + public long valueCount() { + return entry.valueCount; + } }; } } diff --git 
a/lucene/core/src/java/org/apache/lucene/index/DocValuesSkipper.java b/lucene/core/src/java/org/apache/lucene/index/DocValuesSkipper.java index 79364b001736..a72c3adee220 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocValuesSkipper.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocValuesSkipper.java @@ -80,6 +80,32 @@ public abstract class DocValuesSkipper { */ public abstract int docCount(int level); + /** + * Return the lower 64 bits of the 128-bit sum of all values in the interval at the given level. + * Use together with {@link #sumHigh(int)} to reconstruct the full 128-bit sum. + * + *

NOTE: For multi-valued fields, this includes all values from all documents in the + * interval. The sum is stored as a signed 128-bit integer to avoid overflow. + */ + public abstract long sumLow(int level); + + /** + * Return the upper 64 bits of the 128-bit sum of all values in the interval at the given level. + * Use together with {@link #sumLow(int)} to reconstruct the full 128-bit sum. + * + *

NOTE: For multi-valued fields, this includes all values from all documents in the + * interval. The sum is stored as a signed 128-bit integer to avoid overflow. + */ + public abstract long sumHigh(int level); + + /** + * Return the total count of values (not documents) in the interval at the given level. + * + *

NOTE: For single-valued fields, this equals {@link #docCount(int)}. For + * multi-valued fields, this is the sum of value counts across all documents. + */ + public abstract long valueCount(int level); + /** * Return the global minimum value. * @@ -99,6 +125,30 @@ public abstract class DocValuesSkipper { /** Return the global number of documents with a value for the field. */ public abstract int docCount(); + /** + * Return the lower 64 bits of the global 128-bit sum of all values. Use together with {@link + * #sumHigh()} to reconstruct the full 128-bit sum. + * + *

NOTE: For multi-valued fields, this includes all values from all documents. + */ + public abstract long sumLow(); + + /** + * Return the upper 64 bits of the global 128-bit sum of all values. Use together with {@link + * #sumLow()} to reconstruct the full 128-bit sum. + * + *

NOTE: For multi-valued fields, this includes all values from all documents. + */ + public abstract long sumHigh(); + + /** + * Return the global total count of values (not documents). + * + *

NOTE: For single-valued fields, this equals {@link #docCount()}. For multi-valued + * fields, this is the sum of value counts across all documents. + */ + public abstract long valueCount(); + /** * Advance this skipper so that all levels intersects the range given by {@code minValue} and * {@code maxValue}. If there are no intersecting levels, the skipper is exhausted. diff --git a/lucene/core/src/test/org/apache/lucene/search/BaseDocValuesSkipperTests.java b/lucene/core/src/test/org/apache/lucene/search/BaseDocValuesSkipperTests.java index 6e1f5430e0e7..3b199252f1f3 100644 --- a/lucene/core/src/test/org/apache/lucene/search/BaseDocValuesSkipperTests.java +++ b/lucene/core/src/test/org/apache/lucene/search/BaseDocValuesSkipperTests.java @@ -191,6 +191,36 @@ public long maxValue() { public int docCount() { return 1024 + 1024 / 2; } + + @Override + public long sumLow(int level) { + return 0; + } + + @Override + public long sumHigh(int level) { + return 0; + } + + @Override + public long valueCount(int level) { + return docCount(level); + } + + @Override + public long sumLow() { + return 0; + } + + @Override + public long sumHigh() { + return 0; + } + + @Override + public long valueCount() { + return docCount(); + } }; } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java index ab4b3b143aab..474fdaed5849 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java @@ -1442,6 +1442,30 @@ public int docCount(int level) { return in.docCount(level); } + @Override + public long sumLow(int level) { + assertThread("Doc values skipper", creationThread); + assert iterating() : "Unpositioned iterator"; + Objects.checkIndex(level, numLevels()); + return in.sumLow(level); + } + + @Override + public long sumHigh(int 
level) { + assertThread("Doc values skipper", creationThread); + assert iterating() : "Unpositioned iterator"; + Objects.checkIndex(level, numLevels()); + return in.sumHigh(level); + } + + @Override + public long valueCount(int level) { + assertThread("Doc values skipper", creationThread); + assert iterating() : "Unpositioned iterator"; + Objects.checkIndex(level, numLevels()); + return in.valueCount(level); + } + @Override public long minValue() { assertThread("Doc values skipper", creationThread); @@ -1459,6 +1483,24 @@ public int docCount() { assertThread("Doc values skipper", creationThread); return in.docCount(); } + + @Override + public long sumLow() { + assertThread("Doc values skipper", creationThread); + return in.sumLow(); + } + + @Override + public long sumHigh() { + assertThread("Doc values skipper", creationThread); + return in.sumHigh(); + } + + @Override + public long valueCount() { + assertThread("Doc values skipper", creationThread); + return in.valueCount(); + } } /** Wraps a SortedSetDocValues but with additional asserts */ From 176445e5b1c1e3a426a9acb4f37506cef8911740 Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Thu, 19 Feb 2026 22:57:42 -0800 Subject: [PATCH 3/7] Adding unit tests for sum and value count logic Signed-off-by: Ankit Jain --- .../TestDocValuesSkipperSumAndValueCount.java | 397 ++++++++++++++++++ 1 file changed, 397 insertions(+) create mode 100644 lucene/core/src/test/org/apache/lucene/index/TestDocValuesSkipperSumAndValueCount.java diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesSkipperSumAndValueCount.java b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesSkipperSumAndValueCount.java new file mode 100644 index 000000000000..d0f3c397f82c --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesSkipperSumAndValueCount.java @@ -0,0 +1,397 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.index; + +import java.io.IOException; +import java.math.BigInteger; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.tests.util.LuceneTestCase; + +/** Tests for sum and valueCount in {@link DocValuesSkipper}. */ +public class TestDocValuesSkipperSumAndValueCount extends LuceneTestCase { + + /** Helper to reconstruct a 128-bit BigInteger from high/low longs. 
*/ + private static BigInteger toBigInteger(long high, long low) { + return BigInteger.valueOf(high) + .shiftLeft(64) + .add(BigInteger.valueOf(low).and(new BigInteger("FFFFFFFFFFFFFFFF", 16))); + } + + public void testSingleValuedNumericSum() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(); + config.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config); + + long expectedSum = 0; + int numDocs = atLeast(500); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + long value = random().nextInt(1000); + doc.add(NumericDocValuesField.indexedField("field", value)); + expectedSum += value; + writer.addDocument(doc); + } + writer.forceMerge(1); + + DirectoryReader reader = writer.getReader(); + LeafReader leafReader = getOnlyLeafReader(reader); + DocValuesSkipper skipper = leafReader.getDocValuesSkipper("field"); + assertNotNull(skipper); + + // Check global values + assertEquals(numDocs, skipper.docCount()); + assertEquals(numDocs, skipper.valueCount()); + BigInteger globalSum = toBigInteger(skipper.sumHigh(), skipper.sumLow()); + assertEquals(BigInteger.valueOf(expectedSum), globalSum); + + // Walk through level 0 intervals and verify sum/valueCount add up + long runningSum = 0; + long runningValueCount = 0; + int runningDocCount = 0; + skipper.advance(0); + while (skipper.minDocID(0) != DocIdSetIterator.NO_MORE_DOCS) { + BigInteger intervalSum = toBigInteger(skipper.sumHigh(0), skipper.sumLow(0)); + long intervalValueCount = skipper.valueCount(0); + int intervalDocCount = skipper.docCount(0); + + assertTrue("valueCount must be >= docCount", intervalValueCount >= intervalDocCount); + runningSum += intervalSum.longValueExact(); + runningValueCount += intervalValueCount; + runningDocCount += intervalDocCount; + + skipper.advance(skipper.maxDocID(0) + 1); + } + assertEquals(expectedSum, runningSum); + assertEquals(numDocs, runningValueCount); 
+ assertEquals(numDocs, runningDocCount); + + reader.close(); + writer.close(); + dir.close(); + } + + public void testMultiValuedNumericSum() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(); + config.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config); + + long expectedSum = 0; + long expectedValueCount = 0; + int numDocs = atLeast(500); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + int numValues = random().nextInt(1, 5); + for (int j = 0; j < numValues; j++) { + long value = random().nextInt(1000); + doc.add(SortedNumericDocValuesField.indexedField("field", value)); + expectedSum += value; + expectedValueCount++; + } + writer.addDocument(doc); + } + writer.forceMerge(1); + + DirectoryReader reader = writer.getReader(); + LeafReader leafReader = getOnlyLeafReader(reader); + DocValuesSkipper skipper = leafReader.getDocValuesSkipper("field"); + assertNotNull(skipper); + + // Global checks + assertEquals(numDocs, skipper.docCount()); + assertEquals(expectedValueCount, skipper.valueCount()); + BigInteger globalSum = toBigInteger(skipper.sumHigh(), skipper.sumLow()); + assertEquals(BigInteger.valueOf(expectedSum), globalSum); + + // Walk level 0 and verify valueCount > docCount for multi-valued + long totalValueCount = 0; + skipper.advance(0); + while (skipper.minDocID(0) != DocIdSetIterator.NO_MORE_DOCS) { + totalValueCount += skipper.valueCount(0); + skipper.advance(skipper.maxDocID(0) + 1); + } + assertEquals(expectedValueCount, totalValueCount); + + reader.close(); + writer.close(); + dir.close(); + } + + public void testSumWithNegativeValues() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(); + config.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config); + + long expectedSum = 0; + int numDocs = atLeast(500); + 
for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + long value = random().nextLong(-1000, 1000); + doc.add(NumericDocValuesField.indexedField("field", value)); + expectedSum += value; + writer.addDocument(doc); + } + writer.forceMerge(1); + + DirectoryReader reader = writer.getReader(); + LeafReader leafReader = getOnlyLeafReader(reader); + DocValuesSkipper skipper = leafReader.getDocValuesSkipper("field"); + assertNotNull(skipper); + + BigInteger globalSum = toBigInteger(skipper.sumHigh(), skipper.sumLow()); + assertEquals(BigInteger.valueOf(expectedSum), globalSum); + + reader.close(); + writer.close(); + dir.close(); + } + + public void testSumOverflow128Bit() throws Exception { + // Test that 128-bit sum handles values near Long.MAX_VALUE without overflow + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(); + config.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config); + + BigInteger expectedSum = BigInteger.ZERO; + int numDocs = atLeast(500); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + // Use values near Long.MAX_VALUE to force overflow of a single long + long value = Long.MAX_VALUE - random().nextInt(100); + doc.add(NumericDocValuesField.indexedField("field", value)); + expectedSum = expectedSum.add(BigInteger.valueOf(value)); + writer.addDocument(doc); + } + writer.forceMerge(1); + + DirectoryReader reader = writer.getReader(); + LeafReader leafReader = getOnlyLeafReader(reader); + DocValuesSkipper skipper = leafReader.getDocValuesSkipper("field"); + assertNotNull(skipper); + + BigInteger globalSum = toBigInteger(skipper.sumHigh(), skipper.sumLow()); + assertEquals( + "128-bit sum should handle overflow beyond Long.MAX_VALUE", expectedSum, globalSum); + + // Verify sumHigh is non-zero (overflow actually happened) + assertTrue( + "sumHigh should be non-zero for large sums exceeding Long.MAX_VALUE", + skipper.sumHigh() > 
0); + + reader.close(); + writer.close(); + dir.close(); + } + + public void testSumOverflowNegative128Bit() throws Exception { + // Test that 128-bit sum handles values near Long.MIN_VALUE + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(); + config.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config); + + BigInteger expectedSum = BigInteger.ZERO; + int numDocs = atLeast(500); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + long value = Long.MIN_VALUE + random().nextInt(100); + doc.add(NumericDocValuesField.indexedField("field", value)); + expectedSum = expectedSum.add(BigInteger.valueOf(value)); + writer.addDocument(doc); + } + writer.forceMerge(1); + + DirectoryReader reader = writer.getReader(); + LeafReader leafReader = getOnlyLeafReader(reader); + DocValuesSkipper skipper = leafReader.getDocValuesSkipper("field"); + assertNotNull(skipper); + + BigInteger globalSum = toBigInteger(skipper.sumHigh(), skipper.sumLow()); + assertEquals( + "128-bit sum should handle negative overflow beyond Long.MIN_VALUE", + expectedSum, + globalSum); + + assertTrue( + "sumHigh should be negative for large negative sums", skipper.sumHigh() < 0); + + reader.close(); + writer.close(); + dir.close(); + } + + public void testSumAndValueCountConsistencyAcrossLevels() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(); + config.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config); + + // Need enough docs to get multiple levels in the skip index + int numDocs = atLeast(10000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(NumericDocValuesField.indexedField("field", random().nextInt(10000))); + writer.addDocument(doc); + } + writer.forceMerge(1); + + DirectoryReader reader = writer.getReader(); + LeafReader leafReader = 
getOnlyLeafReader(reader); + DocValuesSkipper skipper = leafReader.getDocValuesSkipper("field"); + assertNotNull(skipper); + + // Advance and check that higher levels contain sums that are >= level 0 sums + skipper.advance(0); + while (skipper.minDocID(0) != DocIdSetIterator.NO_MORE_DOCS) { + int numLevels = skipper.numLevels(); + for (int level = 1; level < numLevels; level++) { + // Higher level should have >= valueCount than lower level + assertTrue( + "Higher level valueCount should be >= lower level", + skipper.valueCount(level) >= skipper.valueCount(level - 1)); + assertTrue( + "Higher level docCount should be >= lower level", + skipper.docCount(level) >= skipper.docCount(level - 1)); + } + skipper.advance(skipper.maxDocID(0) + 1); + } + + reader.close(); + writer.close(); + dir.close(); + } + + public void testEmptyField() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config); + + // Add docs without the field + for (int i = 0; i < 100; i++) { + writer.addDocument(new Document()); + } + // Add one doc with the field to create the field info + Document doc = new Document(); + doc.add(NumericDocValuesField.indexedField("field", 42)); + writer.addDocument(doc); + writer.forceMerge(1); + + DirectoryReader reader = writer.getReader(); + LeafReader leafReader = getOnlyLeafReader(reader); + DocValuesSkipper skipper = leafReader.getDocValuesSkipper("field"); + assertNotNull(skipper); + + assertEquals(1, skipper.docCount()); + assertEquals(1, skipper.valueCount()); + BigInteger globalSum = toBigInteger(skipper.sumHigh(), skipper.sumLow()); + assertEquals(BigInteger.valueOf(42), globalSum); + + reader.close(); + writer.close(); + dir.close(); + } + + public void testAllSameValues() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(); + config.setMergePolicy(newLogMergePolicy()); + 
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config); + + long value = random().nextLong(-1000, 1000); + int numDocs = atLeast(500); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(NumericDocValuesField.indexedField("field", value)); + writer.addDocument(doc); + } + writer.forceMerge(1); + + DirectoryReader reader = writer.getReader(); + LeafReader leafReader = getOnlyLeafReader(reader); + DocValuesSkipper skipper = leafReader.getDocValuesSkipper("field"); + assertNotNull(skipper); + + BigInteger expectedSum = BigInteger.valueOf(value).multiply(BigInteger.valueOf(numDocs)); + BigInteger globalSum = toBigInteger(skipper.sumHigh(), skipper.sumLow()); + assertEquals(expectedSum, globalSum); + assertEquals(numDocs, skipper.valueCount()); + + reader.close(); + writer.close(); + dir.close(); + } + + public void testIntervalSumsAddUpToGlobal() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(); + config.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config); + + int numDocs = atLeast(5000); + BigInteger expectedSum = BigInteger.ZERO; + long expectedValueCount = 0; + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + int numValues = random().nextInt(1, 4); + for (int j = 0; j < numValues; j++) { + long value = random().nextLong(); + doc.add(SortedNumericDocValuesField.indexedField("field", value)); + expectedSum = expectedSum.add(BigInteger.valueOf(value)); + expectedValueCount++; + } + writer.addDocument(doc); + } + writer.forceMerge(1); + + DirectoryReader reader = writer.getReader(); + LeafReader leafReader = getOnlyLeafReader(reader); + DocValuesSkipper skipper = leafReader.getDocValuesSkipper("field"); + assertNotNull(skipper); + + // Walk level 0 intervals and sum them up + BigInteger intervalSumTotal = BigInteger.ZERO; + long intervalValueCountTotal = 0; + skipper.advance(0); + while 
(skipper.minDocID(0) != DocIdSetIterator.NO_MORE_DOCS) { + intervalSumTotal = + intervalSumTotal.add(toBigInteger(skipper.sumHigh(0), skipper.sumLow(0))); + intervalValueCountTotal += skipper.valueCount(0); + skipper.advance(skipper.maxDocID(0) + 1); + } + + BigInteger globalSum = toBigInteger(skipper.sumHigh(), skipper.sumLow()); + assertEquals("Level 0 interval sums must equal global sum", globalSum, intervalSumTotal); + assertEquals( + "Level 0 interval valueCounts must equal global valueCount", + skipper.valueCount(), + intervalValueCountTotal); + assertEquals(expectedSum, globalSum); + assertEquals(expectedValueCount, skipper.valueCount()); + + reader.close(); + writer.close(); + dir.close(); + } +} From ed9007013d71ebcf02db82009801126408634dc6 Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Thu, 19 Feb 2026 23:05:11 -0800 Subject: [PATCH 4/7] Making the change backward compatible Signed-off-by: Ankit Jain --- .../lucene90/Lucene90DocValuesFormat.java | 30 +++++++++++----- .../lucene90/Lucene90DocValuesProducer.java | 34 ++++++++++++++----- 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesFormat.java index 1f3cd3a8f4a9..9a1d8c8314c7 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesFormat.java @@ -172,7 +172,8 @@ public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOExcepti static final String META_CODEC = "Lucene90DocValuesMetadata"; static final String META_EXTENSION = "dvm"; static final int VERSION_START = 0; - static final int VERSION_CURRENT = VERSION_START; + static final int VERSION_SUM_AND_VALUE_COUNT = 1; + static final int VERSION_CURRENT = VERSION_SUM_AND_VALUE_COUNT; // indicates docvalues type static final byte NUMERIC = 0; @@ -196,13 
+197,19 @@ public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOExcepti // number of documents in an interval private static final int DEFAULT_SKIP_INDEX_INTERVAL_SIZE = 4096; - // bytes on an interval: + // bytes on an interval for VERSION_START (v0): + // * 1 byte : number of levels + // * 16 bytes: min / max value, + // * 8 bytes: min / max docID + // * 4 bytes: number of documents + static final long SKIP_INDEX_INTERVAL_BYTES_V0 = 29L; + // bytes on an interval for VERSION_SUM_AND_VALUE_COUNT (v1): // * 1 byte : number of levels // * 16 bytes: min / max value, // * 8 bytes: min / max docID // * 4 bytes: number of documents // * 24 bytes: sum (high + low) and value count - private static final long SKIP_INDEX_INTERVAL_BYTES = 53L; + static final long SKIP_INDEX_INTERVAL_BYTES_V1 = 53L; // number of intervals represented as a shift to create a new level, this is 1 << 3 == 8 // intervals. static final int SKIP_INDEX_LEVEL_SHIFT = 3; @@ -212,19 +219,24 @@ public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOExcepti static final int SKIP_INDEX_MAX_LEVEL = 4; // number of bytes to skip when skipping a level. It does not take into account the // current interval that is being read. 
- static final long[] SKIP_INDEX_JUMP_LENGTH_PER_LEVEL = new long[SKIP_INDEX_MAX_LEVEL]; + static final long[] SKIP_INDEX_JUMP_LENGTH_PER_LEVEL_V0 = new long[SKIP_INDEX_MAX_LEVEL]; + static final long[] SKIP_INDEX_JUMP_LENGTH_PER_LEVEL_V1 = new long[SKIP_INDEX_MAX_LEVEL]; static { + computeJumpLengths(SKIP_INDEX_JUMP_LENGTH_PER_LEVEL_V0, SKIP_INDEX_INTERVAL_BYTES_V0); + computeJumpLengths(SKIP_INDEX_JUMP_LENGTH_PER_LEVEL_V1, SKIP_INDEX_INTERVAL_BYTES_V1); + } + + private static void computeJumpLengths(long[] jumpLengths, long intervalBytes) { // Size of the interval minus read bytes (1 byte for level and 4 bytes for maxDocID) - SKIP_INDEX_JUMP_LENGTH_PER_LEVEL[0] = SKIP_INDEX_INTERVAL_BYTES - 5L; + jumpLengths[0] = intervalBytes - 5L; for (int level = 1; level < SKIP_INDEX_MAX_LEVEL; level++) { // jump from previous level - SKIP_INDEX_JUMP_LENGTH_PER_LEVEL[level] = SKIP_INDEX_JUMP_LENGTH_PER_LEVEL[level - 1]; + jumpLengths[level] = jumpLengths[level - 1]; // nodes added by new level - SKIP_INDEX_JUMP_LENGTH_PER_LEVEL[level] += - (1 << (level * SKIP_INDEX_LEVEL_SHIFT)) * SKIP_INDEX_INTERVAL_BYTES; + jumpLengths[level] += (1 << (level * SKIP_INDEX_LEVEL_SHIFT)) * intervalBytes; // remove the byte levels added in the previous level - SKIP_INDEX_JUMP_LENGTH_PER_LEVEL[level] -= (1 << ((level - 1) * SKIP_INDEX_LEVEL_SHIFT)); + jumpLengths[level] -= (1 << ((level - 1) * SKIP_INDEX_LEVEL_SHIFT)); } } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java index a88e448a3011..70ad8df73189 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java @@ -16,7 +16,8 @@ */ package org.apache.lucene.codecs.lucene90; -import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL; 
+import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL_V0; +import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL_V1; import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_INDEX_MAX_LEVEL; import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT; @@ -219,9 +220,18 @@ private DocValuesSkipperEntry readDocValueSkipperMeta(IndexInput meta) throws IO long minValue = meta.readLong(); int docCount = meta.readInt(); int maxDocID = meta.readInt(); - long sumHigh = meta.readLong(); - long sumLow = meta.readLong(); - long valueCount = meta.readLong(); + long sumHigh; + long sumLow; + long valueCount; + if (version >= Lucene90DocValuesFormat.VERSION_SUM_AND_VALUE_COUNT) { + sumHigh = meta.readLong(); + sumLow = meta.readLong(); + valueCount = meta.readLong(); + } else { + sumHigh = 0; + sumLow = 0; + valueCount = 0; + } return new DocValuesSkipperEntry( offset, length, minValue, maxValue, docCount, maxDocID, sumHigh, sumLow, valueCount); @@ -1882,6 +1892,12 @@ public DocValuesSkipper getSkipper(FieldInfo field) throws IOException { if (input.length() > 0) { input.prefetch(0, 1); } + final boolean hasSumAndValueCount = + version >= Lucene90DocValuesFormat.VERSION_SUM_AND_VALUE_COUNT; + final long[] jumpLengths = + hasSumAndValueCount + ? SKIP_INDEX_JUMP_LENGTH_PER_LEVEL_V1 + : SKIP_INDEX_JUMP_LENGTH_PER_LEVEL_V0; // TODO: should we write to disk the actual max level for this segment? 
return new DocValuesSkipper() { final int[] minDocID = new int[SKIP_INDEX_MAX_LEVEL]; @@ -1920,7 +1936,7 @@ public void advance(int target) throws IOException { // check if current interval is competitive or we can jump to the next position for (int level = levels - 1; level >= 0; level--) { if ((maxDocID[level] = input.readInt()) < target) { - input.skipBytes(SKIP_INDEX_JUMP_LENGTH_PER_LEVEL[level]); // the jump for the level + input.skipBytes(jumpLengths[level]); // the jump for the level valid = false; break; } @@ -1928,9 +1944,11 @@ public void advance(int target) throws IOException { maxValue[level] = input.readLong(); minValue[level] = input.readLong(); docCount[level] = input.readInt(); - sumHigh[level] = input.readLong(); - sumLow[level] = input.readLong(); - valueCount[level] = input.readLong(); + if (hasSumAndValueCount) { + sumHigh[level] = input.readLong(); + sumLow[level] = input.readLong(); + valueCount[level] = input.readLong(); + } } if (valid) { // adjust levels From be7157019f87efc185aaf53649b6c38774cf9440 Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Thu, 19 Feb 2026 23:12:12 -0800 Subject: [PATCH 5/7] Adding test for skipper backward compatibility Signed-off-by: Ankit Jain --- .../TestDocValuesSkipperBackwardCompat.java | 249 ++++++++++++++++++ 1 file changed, 249 insertions(+) create mode 100644 lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestDocValuesSkipperBackwardCompat.java diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestDocValuesSkipperBackwardCompat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestDocValuesSkipperBackwardCompat.java new file mode 100644 index 000000000000..b96293038a47 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestDocValuesSkipperBackwardCompat.java @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene90; + +import java.math.BigInteger; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValuesSkipper; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.tests.util.LuceneTestCase; + +/** + * Tests backward compatibility of DocValuesSkipper format versioning. Verifies format constants, + * version-conditional behavior, and that the new v1 format correctly stores sum/valueCount. + */ +public class TestDocValuesSkipperBackwardCompat extends LuceneTestCase { + + private static BigInteger toBigInteger(long high, long low) { + return BigInteger.valueOf(high) + .shiftLeft(64) + .add(BigInteger.valueOf(low).and(new BigInteger("FFFFFFFFFFFFFFFF", 16))); + } + + /** Verify version constants are correctly defined for backward compatibility. 
*/ + public void testVersionConstants() { + assertEquals(0, Lucene90DocValuesFormat.VERSION_START); + assertEquals(1, Lucene90DocValuesFormat.VERSION_SUM_AND_VALUE_COUNT); + assertEquals( + Lucene90DocValuesFormat.VERSION_SUM_AND_VALUE_COUNT, + Lucene90DocValuesFormat.VERSION_CURRENT); + assertTrue( + "VERSION_CURRENT must be >= VERSION_START", + Lucene90DocValuesFormat.VERSION_CURRENT >= Lucene90DocValuesFormat.VERSION_START); + } + + /** Verify the jump table constants for v0 and v1 are correctly computed. */ + public void testJumpTableConstants() { + // v0: 29 bytes per interval + assertEquals(29L, Lucene90DocValuesFormat.SKIP_INDEX_INTERVAL_BYTES_V0); + // v1: 53 bytes per interval (v0 + 8 sumHigh + 8 sumLow + 8 valueCount) + assertEquals(53L, Lucene90DocValuesFormat.SKIP_INDEX_INTERVAL_BYTES_V1); + assertEquals( + Lucene90DocValuesFormat.SKIP_INDEX_INTERVAL_BYTES_V0 + 24, + Lucene90DocValuesFormat.SKIP_INDEX_INTERVAL_BYTES_V1); + + // Jump length at level 0 = interval bytes - 5 (1 byte levels + 4 bytes maxDocID already read) + assertEquals( + Lucene90DocValuesFormat.SKIP_INDEX_INTERVAL_BYTES_V0 - 5L, + Lucene90DocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL_V0[0]); + assertEquals( + Lucene90DocValuesFormat.SKIP_INDEX_INTERVAL_BYTES_V1 - 5L, + Lucene90DocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL_V1[0]); + + // v1 jump lengths must always be larger than v0 + for (int level = 0; level < Lucene90DocValuesFormat.SKIP_INDEX_MAX_LEVEL; level++) { + assertTrue( + "v1 jump length at level " + level + " should be > v0", + Lucene90DocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL_V1[level] + > Lucene90DocValuesFormat.SKIP_INDEX_JUMP_LENGTH_PER_LEVEL_V0[level]); + } + } + + /** Verify that a v1 index has correct sum and valueCount for single-valued numeric fields. 
*/ + public void testV1FormatHasSumAndValueCount() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(); + config.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config); + + long expectedSum = 0; + int numDocs = atLeast(500); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + long value = i + 1; + doc.add(NumericDocValuesField.indexedField("field", value)); + expectedSum += value; + writer.addDocument(doc); + } + writer.forceMerge(1); + + DirectoryReader reader = writer.getReader(); + LeafReader leafReader = getOnlyLeafReader(reader); + DocValuesSkipper skipper = leafReader.getDocValuesSkipper("field"); + assertNotNull(skipper); + + assertEquals(numDocs, skipper.docCount()); + assertEquals(numDocs, skipper.valueCount()); + BigInteger globalSum = toBigInteger(skipper.sumHigh(), skipper.sumLow()); + assertEquals(BigInteger.valueOf(expectedSum), globalSum); + assertEquals(1, skipper.minValue()); + assertEquals(numDocs, skipper.maxValue()); + + // Walk intervals and verify sum is non-zero at level 0 + boolean foundNonZeroSum = false; + skipper.advance(0); + while (skipper.minDocID(0) != DocIdSetIterator.NO_MORE_DOCS) { + BigInteger intervalSum = toBigInteger(skipper.sumHigh(0), skipper.sumLow(0)); + if (intervalSum.signum() != 0) { + foundNonZeroSum = true; + } + assertTrue("valueCount should be > 0", skipper.valueCount(0) > 0); + skipper.advance(skipper.maxDocID(0) + 1); + } + assertTrue("At least one interval should have non-zero sum", foundNonZeroSum); + + reader.close(); + writer.close(); + dir.close(); + } + + /** Verify multi-valued fields correctly track valueCount > docCount. 
*/ + public void testMultiValuedFieldValueCountExceedsDocCount() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(); + config.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config); + + long expectedValueCount = 0; + int numDocs = atLeast(500); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + int numValues = random().nextInt(2, 6); + for (int j = 0; j < numValues; j++) { + doc.add(SortedNumericDocValuesField.indexedField("field", random().nextInt(1000))); + expectedValueCount++; + } + writer.addDocument(doc); + } + writer.forceMerge(1); + + DirectoryReader reader = writer.getReader(); + LeafReader leafReader = getOnlyLeafReader(reader); + DocValuesSkipper skipper = leafReader.getDocValuesSkipper("field"); + assertNotNull(skipper); + + assertEquals(numDocs, skipper.docCount()); + assertEquals(expectedValueCount, skipper.valueCount()); + assertTrue( + "valueCount should exceed docCount for multi-valued fields", + skipper.valueCount() > skipper.docCount()); + + reader.close(); + writer.close(); + dir.close(); + } + + /** Verify advance(minValue, maxValue) works correctly with the new format. 
*/ + public void testValueRangeAdvanceWorksWithNewFormat() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(); + config.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config); + + int numDocs = atLeast(5000); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + doc.add(NumericDocValuesField.indexedField("field", i)); + writer.addDocument(doc); + } + writer.forceMerge(1); + + DirectoryReader reader = writer.getReader(); + LeafReader leafReader = getOnlyLeafReader(reader); + DocValuesSkipper skipper = leafReader.getDocValuesSkipper("field"); + assertNotNull(skipper); + + long rangeMin = numDocs / 4; + long rangeMax = numDocs / 2; + skipper.advance(rangeMin, rangeMax); + + if (skipper.minDocID(0) != DocIdSetIterator.NO_MORE_DOCS) { + assertTrue("interval maxValue should be >= rangeMin", skipper.maxValue(0) >= rangeMin); + assertTrue("interval minValue should be <= rangeMax", skipper.minValue(0) <= rangeMax); + } + + reader.close(); + writer.close(); + dir.close(); + } + + /** Verify min/max and docCount invariants hold across all intervals. 
*/ + public void testMinMaxDocCountInvariants() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(); + config.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config); + + int numDocs = atLeast(1000); + long globalMin = Long.MAX_VALUE; + long globalMax = Long.MIN_VALUE; + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + long value = random().nextLong(-10000, 10000); + globalMin = Math.min(globalMin, value); + globalMax = Math.max(globalMax, value); + doc.add(NumericDocValuesField.indexedField("field", value)); + writer.addDocument(doc); + } + writer.forceMerge(1); + + DirectoryReader reader = writer.getReader(); + LeafReader leafReader = getOnlyLeafReader(reader); + DocValuesSkipper skipper = leafReader.getDocValuesSkipper("field"); + assertNotNull(skipper); + + assertEquals(numDocs, skipper.docCount()); + assertEquals(globalMin, skipper.minValue()); + assertEquals(globalMax, skipper.maxValue()); + + int totalDocCount = 0; + skipper.advance(0); + while (skipper.minDocID(0) != DocIdSetIterator.NO_MORE_DOCS) { + assertTrue("docCount must be > 0", skipper.docCount(0) > 0); + assertTrue( + "interval minValue must be >= global minValue", + skipper.minValue(0) >= skipper.minValue()); + assertTrue( + "interval maxValue must be <= global maxValue", + skipper.maxValue(0) <= skipper.maxValue()); + totalDocCount += skipper.docCount(0); + skipper.advance(skipper.maxDocID(0) + 1); + } + assertEquals("sum of interval docCounts must equal global docCount", numDocs, totalDocCount); + + reader.close(); + writer.close(); + dir.close(); + } +} From bbd257f01f5d9838cccf2c81e119505757ff998d Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Thu, 19 Feb 2026 23:16:40 -0800 Subject: [PATCH 6/7] Revert "Adding configuration files to gitignore" This reverts commit 36aec7baad3427c4580f8a8903170efc22d3dc9e. 
--- .gitignore | 4 ---- .vscode/extensions.json | 10 ++++++++++ .vscode/settings.json | 13 +++++++++++++ 3 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 .vscode/extensions.json create mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index 99fbb00205c7..5acb86f4a8d8 100644 --- a/.gitignore +++ b/.gitignore @@ -21,10 +21,6 @@ gradle/wrapper/gradle-wrapper.jar /.classpath /.settings/ -# Kiro -.vscode -.kiro - # Eclipse Gradle oddity **/bin/default/ diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 000000000000..5371eaa8630d --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,10 @@ +{ + "recommendations": [ + "redhat.java", + "editorconfig.editorconfig" + ], + "unwantedRecommendations": [ + "vscjava.vscode-java-pack", + "vscjava.vscode-gradle" + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000000..1aab8989122c --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,13 @@ +{ + "java.server.launchMode": "Standard", + "java.import.gradle.enabled": false, + "java.import.maven.enabled": false, + "java.jdt.ls.lombokSupport.enabled": false, + "java.compile.nullAnalysis.mode": "disabled", + "java.completion.maxResults": 500, + "java.completion.favoriteStaticMembers": [ "none.*" ], + "java.inlayHints.parameterNames.enabled": "all", + "java.inlayHints.parameterTypes.enabled": true, + "java.inlayHints.variableTypes.enabled": true, + "redhat.telemetry.enabled": false +} From 97c22136c5f02534b3239cba7792891312b50328 Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Thu, 19 Feb 2026 23:26:45 -0800 Subject: [PATCH 7/7] tidy Signed-off-by: Ankit Jain --- lucene/CHANGES.txt | 3 ++- .../src/java/org/apache/lucene/index/DocValuesSkipper.java | 4 ++-- .../lucene/index/TestDocValuesSkipperSumAndValueCount.java | 7 ++----- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index df5d0d2cc9cd..26d445eaa18a 
100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -190,7 +190,8 @@ API Changes New Features --------------------- -(No changes) +* GITHUB#15737: Store pre-aggregated sum and value count in DocValuesSkipper (Ankit Jain) + Improvements --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValuesSkipper.java b/lucene/core/src/java/org/apache/lucene/index/DocValuesSkipper.java index a72c3adee220..6263fe51f53c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocValuesSkipper.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocValuesSkipper.java @@ -101,8 +101,8 @@ public abstract class DocValuesSkipper { /** * Return the total count of values (not documents) in the interval at the given level. * - *

NOTE: For single-valued fields, this equals {@link #docCount(int)}. For - * multi-valued fields, this is the sum of value counts across all documents. + *

NOTE: For single-valued fields, this equals {@link #docCount(int)}. For multi-valued + * fields, this is the sum of value counts across all documents. */ public abstract long valueCount(int level); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesSkipperSumAndValueCount.java b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesSkipperSumAndValueCount.java index d0f3c397f82c..f2ebbe093431 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesSkipperSumAndValueCount.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesSkipperSumAndValueCount.java @@ -16,7 +16,6 @@ */ package org.apache.lucene.index; -import java.io.IOException; import java.math.BigInteger; import org.apache.lucene.document.Document; import org.apache.lucene.document.NumericDocValuesField; @@ -234,8 +233,7 @@ public void testSumOverflowNegative128Bit() throws Exception { expectedSum, globalSum); - assertTrue( - "sumHigh should be negative for large negative sums", skipper.sumHigh() < 0); + assertTrue("sumHigh should be negative for large negative sums", skipper.sumHigh() < 0); reader.close(); writer.close(); @@ -375,8 +373,7 @@ public void testIntervalSumsAddUpToGlobal() throws Exception { long intervalValueCountTotal = 0; skipper.advance(0); while (skipper.minDocID(0) != DocIdSetIterator.NO_MORE_DOCS) { - intervalSumTotal = - intervalSumTotal.add(toBigInteger(skipper.sumHigh(0), skipper.sumLow(0))); + intervalSumTotal = intervalSumTotal.add(toBigInteger(skipper.sumHigh(0), skipper.sumLow(0))); intervalValueCountTotal += skipper.valueCount(0); skipper.advance(skipper.maxDocID(0) + 1); }