From 734c43355e6e747ccb475b8603e5f7342082e4ba Mon Sep 17 00:00:00 2001 From: tlrx Date: Tue, 16 Jun 2026 14:18:16 +0200 Subject: [PATCH 1/3] Check if merge is aborted before executing file integrity checks --- .../lucene/codecs/DocValuesConsumer.java | 1 + .../apache/lucene/codecs/FieldsConsumer.java | 1 + .../lucene/codecs/KnnVectorsWriter.java | 1 + .../apache/lucene/codecs/NormsConsumer.java | 1 + .../apache/lucene/codecs/PointsWriter.java | 1 + .../lucene/codecs/StoredFieldsWriter.java | 1 + .../lucene/codecs/TermVectorsWriter.java | 1 + .../codecs/lucene90/Lucene90PointsWriter.java | 1 + ...Lucene90CompressingStoredFieldsWriter.java | 1 + .../Lucene90CompressingTermVectorsWriter.java | 1 + .../codecs/perfield/PerFieldMergeState.java | 3 ++- .../org/apache/lucene/index/IndexWriter.java | 6 +++-- .../org/apache/lucene/index/MergeState.java | 24 +++++++++++++++++-- .../apache/lucene/index/SegmentMerger.java | 6 +++-- .../lucene/codecs/TestMergedVectorValues.java | 4 ++-- .../test/org/apache/lucene/index/TestDoc.java | 3 ++- .../lucene/index/TestSegmentMerger.java | 3 ++- 17 files changed, 48 insertions(+), 11 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java index 0c26af891529..b75cce9b1a5b 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java @@ -132,6 +132,7 @@ public abstract void addSortedSetField(FieldInfo field, DocValuesProducer values public void merge(MergeState mergeState) throws IOException { for (DocValuesProducer docValuesProducer : mergeState.docValuesProducers) { if (docValuesProducer != null) { + mergeState.checkAborted(); docValuesProducer.checkIntegrity(); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java index 5443585ab970..537410f85698 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java @@ -80,6 +80,7 @@ public void merge(MergeState mergeState, NormsProducer norms) throws IOException final int maxDoc = mergeState.maxDocs[readerIndex]; if (f != null) { + mergeState.checkAborted(); f.checkIntegrity(); slices.add(new ReaderSlice(docBase, maxDoc, readerIndex)); fields.add(f); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java index d2215e99808f..d1e2b2ee1c40 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java @@ -113,6 +113,7 @@ public final void merge(MergeState mergeState) throws IOException { KnnVectorsReader reader = mergeState.knnVectorsReaders[i]; assert reader != null || mergeState.fieldInfos[i].hasVectorValues() == false; if (reader != null) { + mergeState.checkAborted(); reader.checkIntegrity(); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java index 7147e6b3327c..fedd9584db31 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/NormsConsumer.java @@ -66,6 +66,7 @@ public abstract void addNormsField(FieldInfo field, NormsProducer normsProducer) public void merge(MergeState mergeState) throws IOException { for (NormsProducer normsProducer : mergeState.normsProducers) { if (normsProducer != null) { + mergeState.checkAborted(); normsProducer.checkIntegrity(); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java index 9d756e36d6fb..4b4f2a017738 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/PointsWriter.java @@ -222,6 +222,7 @@ public void merge(MergeState mergeState) throws IOException { // check each incoming reader for (PointsReader reader : mergeState.pointsReaders) { if (reader != null) { + mergeState.checkAborted(); reader.checkIntegrity(); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java index a9f9ce464a30..f5eb9e4c1137 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java @@ -130,6 +130,7 @@ public int merge(MergeState mergeState) throws IOException { List subs = new ArrayList<>(); for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) { StoredFieldsReader storedFieldsReader = mergeState.storedFieldsReaders[i]; + mergeState.checkAborted(); storedFieldsReader.checkIntegrity(); subs.add( new StoredFieldsMergeSub( diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java index 45dcc504186e..8c9b11dd352d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java @@ -200,6 +200,7 @@ public int merge(MergeState mergeState) throws IOException { for (int i = 0; i < mergeState.termVectorsReaders.length; i++) { TermVectorsReader reader = mergeState.termVectorsReaders[i]; if (reader != null) { + mergeState.checkAborted(); reader.checkIntegrity(); } subs.add(new TermVectorsMergeSub(mergeState.docMaps[i], reader, mergeState.maxDocs[i])); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java index 7f56d855473e..eaea9d45bf57 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java @@ -204,6 +204,7 @@ public void merge(MergeState mergeState) throws IOException { } for (PointsReader reader : mergeState.pointsReaders) { if (reader != null) { + mergeState.checkAborted(); reader.checkIntegrity(); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsWriter.java index 39009b1eb568..4d2b2f25a14e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsWriter.java @@ -601,6 +601,7 @@ public int merge(MergeState mergeState) throws IOException { new ArrayList<>(mergeState.storedFieldsReaders.length); for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) { final StoredFieldsReader reader = mergeState.storedFieldsReaders[i]; + mergeState.checkAborted(); reader.checkIntegrity(); MergeStrategy mergeStrategy = getMergeStrategy(mergeState, matchingReaders, i); if (mergeStrategy == MergeStrategy.VISITOR) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java index 00eddc03ee8e..3fe9f2a47ef8 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java @@ -898,6 +898,7 @@ public int merge(MergeState mergeState) throws IOException { for (int i = 0; i < numReaders; i++) { final TermVectorsReader reader = mergeState.termVectorsReaders[i]; if (reader != null) { + mergeState.checkAborted(); reader.checkIntegrity(); } final boolean bulkMerge = canPerformBulkMerge(mergeState, matchingReaders, i); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java index bb67ec618fd2..85df2414aa38 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java @@ -69,7 +69,8 @@ static MergeState restrictFields(MergeState in, Collection fields) { in.maxDocs, in.infoStream, in.intraMergeTaskExecutor, - in.needsIndexSort); + in.needsIndexSort, + in.oneMerge); } private static class FilterFieldInfos extends FieldInfos { diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 79bada22d57b..faf93829feaa 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -3491,7 +3491,8 @@ public void addIndexesReaderMerge(MergePolicy.OneMerge merge) throws IOException trackingDir, globalFieldNumberMap, context, - intraMergeExecutor); + intraMergeExecutor, + merge); try { if (!merger.shouldMerge()) { return; @@ -5292,7 +5293,8 @@ public int length() { dirWrapper, globalFieldNumberMap, context, - intraMergeExecutor); + intraMergeExecutor, + merge); MergeState mergeState = merger.mergeState; MergeState.DocMap[] docMaps; try { diff --git a/lucene/core/src/java/org/apache/lucene/index/MergeState.java b/lucene/core/src/java/org/apache/lucene/index/MergeState.java index 838699215f0d..efc8eb9a5823 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergeState.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergeState.java @@ -91,14 +91,22 @@ public class MergeState { /** Indicates if the index needs to be sorted * */ public boolean needsIndexSort; + /** + * The merge that this state is associated with, or {@code null} if this merge state is not + * associated with an {@link IndexWriter} merge (e.g. for addIndexes). + */ + public final MergePolicy.OneMerge oneMerge; + /** Sole constructor. */ MergeState( List readers, SegmentInfo segmentInfo, InfoStream infoStream, - Executor intraMergeTaskExecutor) + Executor intraMergeTaskExecutor, + MergePolicy.OneMerge oneMerge) throws IOException { verifyIndexSort(readers, segmentInfo); + this.oneMerge = oneMerge; this.infoStream = infoStream; int numReaders = readers.size(); this.intraMergeTaskExecutor = intraMergeTaskExecutor; @@ -230,6 +238,16 @@ private DocMap[] buildDocMaps(List readers, Sort indexSort) throws } } + /** + * Checks if the merge has been aborted, throwing {@link MergePolicy.MergeAbortedException} if so. + * This is a no-op if this merge state is not associated with an {@link IndexWriter} merge. + */ + public void checkAborted() throws MergePolicy.MergeAbortedException { + if (oneMerge != null) { + oneMerge.checkAborted(); + } + } + private static void verifyIndexSort(List readers, SegmentInfo segmentInfo) { Sort indexSort = segmentInfo.getIndexSort(); if (indexSort == null) { @@ -284,7 +302,8 @@ public MergeState( int[] maxDocs, InfoStream infoStream, Executor intraMergeTaskExecutor, - boolean needsIndexSort) { + boolean needsIndexSort, + MergePolicy.OneMerge oneMerge) { this.docMaps = docMaps; this.segmentInfo = segmentInfo; this.mergeFieldInfos = mergeFieldInfos; @@ -301,5 +320,6 @@ public MergeState( this.infoStream = infoStream; this.intraMergeTaskExecutor = intraMergeTaskExecutor; this.needsIndexSort = needsIndexSort; + this.oneMerge = oneMerge; } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java index 8dab44c1c8e2..321780406ad7 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java @@ -60,18 +60,20 @@ final class SegmentMerger { Directory dir, FieldInfos.FieldNumbers fieldNumbers, IOContext context, - Executor intraMergeTaskExecutor) + Executor intraMergeTaskExecutor, + MergePolicy.OneMerge oneMerge) throws IOException { if (context.context() != IOContext.Context.MERGE) { throw new IllegalArgumentException( "IOContext.context should be MERGE; got: " + context.context()); } - mergeState = new MergeState(readers, segmentInfo, infoStream, intraMergeTaskExecutor); mergeStateCreationThread = Thread.currentThread(); directory = dir; this.codec = segmentInfo.getCodec(); this.context = context; this.fieldInfosBuilder = new FieldInfos.Builder(fieldNumbers); + this.mergeState = + new MergeState(readers, segmentInfo, infoStream, intraMergeTaskExecutor, oneMerge); Version minVersion = Version.LATEST; for (CodecReader reader : readers) { Version leafMinVersion = reader.getMetaData().minVersion(); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/TestMergedVectorValues.java b/lucene/core/src/test/org/apache/lucene/codecs/TestMergedVectorValues.java index 5f1fc6a2267c..e00a1d8c96a8 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/TestMergedVectorValues.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/TestMergedVectorValues.java @@ -40,7 +40,7 @@ public void testSkipsInMergedByteVectorValues() throws IOException { MergeState state = new MergeState( null, null, null, null, null, null, null, null, null, null, null, null, null, null, - null, false); + null, false, null); // Run the test ByteVectorValues values = @@ -68,7 +68,7 @@ public void testSkipsInMergedFloat32VectorValues() throws IOException { MergeState state = new MergeState( null, null, null, null, null, null, null, null, null, null, null, null, null, null, - null, false); + null, false, null); // Run the test FloatVectorValues values = diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java index 13384fcb8925..2151b24157cb 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java @@ -239,7 +239,8 @@ private SegmentCommitInfo merge( trackingDir, new FieldInfos.FieldNumbers(null, null), context, - new SameThreadExecutorService()); + new SameThreadExecutorService(), + null); merger.merge(); merger.cleanupMerge(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java index 762e934a636b..56dd6ed2e2ef 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java @@ -107,7 +107,8 @@ public void testMerge() throws IOException { mergedDir, new FieldInfos.FieldNumbers(null, null), newIOContext(random(), IOContext.merge(new MergeInfo(-1, -1, false, -1))), - new SameThreadExecutorService()); + new SameThreadExecutorService(), + null); MergeState mergeState = merger.merge(); merger.cleanupMerge(); int docsMerged = mergeState.segmentInfo.maxDoc(); From db5d68462ecc831cf3102e4f3a8691d407026618 Mon Sep 17 00:00:00 2001 From: tlrx Date: Wed, 17 Jun 2026 09:20:21 +0200 Subject: [PATCH 2/3] @lucene.internal --- lucene/core/src/java/org/apache/lucene/index/MergeState.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/index/MergeState.java b/lucene/core/src/java/org/apache/lucene/index/MergeState.java index efc8eb9a5823..fafdafbb0a49 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergeState.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergeState.java @@ -94,6 +94,8 @@ public class MergeState { /** * The merge that this state is associated with, or {@code null} if this merge state is not * associated with an {@link IndexWriter} merge (e.g. for addIndexes). + * + * @lucene.internal */ public final MergePolicy.OneMerge oneMerge; From 4ee6459c213aa3188b6a070196bb836191f747a0 Mon Sep 17 00:00:00 2001 From: tlrx Date: Fri, 19 Jun 2026 09:18:27 +0200 Subject: [PATCH 3/3] add change --- lucene/CHANGES.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index da64a61cc8c0..8b1d73eda6ed 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -407,6 +407,9 @@ Improvements DiversifiedTopDocsCollector, removing usages of the deprecated IndexSearcher#search(Query, Collector). (Luca Cavanna) +* GITHUB#16264: Check if merge is aborted before executing file integrity checks to avoid + costly full-file checksums on segments when the merge has already been cancelled. (Tanguy Leroux) + Optimizations --------------------- * GITHUB#16222: MultiTermQuery constant-score wrapper now defers term collection to ScorerSupplier#get()