From 6fa63d2b6617b9835ebb1eafa83527fbae33dbfe Mon Sep 17 00:00:00 2001 From: tlrx Date: Tue, 19 May 2026 17:26:35 +0200 Subject: [PATCH 1/3] Allow merge integrity checks to be aborted sooner This change introduces a merge AbortChecker that is threaded through MergeState into StoredFieldsReader.checkIntegrity and CodecUtil.checksumEntireFile, allowing long-running integrity checks like in #13354 to be aborted sooner after a merge is aborted. It is enabled via IndexWriterConfig.setMergeAbortCheckIntervalBytes, which indicate a number of bytes to read during checksumming before checking if the corresponding merge has been aborted. Only stored fields are wired up but other files like postings and doc values could benefit from the same treatment in a follow-up. Having MergeState expose the AbortChecker.checkAborted method can also be useful to other merging logic I think. --- .../SimpleTextStoredFieldsReader.java | 4 ++ .../org/apache/lucene/codecs/CodecUtil.java | 59 ++++++++++++++++++- .../lucene/codecs/StoredFieldsReader.java | 8 +++ .../lucene/codecs/StoredFieldsWriter.java | 2 +- .../lucene90/compressing/FieldsIndex.java | 4 ++ .../compressing/FieldsIndexReader.java | 8 ++- ...Lucene90CompressingStoredFieldsReader.java | 10 +++- ...Lucene90CompressingStoredFieldsWriter.java | 2 +- .../codecs/perfield/PerFieldMergeState.java | 3 +- .../org/apache/lucene/index/IndexWriter.java | 22 ++++++- .../lucene/index/IndexWriterConfig.java | 17 ++++++ .../lucene/index/LiveIndexWriterConfig.java | 14 +++++ .../org/apache/lucene/index/MergePolicy.java | 31 ++++++++++ .../org/apache/lucene/index/MergeState.java | 11 +++- .../apache/lucene/index/SegmentMerger.java | 6 +- .../lucene/index/SlowCodecReaderWrapper.java | 5 ++ .../SlowCompositeCodecReaderWrapper.java | 7 ++- .../lucene/index/SortingCodecReader.java | 5 ++ .../apache/lucene/codecs/TestCodecUtil.java | 37 ++++++++++++ .../lucene/codecs/TestMergedVectorValues.java | 39 ++++++++++-- .../test/org/apache/lucene/index/TestDoc.java | 3 +- .../lucene/index/TestSegmentMerger.java | 3 +- .../AssertingStoredFieldsFormat.java | 6 ++ .../tests/index/MismatchedCodecReader.java | 6 ++ 24 files changed, 292 insertions(+), 20 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java index ff396af78a4f..2799907876ea 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java @@ -36,6 +36,7 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.store.AlreadyClosedException; @@ -226,4 +227,7 @@ public String toString() { @Override public void checkIntegrity() throws IOException {} + + @Override + public void checkIntegrity(MergePolicy.AbortChecker abortChecker) throws IOException {} } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java index 91c1c045247d..df2a6a6bf16a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java @@ -22,6 +22,7 @@ import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexFormatTooNewException; import org.apache.lucene.index.IndexFormatTooOldException; +import org.apache.lucene.index.MergePolicy; import org.apache.lucene.store.BufferedChecksumIndexInput; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.DataInput; @@ -604,9 +605,31 @@ private static void validateFooter(IndexInput in) throws IOException { * extract the checksum value, call {@link #retrieveChecksum}. */ public static long checksumEntireFile(IndexInput input) throws IOException { + return checksumEntireFile(input, MergePolicy.AbortChecker.NO_OP); + } + + /** + * Like {@link #checksumEntireFile(IndexInput)}, but periodically checks if the operation should + * be aborted via the provided {@link org.apache.lucene.index.MergePolicy.AbortChecker}. This is + * useful during merge operations where the merge may be cancelled while a long-running integrity + * check is in progress. + * + * @param input the index input to checksum + * @param abortChecker checked periodically during the read; throws {@link + * org.apache.lucene.index.MergePolicy.MergeAbortedException} if the operation should stop + */ + public static long checksumEntireFile(IndexInput input, MergePolicy.AbortChecker abortChecker) + throws IOException { IndexInput clone = input.clone(); clone.seek(0); - ChecksumIndexInput in = new BufferedChecksumIndexInput(clone); + final ChecksumIndexInput in; + int intervalBytes = abortChecker.getAbortCheckIntervalBytes(); + if (intervalBytes > 0) { + in = new AbortableBufferedChecksumIndexInput(clone, abortChecker, intervalBytes); + } else { + assert abortChecker == MergePolicy.AbortChecker.NO_OP : abortChecker; + in = new BufferedChecksumIndexInput(clone); + } assert in.getFilePointer() == 0; if (in.length() < footerLength()) { throw new CorruptIndexException( @@ -675,4 +698,38 @@ public static int readBEInt(DataInput in) throws IOException { public static long readBELong(DataInput in) throws IOException { return (((long) readBEInt(in)) << 32) | (readBEInt(in) & 0xFFFFFFFFL); } + + /** + * A {@link BufferedChecksumIndexInput} that periodically checks if the current merge should be + * aborted. + * + *

The check is performed in {@link #readBytes} since that is what {@link + * ChecksumIndexInput#seek} calls in a loop to compute the checksum. + */ + private static final class AbortableBufferedChecksumIndexInput + extends BufferedChecksumIndexInput { + + private final MergePolicy.AbortChecker abortChecker; + private final int intervalBytes; + private long bytesSinceLastCheck; + + AbortableBufferedChecksumIndexInput( + IndexInput delegate, MergePolicy.AbortChecker abortChecker, int intervalBytes) { + super(delegate); + this.abortChecker = abortChecker; + this.intervalBytes = intervalBytes; + // set to trigger a check before the first read + this.bytesSinceLastCheck = intervalBytes; + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + bytesSinceLastCheck += len; + if (bytesSinceLastCheck >= intervalBytes) { + abortChecker.checkAborted(); + bytesSinceLastCheck = 0L; + } + super.readBytes(b, offset, len); + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java index 0e4d33c5abe2..d98453d7e145 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsReader.java @@ -18,6 +18,7 @@ import java.io.Closeable; import java.io.IOException; +import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.StoredFields; @@ -47,6 +48,13 @@ protected StoredFieldsReader() {} */ public abstract void checkIntegrity() throws IOException; + /** + * Checks consistency of this reader, with periodic merge abort checking. + * + * @lucene.internal + */ + public abstract void checkIntegrity(MergePolicy.AbortChecker abortChecker) throws IOException; + /** * Returns an instance optimized for merging. This instance may not be cloned. * diff --git a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java index a9f9ce464a30..42d15873245d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java @@ -130,7 +130,7 @@ public int merge(MergeState mergeState) throws IOException { List subs = new ArrayList<>(); for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) { StoredFieldsReader storedFieldsReader = mergeState.storedFieldsReaders[i]; - storedFieldsReader.checkIntegrity(); + storedFieldsReader.checkIntegrity(mergeState.abortChecker); subs.add( new StoredFieldsMergeSub( new MergeVisitor(mergeState, i), diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/FieldsIndex.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/FieldsIndex.java index a0f59575b27b..7dc3fd4bd0d9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/FieldsIndex.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/FieldsIndex.java @@ -18,6 +18,7 @@ import java.io.Closeable; import java.io.IOException; +import org.apache.lucene.index.MergePolicy; abstract class FieldsIndex implements Cloneable, Closeable { @@ -38,6 +39,9 @@ final long getStartPointer(int docID) { /** Check the integrity of the index. */ abstract void checkIntegrity() throws IOException; + /** Check the integrity of the index, with periodic merge abort checking. */ + public abstract void checkIntegrity(MergePolicy.AbortChecker abortChecker) throws IOException; + @Override public abstract FieldsIndex clone(); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/FieldsIndexReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/FieldsIndexReader.java index d823626c9da8..e44fbffaeaa7 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/FieldsIndexReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/FieldsIndexReader.java @@ -24,6 +24,7 @@ import java.util.Objects; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.MergePolicy; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FileTypeHint; import org.apache.lucene.store.IOContext; @@ -155,6 +156,11 @@ public long getMaxPointer() { @Override void checkIntegrity() throws IOException { - CodecUtil.checksumEntireFile(indexInput); + checkIntegrity(MergePolicy.AbortChecker.NO_OP); + } + + @Override + public void checkIntegrity(MergePolicy.AbortChecker abortChecker) throws IOException { + CodecUtil.checksumEntireFile(indexInput, abortChecker); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsReader.java index 9d142ba5ef28..ee6c907e5e21 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsReader.java @@ -50,6 +50,7 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.StoredFieldDataInput; import org.apache.lucene.index.StoredFieldVisitor; @@ -756,8 +757,13 @@ int getNumDocs() { @Override public void checkIntegrity() throws IOException { - indexReader.checkIntegrity(); - CodecUtil.checksumEntireFile(fieldsStream); + checkIntegrity(MergePolicy.AbortChecker.NO_OP); + } + + @Override + public void checkIntegrity(MergePolicy.AbortChecker abortChecker) throws IOException { + indexReader.checkIntegrity(abortChecker); + CodecUtil.checksumEntireFile(fieldsStream, abortChecker); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsWriter.java index 39009b1eb568..28d35a296d25 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsWriter.java @@ -601,7 +601,7 @@ public int merge(MergeState mergeState) throws IOException { new ArrayList<>(mergeState.storedFieldsReaders.length); for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) { final StoredFieldsReader reader = mergeState.storedFieldsReaders[i]; - reader.checkIntegrity(); + reader.checkIntegrity(mergeState.abortChecker); MergeStrategy mergeStrategy = getMergeStrategy(mergeState, matchingReaders, i); if (mergeStrategy == MergeStrategy.VISITOR) { visitors[i] = new MergeVisitor(mergeState, i); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java index bb67ec618fd2..4ef70bac3d1a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldMergeState.java @@ -69,7 +69,8 @@ static MergeState restrictFields(MergeState in, Collection fields) { in.maxDocs, in.infoStream, in.intraMergeTaskExecutor, - in.needsIndexSort); + in.needsIndexSort, + in.abortChecker); } private static class FilterFieldInfos extends FieldInfos { diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 79bada22d57b..82a1c5ad045d 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -3483,6 +3483,14 @@ public void addIndexesReaderMerge(MergePolicy.OneMerge merge) throws IOException } } + final MergePolicy.AbortChecker mergeAbortChecker; + if (config.getMergeAbortCheckIntervalBytes() > 0) { + mergeAbortChecker = + new MergePolicy.AbortChecker(merge, config.getMergeAbortCheckIntervalBytes()); + } else { + mergeAbortChecker = MergePolicy.AbortChecker.NO_OP; + } + SegmentMerger merger = new SegmentMerger( readers, @@ -3491,7 +3499,8 @@ public void addIndexesReaderMerge(MergePolicy.OneMerge merge) throws IOException trackingDir, globalFieldNumberMap, context, - intraMergeExecutor); + intraMergeExecutor, + mergeAbortChecker); try { if (!merger.shouldMerge()) { return; @@ -5284,6 +5293,14 @@ public int length() { } } + final MergePolicy.AbortChecker mergeAbortChecker; + if (config.getMergeAbortCheckIntervalBytes() > 0) { + mergeAbortChecker = + new MergePolicy.AbortChecker(merge, config.getMergeAbortCheckIntervalBytes()); + } else { + mergeAbortChecker = MergePolicy.AbortChecker.NO_OP; + } + final SegmentMerger merger = new SegmentMerger( mergeReaders, @@ -5292,7 +5309,8 @@ public int length() { dirWrapper, globalFieldNumberMap, context, - intraMergeExecutor); + intraMergeExecutor, + mergeAbortChecker); MergeState mergeState = merger.mergeState; MergeState.DocMap[] docMaps; try { diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java index a1e1b7eaad9e..d13638ae4807 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java @@ -561,4 +561,21 @@ public IndexWriterConfig setParentField(String parentField) { this.parentField = parentField; return this; } + + /** + * Expert: sets the interval in bytes between abort checks during merge integrity verification. + * During merges, codec readers verify file integrity by reading entire files to compute + * checksums. This setting controls how often the merge abort flag is checked during these reads. + * Smaller values allow merges to be aborted more promptly but add slight overhead. + * + * @param intervalBytes the interval in bytes, must be positive + */ + public IndexWriterConfig setMergeAbortCheckIntervalBytes(int intervalBytes) { + if (intervalBytes <= 0) { + throw new IllegalArgumentException( + "mergeAbortCheckIntervalBytes must be positive, got: " + intervalBytes); + } + this.mergeAbortCheckIntervalBytes = intervalBytes; + return this; + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java index f08c86329ea6..9c072d8524d7 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java @@ -117,6 +117,9 @@ public class LiveIndexWriterConfig { /** The IndexWriter event listener to record key events * */ protected IndexWriterEventListener eventListener; + /** Interval in bytes between abort checks during merge integrity verification. */ + protected volatile int mergeAbortCheckIntervalBytes; + // used by IndexWriterConfig LiveIndexWriterConfig(Analyzer analyzer) { this.analyzer = analyzer; @@ -461,6 +464,14 @@ public IndexWriterEventListener getIndexWriterEventListener() { return eventListener; } + /** + * Returns the interval in bytes between abort checks during merge integrity verification. See + * {@link IndexWriterConfig#setMergeAbortCheckIntervalBytes(int)}. + */ + public int getMergeAbortCheckIntervalBytes() { + return mergeAbortCheckIntervalBytes; + } + /** Returns the parent document field name if configured. */ public String getParentField() { return parentField; @@ -495,6 +506,9 @@ public String toString() { sb.append("leafSorter=").append(getLeafSorter()).append("\n"); sb.append("eventListener=").append(getIndexWriterEventListener()).append("\n"); sb.append("parentField=").append(getParentField()).append("\n"); + sb.append("mergeAbortCheckIntervalBytes=") + .append(getMergeAbortCheckIntervalBytes()) + .append("\n"); return sb.toString(); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java index 0aba20338723..94a996541f21 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java @@ -570,6 +570,37 @@ public MergeException(Throwable exc) { } } + /** + * Interface for checking whether a merge has been aborted. Implementations should throw {@link + * MergeAbortedException} if the merge should stop. + * + * @lucene.experimental + */ + public static final class AbortChecker { + + /** A no-op checker */ + public static final AbortChecker NO_OP = new AbortChecker(null, 0); + + private final OneMerge oneMerge; + private final int abortCheckIntervalBytes; + + public AbortChecker(OneMerge oneMerge, int abortCheckIntervalBytes) { + assert oneMerge != null || abortCheckIntervalBytes == 0 : abortCheckIntervalBytes; + this.oneMerge = oneMerge; + this.abortCheckIntervalBytes = abortCheckIntervalBytes; + } + + /** Checks if the merge should be aborted, throwing MergeAbortedException if so. */ + public void checkAborted() throws MergeAbortedException { + oneMerge.checkAborted(); + } + + /** Interval in bytes between abort checks during merge integrity verification. */ + public int getAbortCheckIntervalBytes() { + return abortCheckIntervalBytes; + } + } + /** * Thrown when a merge was explicitly aborted because {@link IndexWriter#abortMerges} was called. * Normally this exception is privately caught and suppressed by {@link IndexWriter}. diff --git a/lucene/core/src/java/org/apache/lucene/index/MergeState.java b/lucene/core/src/java/org/apache/lucene/index/MergeState.java index 838699215f0d..a5e2fa0bf04c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergeState.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergeState.java @@ -88,6 +88,9 @@ public class MergeState { /** Executor for intra merge activity */ public final Executor intraMergeTaskExecutor; + /** Used to check whether a merge has been aborted */ + public final MergePolicy.AbortChecker abortChecker; + /** Indicates if the index needs to be sorted * */ public boolean needsIndexSort; @@ -96,12 +99,14 @@ public class MergeState { List readers, SegmentInfo segmentInfo, InfoStream infoStream, - Executor intraMergeTaskExecutor) + Executor intraMergeTaskExecutor, + MergePolicy.AbortChecker abortChecker) throws IOException { verifyIndexSort(readers, segmentInfo); this.infoStream = infoStream; int numReaders = readers.size(); this.intraMergeTaskExecutor = intraMergeTaskExecutor; + this.abortChecker = abortChecker; maxDocs = new int[numReaders]; fieldsProducers = new FieldsProducer[numReaders]; @@ -284,7 +289,8 @@ public MergeState( int[] maxDocs, InfoStream infoStream, Executor intraMergeTaskExecutor, - boolean needsIndexSort) { + boolean needsIndexSort, + MergePolicy.AbortChecker abortChecker) { this.docMaps = docMaps; this.segmentInfo = segmentInfo; this.mergeFieldInfos = mergeFieldInfos; @@ -301,5 +307,6 @@ public MergeState( this.infoStream = infoStream; this.intraMergeTaskExecutor = intraMergeTaskExecutor; this.needsIndexSort = needsIndexSort; + this.abortChecker = abortChecker; } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java index 8dab44c1c8e2..15f27f6eae8b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java @@ -60,13 +60,15 @@ final class SegmentMerger { Directory dir, FieldInfos.FieldNumbers fieldNumbers, IOContext context, - Executor intraMergeTaskExecutor) + Executor intraMergeTaskExecutor, + MergePolicy.AbortChecker abortChecker) throws IOException { if (context.context() != IOContext.Context.MERGE) { throw new IllegalArgumentException( "IOContext.context should be MERGE; got: " + context.context()); } - mergeState = new MergeState(readers, segmentInfo, infoStream, intraMergeTaskExecutor); + mergeState = + new MergeState(readers, segmentInfo, infoStream, intraMergeTaskExecutor, abortChecker); mergeStateCreationThread = Thread.currentThread(); directory = dir; this.codec = segmentInfo.getCodec(); diff --git a/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java index b3493b93afc6..9dc706c97399 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java @@ -306,6 +306,11 @@ public void checkIntegrity() throws IOException { // We already checkIntegrity the entire reader up front } + @Override + public void checkIntegrity(MergePolicy.AbortChecker abortChecker) throws IOException { + // We already checkIntegrity the entire reader up front + } + @Override public void close() {} }; diff --git a/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java index ee639578d512..ea13ec5a97ba 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java @@ -146,9 +146,14 @@ public StoredFieldsReader clone() { @Override public void checkIntegrity() throws IOException { + checkIntegrity(MergePolicy.AbortChecker.NO_OP); + } + + @Override + public void checkIntegrity(MergePolicy.AbortChecker abortChecker) throws IOException { for (StoredFieldsReader reader : readers) { if (reader != null) { - reader.checkIntegrity(); + reader.checkIntegrity(abortChecker); } } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java b/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java index 05e61f122577..7190893a087e 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java @@ -514,6 +514,11 @@ public void checkIntegrity() throws IOException { delegate.checkIntegrity(); } + @Override + public void checkIntegrity(MergePolicy.AbortChecker abortChecker) throws IOException { + delegate.checkIntegrity(abortChecker); + } + @Override public void close() throws IOException { delegate.close(); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java b/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java index ea47019de6ca..7e0f2d88da9c 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.MergePolicy; import org.apache.lucene.store.BufferedChecksumIndexInput; import org.apache.lucene.store.ByteBuffersDataOutput; import org.apache.lucene.store.ByteBuffersIndexInput; @@ -99,6 +100,42 @@ public void testChecksumEntireFile() throws Exception { input.close(); } + public void testChecksumEntireFileWithAbortChecker() throws Exception { + ByteBuffersDataOutput out = new ByteBuffersDataOutput(); + IndexOutput output = new ByteBuffersIndexOutput(out, "temp", "temp"); + CodecUtil.writeHeader(output, "FooBar", 5); + output.writeString("this is the data"); + CodecUtil.writeFooter(output); + output.close(); + + IndexInput input = new ByteBuffersIndexInput(out.toDataInput(), "temp"); + final long checksum = CodecUtil.checksumEntireFile(input); + input.close(); + + // NO_OP abort checker should behave like the single-arg overload + try (IndexInput noop = new ByteBuffersIndexInput(out.toDataInput(), "no op")) { + assertEquals(checksum, CodecUtil.checksumEntireFile(noop, MergePolicy.AbortChecker.NO_OP)); + } + + // Abort checker with a positive value should behave like the single-arg overload when the merge + // is not aborted + try (IndexInput notAborted = new ByteBuffersIndexInput(out.toDataInput(), "not aborted")) { + MergePolicy.AbortChecker checker = + new MergePolicy.AbortChecker(new MergePolicy.OneMerge(), 1); + assertEquals(checksum, CodecUtil.checksumEntireFile(notAborted, checker)); + } + + // When the merge is aborted, checksumming should throw MergeAbortedException + try (IndexInput aborted = new ByteBuffersIndexInput(out.toDataInput(), "aborted")) { + MergePolicy.OneMerge merge = new MergePolicy.OneMerge(); + MergePolicy.AbortChecker checker = new MergePolicy.AbortChecker(merge, 3); + merge.setAborted(); + expectThrows( + MergePolicy.MergeAbortedException.class, + () -> CodecUtil.checksumEntireFile(aborted, checker)); + } + } + public void testCheckFooterValid() throws Exception { ByteBuffersDataOutput out = new ByteBuffersDataOutput(); IndexOutput output = new ByteBuffersIndexOutput(out, "temp", "temp"); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/TestMergedVectorValues.java b/lucene/core/src/test/org/apache/lucene/codecs/TestMergedVectorValues.java index 5f1fc6a2267c..8df8b455bda8 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/TestMergedVectorValues.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/TestMergedVectorValues.java @@ -20,6 +20,7 @@ import java.util.List; import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergeState; import org.apache.lucene.tests.util.LuceneTestCase; @@ -39,8 +40,23 @@ public void testSkipsInMergedByteVectorValues() throws IOException { new KnnVectorsWriter.ByteVectorValuesSub(x -> x, ByteVectorValues.fromBytes(vectors, 1)); MergeState state = new MergeState( - null, null, null, null, null, null, null, null, null, null, null, null, null, null, - null, false); + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + false, + MergePolicy.AbortChecker.NO_OP); // Run the test ByteVectorValues values = @@ -67,8 +83,23 @@ public void testSkipsInMergedFloat32VectorValues() throws IOException { new KnnVectorsWriter.FloatVectorValuesSub(x -> x, FloatVectorValues.fromFloats(vectors, 1)); MergeState state = new MergeState( - null, null, null, null, null, null, null, null, null, null, null, null, null, null, - null, false); + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + false, + MergePolicy.AbortChecker.NO_OP); // Run the test FloatVectorValues values = diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java index 13384fcb8925..a4b7e1550573 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java @@ -239,7 +239,8 @@ private SegmentCommitInfo merge( trackingDir, new FieldInfos.FieldNumbers(null, null), context, - new SameThreadExecutorService()); + new SameThreadExecutorService(), + MergePolicy.AbortChecker.NO_OP); merger.merge(); merger.cleanupMerge(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java index 762e934a636b..1e6a97e4b6cf 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java @@ -107,7 +107,8 @@ public void testMerge() throws IOException { mergedDir, new FieldInfos.FieldNumbers(null, null), newIOContext(random(), IOContext.merge(new MergeInfo(-1, -1, false, -1))), - new SameThreadExecutorService()); + new SameThreadExecutorService(), + MergePolicy.AbortChecker.NO_OP); MergeState mergeState = merger.merge(); merger.cleanupMerge(); int docsMerged = mergeState.segmentInfo.maxDoc(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingStoredFieldsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingStoredFieldsFormat.java index 2ea955c3c01a..0b4ac7fa7ed0 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingStoredFieldsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingStoredFieldsFormat.java @@ -23,6 +23,7 @@ import org.apache.lucene.codecs.StoredFieldsWriter; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.StoredFieldDataInput; import org.apache.lucene.index.StoredFieldVisitor; @@ -88,6 +89,11 @@ public void checkIntegrity() throws IOException { in.checkIntegrity(); } + @Override + public void checkIntegrity(MergePolicy.AbortChecker abortChecker) throws IOException { + in.checkIntegrity(abortChecker); + } + @Override public StoredFieldsReader getMergeInstance() { return new AssertingStoredFieldsReader(in.getMergeInstance(), maxDoc, true); diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedCodecReader.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedCodecReader.java index 8c856aafcba2..a2e7f2bf4423 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedCodecReader.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/MismatchedCodecReader.java @@ -28,6 +28,7 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FilterCodecReader; +import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; @@ -97,6 +98,11 @@ public void checkIntegrity() throws IOException { in.checkIntegrity(); } + @Override + public void checkIntegrity(MergePolicy.AbortChecker abortChecker) throws IOException { + in.checkIntegrity(abortChecker); + } + @Override public void document(int docID, StoredFieldVisitor visitor) throws IOException { in.document(docID, new MismatchedLeafReader.MismatchedVisitor(visitor, shuffled)); From a68f87ca1f7684c860357651f9717c8387907a5d Mon Sep 17 00:00:00 2001 From: tlrx Date: Tue, 19 May 2026 17:52:26 +0200 Subject: [PATCH 2/3] check null --- lucene/core/src/java/org/apache/lucene/index/MergePolicy.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java index 94a996541f21..c6e03c55b7c0 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java @@ -592,7 +592,9 @@ public AbortChecker(OneMerge oneMerge, int abortCheckIntervalBytes) { /** Checks if the merge should be aborted, throwing MergeAbortedException if so. */ public void checkAborted() throws MergeAbortedException { - oneMerge.checkAborted(); + if (oneMerge != null) { + oneMerge.checkAborted(); + } } /** Interval in bytes between abort checks during merge integrity verification. */ From 227c0216050defe33141f39ced1563869d4264c6 Mon Sep 17 00:00:00 2001 From: tlrx Date: Tue, 19 May 2026 18:18:58 +0200 Subject: [PATCH 3/3] factory method --- .../org/apache/lucene/index/IndexWriter.java | 18 ++++-------------- .../org/apache/lucene/index/MergePolicy.java | 14 ++++++++++++++ .../apache/lucene/codecs/TestCodecUtil.java | 4 ++-- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 82a1c5ad045d..ba70f7fd615e 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -3483,13 +3483,8 @@ public void addIndexesReaderMerge(MergePolicy.OneMerge merge) throws IOException } } - final MergePolicy.AbortChecker mergeAbortChecker; - if (config.getMergeAbortCheckIntervalBytes() > 0) { - mergeAbortChecker = - new MergePolicy.AbortChecker(merge, config.getMergeAbortCheckIntervalBytes()); - } else { - mergeAbortChecker = MergePolicy.AbortChecker.NO_OP; - } + final MergePolicy.AbortChecker mergeAbortChecker = + MergePolicy.AbortChecker.create(merge, config.getMergeAbortCheckIntervalBytes()); SegmentMerger merger = new SegmentMerger( @@ -5293,13 +5288,8 @@ public int length() { } } - final MergePolicy.AbortChecker mergeAbortChecker; - if (config.getMergeAbortCheckIntervalBytes() > 0) { - mergeAbortChecker = - new MergePolicy.AbortChecker(merge, config.getMergeAbortCheckIntervalBytes()); - } else { - mergeAbortChecker = MergePolicy.AbortChecker.NO_OP; - } + final MergePolicy.AbortChecker mergeAbortChecker = + MergePolicy.AbortChecker.create(merge, config.getMergeAbortCheckIntervalBytes()); final SegmentMerger merger = new SegmentMerger( diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java index c6e03c55b7c0..7f4ae7ca8bb6 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java @@ -578,6 +578,20 @@ public MergeException(Throwable exc) { */ public static final class AbortChecker { + /** + * Creates an {@link AbortChecker} for the given merge. If {@code abortCheckIntervalBytes} is + * zero or negative, returns {@link AbortChecker#NO_OP}. + * + * @param merge the merge to check for abort + * @param abortCheckIntervalBytes interval in bytes between abort checks, or 0 to disable + */ + public static AbortChecker create(OneMerge merge, int abortCheckIntervalBytes) { + if (merge != null && abortCheckIntervalBytes > 0) { + return new AbortChecker(merge, abortCheckIntervalBytes); + } + return AbortChecker.NO_OP; + } + /** A no-op checker */ public static final AbortChecker NO_OP = new AbortChecker(null, 0); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java b/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java index 7e0f2d88da9c..de1b289e95d4 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java @@ -121,14 +121,14 @@ public void testChecksumEntireFileWithAbortChecker() throws Exception { // is not aborted try (IndexInput notAborted = new ByteBuffersIndexInput(out.toDataInput(), "not aborted")) { MergePolicy.AbortChecker checker = - new MergePolicy.AbortChecker(new MergePolicy.OneMerge(), 1); + MergePolicy.AbortChecker.create(new MergePolicy.OneMerge(), 1); assertEquals(checksum, CodecUtil.checksumEntireFile(notAborted, checker)); } // When the merge is aborted, checksumming should throw MergeAbortedException try (IndexInput aborted = new ByteBuffersIndexInput(out.toDataInput(), "aborted")) { MergePolicy.OneMerge merge = new MergePolicy.OneMerge(); - MergePolicy.AbortChecker checker = new MergePolicy.AbortChecker(merge, 3); + MergePolicy.AbortChecker checker = MergePolicy.AbortChecker.create(merge, 3); merge.setAborted(); expectThrows( MergePolicy.MergeAbortedException.class,