Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,9 @@ Improvements
DiversifiedTopDocsCollector, removing usages of the deprecated IndexSearcher#search(Query, Collector).
(Luca Cavanna)

* GITHUB#16264: Check if merge is aborted before executing file integrity checks to avoid
costly full-file checksums on segments when the merge has already been cancelled. (Tanguy Leroux)

Optimizations
---------------------
* GITHUB#16222: MultiTermQuery constant-score wrapper now defers term collection to ScorerSupplier#get()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ public abstract void addSortedSetField(FieldInfo field, DocValuesProducer values
public void merge(MergeState mergeState) throws IOException {
for (DocValuesProducer docValuesProducer : mergeState.docValuesProducers) {
if (docValuesProducer != null) {
mergeState.checkAborted();
docValuesProducer.checkIntegrity();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ public void merge(MergeState mergeState, NormsProducer norms) throws IOException

final int maxDoc = mergeState.maxDocs[readerIndex];
if (f != null) {
mergeState.checkAborted();
f.checkIntegrity();
slices.add(new ReaderSlice(docBase, maxDoc, readerIndex));
fields.add(f);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ public final void merge(MergeState mergeState) throws IOException {
KnnVectorsReader reader = mergeState.knnVectorsReaders[i];
assert reader != null || mergeState.fieldInfos[i].hasVectorValues() == false;
if (reader != null) {
mergeState.checkAborted();
reader.checkIntegrity();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ public abstract void addNormsField(FieldInfo field, NormsProducer normsProducer)
public void merge(MergeState mergeState) throws IOException {
for (NormsProducer normsProducer : mergeState.normsProducers) {
if (normsProducer != null) {
mergeState.checkAborted();
normsProducer.checkIntegrity();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ public void merge(MergeState mergeState) throws IOException {
// check each incoming reader
for (PointsReader reader : mergeState.pointsReaders) {
if (reader != null) {
mergeState.checkAborted();
reader.checkIntegrity();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ public int merge(MergeState mergeState) throws IOException {
List<StoredFieldsMergeSub> subs = new ArrayList<>();
for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) {
StoredFieldsReader storedFieldsReader = mergeState.storedFieldsReaders[i];
mergeState.checkAborted();
storedFieldsReader.checkIntegrity();
subs.add(
new StoredFieldsMergeSub(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ public int merge(MergeState mergeState) throws IOException {
for (int i = 0; i < mergeState.termVectorsReaders.length; i++) {
TermVectorsReader reader = mergeState.termVectorsReaders[i];
if (reader != null) {
mergeState.checkAborted();
reader.checkIntegrity();
}
subs.add(new TermVectorsMergeSub(mergeState.docMaps[i], reader, mergeState.maxDocs[i]));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ public void merge(MergeState mergeState) throws IOException {
}
for (PointsReader reader : mergeState.pointsReaders) {
if (reader != null) {
mergeState.checkAborted();
reader.checkIntegrity();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,7 @@ public int merge(MergeState mergeState) throws IOException {
new ArrayList<>(mergeState.storedFieldsReaders.length);
for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) {
final StoredFieldsReader reader = mergeState.storedFieldsReaders[i];
mergeState.checkAborted();
reader.checkIntegrity();
MergeStrategy mergeStrategy = getMergeStrategy(mergeState, matchingReaders, i);
if (mergeStrategy == MergeStrategy.VISITOR) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,7 @@ public int merge(MergeState mergeState) throws IOException {
for (int i = 0; i < numReaders; i++) {
final TermVectorsReader reader = mergeState.termVectorsReaders[i];
if (reader != null) {
mergeState.checkAborted();
reader.checkIntegrity();
}
final boolean bulkMerge = canPerformBulkMerge(mergeState, matchingReaders, i);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ static MergeState restrictFields(MergeState in, Collection<String> fields) {
in.maxDocs,
in.infoStream,
in.intraMergeTaskExecutor,
in.needsIndexSort);
in.needsIndexSort,
in.oneMerge);
}

private static class FilterFieldInfos extends FieldInfos {
Expand Down
6 changes: 4 additions & 2 deletions lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -3491,7 +3491,8 @@ public void addIndexesReaderMerge(MergePolicy.OneMerge merge) throws IOException
trackingDir,
globalFieldNumberMap,
context,
intraMergeExecutor);
intraMergeExecutor,
merge);
try {
if (!merger.shouldMerge()) {
return;
Expand Down Expand Up @@ -5292,7 +5293,8 @@ public int length() {
dirWrapper,
globalFieldNumberMap,
context,
intraMergeExecutor);
intraMergeExecutor,
merge);
MergeState mergeState = merger.mergeState;
MergeState.DocMap[] docMaps;
try {
Expand Down
26 changes: 24 additions & 2 deletions lucene/core/src/java/org/apache/lucene/index/MergeState.java
Original file line number Diff line number Diff line change
Expand Up @@ -91,14 +91,24 @@ public class MergeState {
/** Indicates if the index needs to be sorted * */
public boolean needsIndexSort;

/**
* The merge that this state is associated with, or {@code null} if this merge state is not
* associated with an {@link IndexWriter} merge (e.g. for addIndexes).
*
* @lucene.internal
*/
public final MergePolicy.OneMerge oneMerge;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oneMerge is public, do we really want to expose the raw OneMerge on MergeState's public API? checkAborted() method is all the codecs need, and that's already public..making the field package-private (or at least @lucene.internal) would avoid leaking merge internals to custom codec implementations!

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, that's a valid point.

I added the @lucene.internal annotation in db5d684 since org.apache.lucene.codecs.perfield.PerFieldMergeState sits in a different package and requires oneMerge to be public in order to propagate it when creating a restricted MergeState.


/** Sole constructor. */
MergeState(
List<CodecReader> readers,
SegmentInfo segmentInfo,
InfoStream infoStream,
Executor intraMergeTaskExecutor)
Executor intraMergeTaskExecutor,
MergePolicy.OneMerge oneMerge)
throws IOException {
verifyIndexSort(readers, segmentInfo);
this.oneMerge = oneMerge;
this.infoStream = infoStream;
int numReaders = readers.size();
this.intraMergeTaskExecutor = intraMergeTaskExecutor;
Expand Down Expand Up @@ -230,6 +240,16 @@ private DocMap[] buildDocMaps(List<CodecReader> readers, Sort indexSort) throws
}
}

/**
* Checks if the merge has been aborted, throwing {@link MergePolicy.MergeAbortedException} if so.
* This is a no-op if this merge state is not associated with an {@link IndexWriter} merge.
*/
public void checkAborted() throws MergePolicy.MergeAbortedException {
if (oneMerge != null) {
oneMerge.checkAborted();
}
}

private static void verifyIndexSort(List<CodecReader> readers, SegmentInfo segmentInfo) {
Sort indexSort = segmentInfo.getIndexSort();
if (indexSort == null) {
Expand Down Expand Up @@ -284,7 +304,8 @@ public MergeState(
int[] maxDocs,
InfoStream infoStream,
Executor intraMergeTaskExecutor,
boolean needsIndexSort) {
boolean needsIndexSort,
MergePolicy.OneMerge oneMerge) {
this.docMaps = docMaps;
this.segmentInfo = segmentInfo;
this.mergeFieldInfos = mergeFieldInfos;
Expand All @@ -301,5 +322,6 @@ public MergeState(
this.infoStream = infoStream;
this.intraMergeTaskExecutor = intraMergeTaskExecutor;
this.needsIndexSort = needsIndexSort;
this.oneMerge = oneMerge;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,18 +60,20 @@ final class SegmentMerger {
Directory dir,
FieldInfos.FieldNumbers fieldNumbers,
IOContext context,
Executor intraMergeTaskExecutor)
Executor intraMergeTaskExecutor,
MergePolicy.OneMerge oneMerge)
throws IOException {
if (context.context() != IOContext.Context.MERGE) {
throw new IllegalArgumentException(
"IOContext.context should be MERGE; got: " + context.context());
}
mergeState = new MergeState(readers, segmentInfo, infoStream, intraMergeTaskExecutor);
mergeStateCreationThread = Thread.currentThread();
directory = dir;
this.codec = segmentInfo.getCodec();
this.context = context;
this.fieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
this.mergeState =
new MergeState(readers, segmentInfo, infoStream, intraMergeTaskExecutor, oneMerge);
Version minVersion = Version.LATEST;
for (CodecReader reader : readers) {
Version leafMinVersion = reader.getMetaData().minVersion();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public void testSkipsInMergedByteVectorValues() throws IOException {
MergeState state =
new MergeState(
null, null, null, null, null, null, null, null, null, null, null, null, null, null,
null, false);
null, false, null);

// Run the test
ByteVectorValues values =
Expand Down Expand Up @@ -68,7 +68,7 @@ public void testSkipsInMergedFloat32VectorValues() throws IOException {
MergeState state =
new MergeState(
null, null, null, null, null, null, null, null, null, null, null, null, null, null,
null, false);
null, false, null);

// Run the test
FloatVectorValues values =
Expand Down
3 changes: 2 additions & 1 deletion lucene/core/src/test/org/apache/lucene/index/TestDoc.java
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,8 @@ private SegmentCommitInfo merge(
trackingDir,
new FieldInfos.FieldNumbers(null, null),
context,
new SameThreadExecutorService());
new SameThreadExecutorService(),
null);

merger.merge();
merger.cleanupMerge();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ public void testMerge() throws IOException {
mergedDir,
new FieldInfos.FieldNumbers(null, null),
newIOContext(random(), IOContext.merge(new MergeInfo(-1, -1, false, -1))),
new SameThreadExecutorService());
new SameThreadExecutorService(),
null);
MergeState mergeState = merger.merge();
merger.cleanupMerge();
int docsMerged = mergeState.segmentInfo.maxDoc();
Expand Down
Loading