-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Allow merge integrity checks to be aborted sooner #16086
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
|
|
||
| import java.io.Closeable; | ||
| import java.io.IOException; | ||
| import org.apache.lucene.index.MergePolicy; | ||
|
|
||
| abstract class FieldsIndex implements Cloneable, Closeable { | ||
|
|
||
|
|
@@ -38,6 +39,9 @@ final long getStartPointer(int docID) { | |
| /** Check the integrity of the index. */ | ||
| abstract void checkIntegrity() throws IOException; | ||
|
|
||
| /** Check the integrity of the index, with periodic merge abort checking. */ | ||
| public abstract void checkIntegrity(MergePolicy.AbortChecker abortChecker) throws IOException; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we are only adding "check every N bytes", can we rename |
||
|
|
||
| @Override | ||
| public abstract FieldsIndex clone(); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -561,4 +561,21 @@ public IndexWriterConfig setParentField(String parentField) { | |
| this.parentField = parentField; | ||
| return this; | ||
| } | ||
|
|
||
| /** | ||
| * Expert: sets the interval in bytes between abort checks during merge integrity verification. | ||
| * During merges, codec readers verify file integrity by reading entire files to compute | ||
| * checksums. This setting controls how often the merge abort flag is checked during these reads. | ||
| * Smaller values allow merges to be aborted more promptly but add slight overhead. | ||
| * | ||
| * @param intervalBytes the interval in bytes, must be positive | ||
| */ | ||
| public IndexWriterConfig setMergeAbortCheckIntervalBytes(int intervalBytes) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's make this
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does not need a configurable value at all, just a reasonable one (e.g. 1MB). If someone is doing s3, even setting this to If we can simplify how we do it (see idea on CodecUtil.java), i'd suggest overloading the implementing function with a parameter, to allow unit tests to use a small value, but it doesn't need to be public. |
||
| if (intervalBytes <= 0) { | ||
| throw new IllegalArgumentException( | ||
| "mergeAbortCheckIntervalBytes must be positive, got: " + intervalBytes); | ||
| } | ||
| this.mergeAbortCheckIntervalBytes = intervalBytes; | ||
| return this; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -117,6 +117,9 @@ public class LiveIndexWriterConfig { | |
| /** The IndexWriter event listener to record key events * */ | ||
| protected IndexWriterEventListener eventListener; | ||
|
|
||
| /** Interval in bytes between abort checks during merge integrity verification. */ | ||
| protected volatile int mergeAbortCheckIntervalBytes; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it only |
||
|
|
||
| // used by IndexWriterConfig | ||
| LiveIndexWriterConfig(Analyzer analyzer) { | ||
| this.analyzer = analyzer; | ||
|
|
@@ -461,6 +464,14 @@ public IndexWriterEventListener getIndexWriterEventListener() { | |
| return eventListener; | ||
| } | ||
|
|
||
| /** | ||
| * Returns the interval in bytes between abort checks during merge integrity verification. See | ||
| * {@link IndexWriterConfig#setMergeAbortCheckIntervalBytes(int)}. | ||
| */ | ||
| public int getMergeAbortCheckIntervalBytes() { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| return mergeAbortCheckIntervalBytes; | ||
| } | ||
|
|
||
| /** Returns the parent document field name if configured. */ | ||
| public String getParentField() { | ||
| return parentField; | ||
|
|
@@ -495,6 +506,9 @@ public String toString() { | |
| sb.append("leafSorter=").append(getLeafSorter()).append("\n"); | ||
| sb.append("eventListener=").append(getIndexWriterEventListener()).append("\n"); | ||
| sb.append("parentField=").append(getParentField()).append("\n"); | ||
| sb.append("mergeAbortCheckIntervalBytes=") | ||
| .append(getMergeAbortCheckIntervalBytes()) | ||
| .append("\n"); | ||
| return sb.toString(); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -570,6 +570,53 @@ public MergeException(Throwable exc) { | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Interface for checking whether a merge has been aborted. Implementations should throw {@link | ||
| * MergeAbortedException} if the merge should stop. | ||
| * | ||
| * @lucene.experimental | ||
| */ | ||
| public static final class AbortChecker { | ||
|
|
||
| /** | ||
| * Creates an {@link AbortChecker} for the given merge. If {@code abortCheckIntervalBytes} is | ||
| * zero or negative, returns {@link AbortChecker#NO_OP}. | ||
| * | ||
| * @param merge the merge to check for abort | ||
| * @param abortCheckIntervalBytes interval in bytes between abort checks, or 0 to disable | ||
| */ | ||
| public static AbortChecker create(OneMerge merge, int abortCheckIntervalBytes) { | ||
| if (merge != null && abortCheckIntervalBytes > 0) { | ||
| return new AbortChecker(merge, abortCheckIntervalBytes); | ||
| } | ||
| return AbortChecker.NO_OP; | ||
| } | ||
|
|
||
| /** A no-op checker */ | ||
| public static final AbortChecker NO_OP = new AbortChecker(null, 0); | ||
|
|
||
| private final OneMerge oneMerge; | ||
| private final int abortCheckIntervalBytes; | ||
|
|
||
| public AbortChecker(OneMerge oneMerge, int abortCheckIntervalBytes) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's make this private and do all construction via the factory method. |
||
| assert oneMerge != null || abortCheckIntervalBytes == 0 : abortCheckIntervalBytes; | ||
| this.oneMerge = oneMerge; | ||
| this.abortCheckIntervalBytes = abortCheckIntervalBytes; | ||
| } | ||
|
|
||
| /** Checks if the merge should be aborted, throwing MergeAbortedException if so. */ | ||
| public void checkAborted() throws MergeAbortedException { | ||
| if (oneMerge != null) { | ||
| oneMerge.checkAborted(); | ||
| } | ||
| } | ||
|
|
||
| /** Interval in bytes between abort checks during merge integrity verification. */ | ||
| public int getAbortCheckIntervalBytes() { | ||
| return abortCheckIntervalBytes; | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Thrown when a merge was explicitly aborted because {@link IndexWriter#abortMerges} was called. | ||
| * Normally this exception is privately caught and suppressed by {@link IndexWriter}. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, you're adding another level of buffering / copying, when someone turns on this new setting? I think? Can we subclass
FilterIndexInputinstead, so it's simply/only the added accounting (tracking total bytes written) and not more buffering / copying?