From ccf51437aec030d6e835f93874b7fbf3890af768 Mon Sep 17 00:00:00 2001 From: Shimpei Kodama Date: Mon, 2 Feb 2026 21:53:34 +0000 Subject: [PATCH 1/6] Avoid unnecessary DisjunctionMaxBulkScorer overhead This change inspects clause bulk scorers up front and only uses DisjunctionMaxBulkScorer if at least one clause provides a non-default BulkScorer. Otherwise, we fall back to the scorer-based path. c88f9334e5c99abbeb4f233f9606873e5037c118 made DisjunctionMaxQuery use DisjunctionMaxBulkScorer when tieBreakerMultiplier == 0 and scoreMode == TOP_SCORES. However, this bulk path primarily pays off when at least one clause implements a specialized BulkScorer. When all clauses return DefaultBulkScorer, the bulk windowing and replay logic adds overhead while preventing effective use of minCompetitiveScore and block-max optimizations that the scorer-based DisjunctionMaxScorer supports in TOP_SCORES mode. In such cases, falling back to the scorer-based path typically results in better performance and restores competitive-score-based skipping. 
Fixes: https://github.com/apache/lucene/issues/15658 Related PR: https://github.com/apache/lucene/pull/14040 --- .../lucene/search/DisjunctionMaxQuery.java | 27 ++++++++++++++----- .../java/org/apache/lucene/search/Weight.java | 4 +++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index 025a563ba01b..efbe0484ca4e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -160,14 +160,29 @@ public Scorer get(long leadCost) throws IOException { @Override public BulkScorer bulkScorer() throws IOException { - if (tieBreakerMultiplier == 0f && scoreMode == ScoreMode.TOP_SCORES) { - List scorers = new ArrayList<>(); - for (ScorerSupplier ss : scorerSuppliers) { - scorers.add(ss.bulkScorer()); + if (tieBreakerMultiplier != 0f || scoreMode != ScoreMode.TOP_SCORES) { + return super.bulkScorer(); + } + + List bulkScorers = new ArrayList<>(scorerSuppliers.size()); + for (ScorerSupplier ss : scorerSuppliers) { + bulkScorers.add(ss.bulkScorer()); + } + + // If all are DefaultBulkScorer, combine the underlying scorers directly. + // This avoids the overhead of DisjunctionMaxBulkScorer. 
+ List scorers = new ArrayList<>(bulkScorers.size()); + for (BulkScorer bs : bulkScorers) { + if (bs instanceof Weight.DefaultBulkScorer dbs) { + scorers.add(dbs.getScorer()); + } else { + return new DisjunctionMaxBulkScorer(bulkScorers); } - return new DisjunctionMaxBulkScorer(scorers); } - return super.bulkScorer(); + + return new Weight.DefaultBulkScorer( + new DisjunctionMaxScorer(tieBreakerMultiplier, scorers, scoreMode, Long.MAX_VALUE) + ); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index c0add319ac5c..903d67586187 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -243,6 +243,10 @@ public DefaultBulkScorer(Scorer scorer) { } } + Scorer getScorer() { + return scorer; + } + @Override public long cost() { return iterator.cost(); From 6231d375dac002bf3e613673689387e451df6fdd Mon Sep 17 00:00:00 2001 From: Shimpei Kodama Date: Mon, 2 Feb 2026 21:59:11 +0000 Subject: [PATCH 2/6] Add changes.txt --- lucene/CHANGES.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d83e82ee71ca..a3a79437149c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -342,6 +342,9 @@ Optimizations * GITHUB#15607: Utilize bulk scoring for diversity checking when building HNSW vector indices. This results in some performance improvements during indexing and segment merges. (Ben Trent) +* GITHUB#15658: Avoid DisjunctionMaxQuery bulk scoring regressions when all clauses only + provide DefaultBulkScorer. 
(Shimpei Kodama) + Bug Fixes --------------------- * GITHUB#14161: PointInSetQuery's constructor now throws IllegalArgumentException From 68ada56464381a7acf094b57f6056899eec408dd Mon Sep 17 00:00:00 2001 From: Shimpei Kodama Date: Tue, 3 Feb 2026 11:36:43 +0000 Subject: [PATCH 3/6] ./gradlew tidy --rerun-tasks --- .../src/java/org/apache/lucene/search/DisjunctionMaxQuery.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index efbe0484ca4e..9f8c81fd9734 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -181,8 +181,7 @@ public BulkScorer bulkScorer() throws IOException { } return new Weight.DefaultBulkScorer( - new DisjunctionMaxScorer(tieBreakerMultiplier, scorers, scoreMode, Long.MAX_VALUE) - ); + new DisjunctionMaxScorer(tieBreakerMultiplier, scorers, scoreMode, Long.MAX_VALUE)); } @Override From d828ce2f0c497eeacdc9aba6f9944c7bb6b7ce7a Mon Sep 17 00:00:00 2001 From: Shimpei Kodama Date: Sat, 14 Feb 2026 13:39:48 +0000 Subject: [PATCH 4/6] Revert "./gradlew tidy --rerun-tasks" This reverts commit 68ada56464381a7acf094b57f6056899eec408dd. 
--- .../src/java/org/apache/lucene/search/DisjunctionMaxQuery.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index 9f8c81fd9734..efbe0484ca4e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -181,7 +181,8 @@ public BulkScorer bulkScorer() throws IOException { } return new Weight.DefaultBulkScorer( - new DisjunctionMaxScorer(tieBreakerMultiplier, scorers, scoreMode, Long.MAX_VALUE)); + new DisjunctionMaxScorer(tieBreakerMultiplier, scorers, scoreMode, Long.MAX_VALUE) + ); } @Override From 96ae1854a23fe77047760dc31058971eab427e0d Mon Sep 17 00:00:00 2001 From: Shimpei Kodama Date: Sat, 14 Feb 2026 13:39:50 +0000 Subject: [PATCH 5/6] Revert "Avoid unnecessary DisjunctionMaxBulkScorer overhead" This reverts commit ccf51437aec030d6e835f93874b7fbf3890af768. 
--- .../lucene/search/DisjunctionMaxQuery.java | 27 +++++-------------- .../java/org/apache/lucene/search/Weight.java | 4 --- 2 files changed, 6 insertions(+), 25 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index efbe0484ca4e..025a563ba01b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -160,29 +160,14 @@ public Scorer get(long leadCost) throws IOException { @Override public BulkScorer bulkScorer() throws IOException { - if (tieBreakerMultiplier != 0f || scoreMode != ScoreMode.TOP_SCORES) { - return super.bulkScorer(); - } - - List bulkScorers = new ArrayList<>(scorerSuppliers.size()); - for (ScorerSupplier ss : scorerSuppliers) { - bulkScorers.add(ss.bulkScorer()); - } - - // If all are DefaultBulkScorer, combine the underlying scorers directly. - // This avoids the overhead of DisjunctionMaxBulkScorer. 
- List scorers = new ArrayList<>(bulkScorers.size()); - for (BulkScorer bs : bulkScorers) { - if (bs instanceof Weight.DefaultBulkScorer dbs) { - scorers.add(dbs.getScorer()); - } else { - return new DisjunctionMaxBulkScorer(bulkScorers); + if (tieBreakerMultiplier == 0f && scoreMode == ScoreMode.TOP_SCORES) { + List scorers = new ArrayList<>(); + for (ScorerSupplier ss : scorerSuppliers) { + scorers.add(ss.bulkScorer()); } + return new DisjunctionMaxBulkScorer(scorers); } - - return new Weight.DefaultBulkScorer( - new DisjunctionMaxScorer(tieBreakerMultiplier, scorers, scoreMode, Long.MAX_VALUE) - ); + return super.bulkScorer(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index 903d67586187..c0add319ac5c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -243,10 +243,6 @@ public DefaultBulkScorer(Scorer scorer) { } } - Scorer getScorer() { - return scorer; - } - @Override public long cost() { return iterator.cost(); From fe86cebb57300d858012fe983fbe8461949aca35 Mon Sep 17 00:00:00 2001 From: Shimpei Kodama Date: Sat, 14 Feb 2026 13:48:11 +0000 Subject: [PATCH 6/6] Propagate minCompetitiveScore earlier in DisjunctionMaxBulkScorer Previously, DisjunctionMaxBulkScorer used a fixed 4096-doc window. minCompetitiveScore was only propagated to sub-scorers at window boundaries, so up to 4096 docs could be scored before a new threshold reached them. For queries that can eliminate sub-scorers before 4096 docs have been evaluated, DisjunctionMaxBulkScorer was slower than the non-bulk DisjunctionMaxScorer. This change updates DisjunctionMaxBulkScorer to start with a window size of 1 and double it after each window, capping at 4096. Small initial windows let minCompetitiveScore propagate frequently while the threshold is rising and sub-scorers are being eliminated. 
This is expected to help most when TOP_SCORES is collecting a small top-N, the query has a high match rate, and the disjunction has many clauses. The 12 sub-4096 windows (sizes 1 through 2048) add minor per-window overhead for queries where full-size windows would have been fine. --- lucene/CHANGES.txt | 5 +++-- .../org/apache/lucene/search/DisjunctionMaxBulkScorer.java | 7 ++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index a3a79437149c..08014f638014 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -342,8 +342,9 @@ Optimizations * GITHUB#15607: Utilize bulk scoring for diversity checking when building HNSW vector indices. This results in some performance improvements during indexing and segment merges. (Ben Trent) -* GITHUB#15658: Avoid DisjunctionMaxQuery bulk scoring regressions when all clauses only - provide DefaultBulkScorer. (Shimpei Kodama) +* GITHUB#15658: DisjunctionMaxBulkScorer now uses progressive window sizing, allowing sub-scorers to be skipped + sooner when their scores are no longer competitive. (Shimpei Kodama) + Bug Fixes --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxBulkScorer.java index e6dc70502709..413494d8090c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxBulkScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxBulkScorer.java @@ -46,6 +46,7 @@ private static class BulkScorerAndNext { private final float[] windowScores = new float[WINDOW_SIZE]; private final PriorityQueue scorers; private final SimpleScorable topLevelScorable = new SimpleScorable(); + private int currentWindowSize = 1; DisjunctionMaxBulkScorer(List scorers) { if (scorers.size() < 2) { @@ -64,7 +65,7 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr while (top.next < max) { final int windowMin = Math.max(top.next, min); - final int windowMax = 
MathUtil.unsignedMin(max, windowMin + WINDOW_SIZE); + final int windowMax = MathUtil.unsignedMin(max, windowMin + currentWindowSize); // First compute matches / scores in the window do { @@ -105,6 +106,10 @@ public void collect(int doc) throws IOException { collector.collect(doc); } + if (currentWindowSize < WINDOW_SIZE) { + currentWindowSize = Math.min(currentWindowSize << 1, WINDOW_SIZE); + } + // Finally clean up state windowMatches.clear(); Arrays.fill(windowScores, 0f);