From f7eb42826923867b3b20b766ff6f86c938adb79f Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Wed, 20 May 2026 15:54:31 +0200 Subject: [PATCH 1/6] Replace deprecated search(Query, Collector) with CollectorManager in JoinUtil For the ScoreMode.None + no min/max path, replace the deprecated IndexSearcher#search(Query, Collector) call with a CollectorManager that creates a GlobalOrdinalsCollector per slice and merges their LongBitSets with OR in reduce(). This also enables true parallel collection when the searcher has an executor. Switch testSimpleOrdinalsJoin and testOrdinalsJoinExplainNoMatches to use newSearcher() so the concurrent merge path is covered probabilistically. --- lucene/CHANGES.txt | 3 +++ .../apache/lucene/search/join/JoinUtil.java | 27 ++++++++++++++++--- .../lucene/search/join/TestJoinUtil.java | 4 +-- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 7c6b010af124..4f78322f039a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -137,6 +137,9 @@ Improvements * GITHUB#15558: Refactor QueryCache for performance. (Sagar Upadhyaya) +* GITHUB#XXXX: Replace deprecated IndexSearcher#search(Query, Collector) usage with CollectorManager + in JoinUtil#createJoinQuery for the GlobalOrdinalsCollector case. (Luca Cavanna) + Optimizations --------------------- * GITHUB#15681, GITHUB#15833, GITHUB#16056: Replace pre-sized array or empty array with lambda expression to call Collection#toArray. 
(Zhou Hui) diff --git a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java index 32f4f58e6c0a..87b02d4e7b6f 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.util.Arrays; +import java.util.Collection; import java.util.Iterator; import java.util.Locale; import org.apache.lucene.document.DoublePoint; @@ -39,6 +40,7 @@ import org.apache.lucene.internal.hppc.LongHashSet; import org.apache.lucene.internal.hppc.LongIntHashMap; import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointInSetQuery; @@ -47,6 +49,7 @@ import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.search.join.DocValuesTermsCollector.Function; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LongBitSet; /** * Utility for query time joining. 
@@ -530,6 +533,7 @@ public static Query createJoinQuery( final Query rewrittenFromQuery = searcher.rewrite(fromQuery); final Query rewrittenToQuery = searcher.rewrite(toQuery); + final OrdinalMap finalOrdinalMap = ordinalMap; GlobalOrdinalsWithScoreCollector globalOrdinalsWithScoreCollector; switch (scoreMode) { case Total: @@ -550,11 +554,26 @@ public static Query createJoinQuery( break; case None: if (min <= 1 && max == Integer.MAX_VALUE) { - GlobalOrdinalsCollector globalOrdinalsCollector = - new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount); - searcher.search(rewrittenFromQuery, globalOrdinalsCollector); + LongBitSet collectedOrds = + searcher.search( + rewrittenFromQuery, + new CollectorManager<GlobalOrdinalsCollector, LongBitSet>() { + @Override + public GlobalOrdinalsCollector newCollector() { + return new GlobalOrdinalsCollector(joinField, finalOrdinalMap, valueCount); + } + + @Override + public LongBitSet reduce(Collection<GlobalOrdinalsCollector> collectors) { + LongBitSet result = new LongBitSet(valueCount); + for (GlobalOrdinalsCollector c : collectors) { + result.or(c.getCollectorOrdinals()); + } + return result; + } + }); return new GlobalOrdinalsQuery( - globalOrdinalsCollector.getCollectorOrdinals(), + collectedOrds, joinField, ordinalMap, rewrittenToQuery, diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java index 2f815a8357fd..f4418a92c5c9 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java @@ -282,7 +282,7 @@ public void testSimpleOrdinalsJoin() throws Exception { doc.add(new SortedDocValuesField(joinField, new BytesRef("2"))); w.addDocument(doc); - IndexSearcher indexSearcher = new IndexSearcher(w.getReader()); + IndexSearcher indexSearcher = newSearcher(w.getReader()); w.close(); IndexReader r = indexSearcher.getIndexReader(); @@ -397,7 +397,7 @@ public void 
testOrdinalsJoinExplainNoMatches() throws Exception { w.addDocument(doc); IndexReader r = DirectoryReader.open(w); - IndexSearcher indexSearcher = new IndexSearcher(r); + IndexSearcher indexSearcher = newSearcher(r); SortedDocValues[] values = new SortedDocValues[r.leaves().size()]; for (int i = 0; i < values.length; i++) { LeafReader leafReader = r.leaves().get(i).reader(); From 9912bcc4c91bb22eddebbdd63563bf74a7d62262 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Wed, 20 May 2026 16:01:39 +0200 Subject: [PATCH 2/6] changes entry --- lucene/CHANGES.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4f78322f039a..e748fd5eba48 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -137,9 +137,6 @@ Improvements * GITHUB#15558: Refactor QueryCache for performance. (Sagar Upadhyaya) -* GITHUB#XXXX: Replace deprecated IndexSearcher#search(Query, Collector) usage with CollectorManager - in JoinUtil#createJoinQuery for the GlobalOrdinalsCollector case. (Luca Cavanna) - Optimizations --------------------- * GITHUB#15681, GITHUB#15833, GITHUB#16056: Replace pre-sized array or empty array with lambda expression to call Collection#toArray. (Zhou Hui) @@ -346,6 +343,9 @@ Improvements * GITHUB#16000: Clarify that Accountable#ramBytesUsed() reports JVM heap memory only. (Luca Cavanna) +* GITHUB#16091: Replace deprecated IndexSearcher#search(Query, Collector) usage with CollectorManager + in JoinUtil#createJoinQuery for the GlobalOrdinalsCollector case. (Luca Cavanna) + Optimizations --------------------- * GITHUB#15861: Optimise PhraseScorer by short circuiting non competitive documents in TOP_SCORES mode. 
(Prithvi S) From 34d109051ed7f72311693940a2324b59ec95d475 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Wed, 27 May 2026 14:41:02 +0200 Subject: [PATCH 3/6] iter --- .../search/join/GlobalOrdinalsCollector.java | 108 ---------------- .../join/GlobalOrdinalsCollectorManager.java | 117 ++++++++++++++++++ .../apache/lucene/search/join/JoinUtil.java | 24 +--- 3 files changed, 119 insertions(+), 130 deletions(-) delete mode 100644 lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java create mode 100644 lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollectorManager.java diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java deleted file mode 100644 index 4ae8849f1ea6..000000000000 --- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.search.join; - -import java.io.IOException; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.OrdinalMap; -import org.apache.lucene.index.SortedDocValues; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.Scorable; -import org.apache.lucene.util.LongBitSet; -import org.apache.lucene.util.LongValues; - -/** - * A collector that collects all ordinals from a specified field matching the query. - * - * @lucene.experimental - */ -final class GlobalOrdinalsCollector implements Collector { - - final String field; - final LongBitSet collectedOrds; - final OrdinalMap ordinalMap; - - GlobalOrdinalsCollector(String field, OrdinalMap ordinalMap, long valueCount) { - this.field = field; - this.ordinalMap = ordinalMap; - this.collectedOrds = new LongBitSet(valueCount); - } - - public LongBitSet getCollectorOrdinals() { - return collectedOrds; - } - - @Override - public org.apache.lucene.search.ScoreMode scoreMode() { - return org.apache.lucene.search.ScoreMode.COMPLETE_NO_SCORES; - } - - @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field); - if (ordinalMap != null) { - LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord); - return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup); - } else { - return new SegmentOrdinalCollector(docTermOrds); - } - } - - final class OrdinalMapCollector implements LeafCollector { - - private final SortedDocValues docTermOrds; - private final LongValues segmentOrdToGlobalOrdLookup; - - OrdinalMapCollector(SortedDocValues docTermOrds, LongValues segmentOrdToGlobalOrdLookup) { - this.docTermOrds = docTermOrds; - this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup; - } - - @Override - public void 
collect(int doc) throws IOException { - if (docTermOrds.advanceExact(doc)) { - long segmentOrd = docTermOrds.ordValue(); - long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd); - collectedOrds.set(globalOrd); - } - } - - @Override - public void setScorer(Scorable scorer) throws IOException {} - } - - final class SegmentOrdinalCollector implements LeafCollector { - - private final SortedDocValues docTermOrds; - - SegmentOrdinalCollector(SortedDocValues docTermOrds) { - this.docTermOrds = docTermOrds; - } - - @Override - public void collect(int doc) throws IOException { - if (docTermOrds.advanceExact(doc)) { - collectedOrds.set(docTermOrds.ordValue()); - } - } - - @Override - public void setScorer(Scorable scorer) throws IOException {} - } -} diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollectorManager.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollectorManager.java new file mode 100644 index 000000000000..b42764fdf170 --- /dev/null +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollectorManager.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.search.join; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.OrdinalMap; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.LeafCollector; +import org.apache.lucene.search.Scorable; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.util.LongBitSet; +import org.apache.lucene.util.LongValues; + +/** + * A {@link CollectorManager} that collects all ordinals from a specified field matching the query. + * Each per-slice collector tracks only segment-local ordinals (sized to the segment's value count), + * and {@link #reduce} remaps them to global ordinals via the {@link OrdinalMap}. + */ +final class GlobalOrdinalsCollectorManager + implements CollectorManager<GlobalOrdinalsCollectorManager.SegmentLocalCollector, LongBitSet> { + + private final String field; + private final OrdinalMap ordinalMap; + private final long valueCount; + + GlobalOrdinalsCollectorManager(String field, OrdinalMap ordinalMap, long valueCount) { + this.field = field; + this.ordinalMap = ordinalMap; + this.valueCount = valueCount; + } + + @Override + public SegmentLocalCollector newCollector() { + return new SegmentLocalCollector(); + } + + @Override + public LongBitSet reduce(Collection<SegmentLocalCollector> collectors) throws IOException { + LongBitSet result = new LongBitSet(valueCount); + for (SegmentLocalCollector collector : collectors) { + for (int i = 0; i < collector.segmentBits.size(); i++) { + LongBitSet segmentBits = collector.segmentBits.get(i); + if (ordinalMap != null) { + LongValues segToGlobal = ordinalMap.getGlobalOrds(collector.segmentOrds.get(i)); + for (long ord = segmentBits.nextSetBit(0); ord != -1; ) { + result.set(segToGlobal.get(ord)); + long next = ord + 1; + ord = next < segmentBits.length() ? 
segmentBits.nextSetBit(next) : -1; + } + } else { + result.or(segmentBits); + } + } + } + return result; + } + + final class SegmentLocalCollector implements Collector { + + final ArrayList<Integer> segmentOrds = new ArrayList<>(); + final ArrayList<LongBitSet> segmentBits = new ArrayList<>(); + + @Override + public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { + SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field); + long segmentValueCount = docTermOrds.getValueCount(); + if (segmentValueCount == 0) { + return new LeafCollector() { + @Override + public void setScorer(Scorable scorer) {} + + @Override + public void collect(int doc) {} + }; + } + LongBitSet bits = new LongBitSet(segmentValueCount); + segmentOrds.add(context.ord); + segmentBits.add(bits); + return new LeafCollector() { + @Override + public void setScorer(Scorable scorer) {} + + @Override + public void collect(int doc) throws IOException { + if (docTermOrds.advanceExact(doc)) { + bits.set(docTermOrds.ordValue()); + } + } + }; + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE_NO_SCORES; + } + } +} diff --git a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java index 87b02d4e7b6f..a911cac68240 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java @@ -18,7 +18,6 @@ import java.io.IOException; import java.util.Arrays; -import java.util.Collection; import java.util.Iterator; import java.util.Locale; import org.apache.lucene.document.DoublePoint; @@ -40,7 +39,6 @@ import org.apache.lucene.internal.hppc.LongHashSet; import org.apache.lucene.internal.hppc.LongIntHashMap; import org.apache.lucene.search.Collector; -import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchNoDocsQuery; import 
org.apache.lucene.search.PointInSetQuery; @@ -49,7 +47,6 @@ import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.search.join.DocValuesTermsCollector.Function; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LongBitSet; /** * Utility for query time joining. @@ -533,7 +530,6 @@ public static Query createJoinQuery( final Query rewrittenFromQuery = searcher.rewrite(fromQuery); final Query rewrittenToQuery = searcher.rewrite(toQuery); - final OrdinalMap finalOrdinalMap = ordinalMap; GlobalOrdinalsWithScoreCollector globalOrdinalsWithScoreCollector; switch (scoreMode) { case Total: @@ -554,26 +550,10 @@ public static Query createJoinQuery( break; case None: if (min <= 1 && max == Integer.MAX_VALUE) { - LongBitSet collectedOrds = + return new GlobalOrdinalsQuery( searcher.search( rewrittenFromQuery, - new CollectorManager<GlobalOrdinalsCollector, LongBitSet>() { - @Override - public GlobalOrdinalsCollector newCollector() { - return new GlobalOrdinalsCollector(joinField, finalOrdinalMap, valueCount); - } - - @Override - public LongBitSet reduce(Collection<GlobalOrdinalsCollector> collectors) { - LongBitSet result = new LongBitSet(valueCount); - for (GlobalOrdinalsCollector c : collectors) { - result.or(c.getCollectorOrdinals()); - } - return result; - } - }); - return new GlobalOrdinalsQuery( - collectedOrds, + new GlobalOrdinalsCollectorManager(joinField, ordinalMap, valueCount)), joinField, ordinalMap, rewrittenToQuery, From 88fe6f6ef1a696da9c4483376da93ab3a08c165e Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Thu, 4 Jun 2026 10:59:38 +0200 Subject: [PATCH 4/6] iter --- .../join/GlobalOrdinalsCollectorManager.java | 34 +++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollectorManager.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollectorManager.java index b42764fdf170..a510058afebd 100644 --- 
a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollectorManager.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollectorManager.java @@ -19,6 +19,8 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; +import java.util.Map; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.OrdinalMap; @@ -57,19 +59,31 @@ public SegmentLocalCollector newCollector() { @Override public LongBitSet reduce(Collection<SegmentLocalCollector> collectors) throws IOException { LongBitSet result = new LongBitSet(valueCount); + // Group bitsets by segment ord. When allowSegmentPartitions=true splits a segment across + // multiple slices, each slice produces a separate collector for the same segment. Merging + // them here ensures we call getGlobalOrds() exactly once per segment. + Map<Integer, LongBitSet> mergedBySegment = new HashMap<>(); for (SegmentLocalCollector collector : collectors) { for (int i = 0; i < collector.segmentBits.size(); i++) { - LongBitSet segmentBits = collector.segmentBits.get(i); - if (ordinalMap != null) { - LongValues segToGlobal = ordinalMap.getGlobalOrds(collector.segmentOrds.get(i)); - for (long ord = segmentBits.nextSetBit(0); ord != -1; ) { - result.set(segToGlobal.get(ord)); - long next = ord + 1; - ord = next < segmentBits.length() ? 
segmentBits.nextSetBit(next) : -1; - } - } else { - result.or(segmentBits); + int segOrd = collector.segmentOrds.get(i); + LongBitSet bits = collector.segmentBits.get(i); + mergedBySegment.merge(segOrd, bits, (a, b) -> { + a.or(b); + return a; + }); + } + } + for (Map.Entry<Integer, LongBitSet> entry : mergedBySegment.entrySet()) { + LongBitSet segmentBits = entry.getValue(); + if (ordinalMap != null) { + LongValues segToGlobal = ordinalMap.getGlobalOrds(entry.getKey()); + for (long ord = segmentBits.nextSetBit(0); ord != -1; ) { + result.set(segToGlobal.get(ord)); + long next = ord + 1; + ord = next < segmentBits.length() ? segmentBits.nextSetBit(next) : -1; } + } else { + result.or(segmentBits); } } return result; From e27377c2e3520c65e837d77b6cf056b0b81f2845 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Thu, 4 Jun 2026 12:06:46 +0200 Subject: [PATCH 5/6] tidy --- .../search/join/GlobalOrdinalsCollectorManager.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollectorManager.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollectorManager.java index a510058afebd..9cda473e4aa7 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollectorManager.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollectorManager.java @@ -67,10 +67,13 @@ public LongBitSet reduce(Collection<SegmentLocalCollector> collectors) throws IO for (int i = 0; i < collector.segmentBits.size(); i++) { int segOrd = collector.segmentOrds.get(i); LongBitSet bits = collector.segmentBits.get(i); - mergedBySegment.merge(segOrd, bits, (a, b) -> { - a.or(b); - return a; - }); + mergedBySegment.merge( + segOrd, + bits, + (a, b) -> { + a.or(b); + return a; + }); } } for (Map.Entry<Integer, LongBitSet> entry : mergedBySegment.entrySet()) { From b22df3ef6a9f182f9adc290710a367b2c5e84352 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Tue, 16 Jun 2026 09:05:42 +0200 Subject: [PATCH 6/6] fix 
changes --- lucene/CHANGES.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 5d6bf5e943ea..ea06f0dd005a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -408,7 +408,7 @@ Improvements * GITHUB#16091: Replace deprecated IndexSearcher#search(Query, Collector) usage with CollectorManager in JoinUtil#createJoinQuery for the GlobalOrdinalsCollector case. (Luca Cavanna) - + Optimizations --------------------- * GITHUB#16222: MultiTermQuery constant-score wrapper now defers term collection to ScorerSupplier#get()