diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 42a6e8073576..05a661cb723f 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -287,7 +287,8 @@ New Features Improvements --------------------- -(No changes) +* GITHUB#16091: Replace deprecated IndexSearcher#search(Query, Collector) usage with CollectorManager + in JoinUtil#createJoinQuery for the GlobalOrdinalsCollector case. (Luca Cavanna) Optimizations --------------------- diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java deleted file mode 100644 index 4ae8849f1ea6..000000000000 --- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.search.join; - -import java.io.IOException; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.OrdinalMap; -import org.apache.lucene.index.SortedDocValues; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.Scorable; -import org.apache.lucene.util.LongBitSet; -import org.apache.lucene.util.LongValues; - -/** - * A collector that collects all ordinals from a specified field matching the query. - * - * @lucene.experimental - */ -final class GlobalOrdinalsCollector implements Collector { - - final String field; - final LongBitSet collectedOrds; - final OrdinalMap ordinalMap; - - GlobalOrdinalsCollector(String field, OrdinalMap ordinalMap, long valueCount) { - this.field = field; - this.ordinalMap = ordinalMap; - this.collectedOrds = new LongBitSet(valueCount); - } - - public LongBitSet getCollectorOrdinals() { - return collectedOrds; - } - - @Override - public org.apache.lucene.search.ScoreMode scoreMode() { - return org.apache.lucene.search.ScoreMode.COMPLETE_NO_SCORES; - } - - @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field); - if (ordinalMap != null) { - LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord); - return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup); - } else { - return new SegmentOrdinalCollector(docTermOrds); - } - } - - final class OrdinalMapCollector implements LeafCollector { - - private final SortedDocValues docTermOrds; - private final LongValues segmentOrdToGlobalOrdLookup; - - OrdinalMapCollector(SortedDocValues docTermOrds, LongValues segmentOrdToGlobalOrdLookup) { - this.docTermOrds = docTermOrds; - this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup; - } - - @Override - public void 
collect(int doc) throws IOException { - if (docTermOrds.advanceExact(doc)) { - long segmentOrd = docTermOrds.ordValue(); - long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd); - collectedOrds.set(globalOrd); - } - } - - @Override - public void setScorer(Scorable scorer) throws IOException {} - } - - final class SegmentOrdinalCollector implements LeafCollector { - - private final SortedDocValues docTermOrds; - - SegmentOrdinalCollector(SortedDocValues docTermOrds) { - this.docTermOrds = docTermOrds; - } - - @Override - public void collect(int doc) throws IOException { - if (docTermOrds.advanceExact(doc)) { - collectedOrds.set(docTermOrds.ordValue()); - } - } - - @Override - public void setScorer(Scorable scorer) throws IOException {} - } -} diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollectorManager.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollectorManager.java new file mode 100644 index 000000000000..9cda473e4aa7 --- /dev/null +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollectorManager.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.lucene.search.join;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.OrdinalMap;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.CollectorManager;
+import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.search.Scorable;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.util.LongBitSet;
+import org.apache.lucene.util.LongValues;
+
+/**
+ * A {@link CollectorManager} that collects all ordinals from a specified field matching the query.
+ * Each per-slice collector tracks only segment-local ordinals (sized to the segment's value count),
+ * and {@link #reduce} remaps them to global ordinals via the {@link OrdinalMap}.
+ */
+final class GlobalOrdinalsCollectorManager
+    implements CollectorManager<GlobalOrdinalsCollectorManager.SegmentLocalCollector, LongBitSet> {
+
+  private final String field;
+  private final OrdinalMap ordinalMap;
+  private final long valueCount; // number of bits allocated for the reduced result
+
+  GlobalOrdinalsCollectorManager(String field, OrdinalMap ordinalMap, long valueCount) {
+    this.field = field;
+    this.ordinalMap = ordinalMap;
+    this.valueCount = valueCount;
+  }
+
+  @Override
+  public SegmentLocalCollector newCollector() {
+    return new SegmentLocalCollector();
+  }
+
+  @Override
+  public LongBitSet reduce(Collection<SegmentLocalCollector> collectors) throws IOException {
+    LongBitSet result = new LongBitSet(valueCount);
+    // Group bitsets by segment ord. When allowSegmentPartitions=true splits a segment across
+    // multiple slices, each slice produces a separate collector for the same segment. Merging
+    // them here ensures we call getGlobalOrds() exactly once per segment.
+    Map<Integer, LongBitSet> mergedBySegment = new HashMap<>();
+    for (SegmentLocalCollector collector : collectors) {
+      for (int i = 0; i < collector.segmentBits.size(); i++) {
+        int segOrd = collector.segmentOrds.get(i);
+        LongBitSet bits = collector.segmentBits.get(i);
+        mergedBySegment.merge(
+            segOrd,
+            bits,
+            (a, b) -> {
+              a.or(b);
+              return a;
+            });
+      }
+    }
+    for (Map.Entry<Integer, LongBitSet> entry : mergedBySegment.entrySet()) {
+      LongBitSet segmentBits = entry.getValue();
+      if (ordinalMap != null) {
+        LongValues segToGlobal = ordinalMap.getGlobalOrds(entry.getKey());
+        for (long ord = segmentBits.nextSetBit(0); ord != -1; ) {
+          result.set(segToGlobal.get(ord));
+          long next = ord + 1; // bounds-checked below before the next nextSetBit() call
+          ord = next < segmentBits.length() ? segmentBits.nextSetBit(next) : -1;
+        }
+      } else { // no OrdinalMap: segment ordinals are merged into the result unmapped
+        result.or(segmentBits);
+      }
+    }
+    return result;
+  }
+
+  final class SegmentLocalCollector implements Collector {
+
+    final ArrayList<Integer> segmentOrds = new ArrayList<>(); // context.ord per recorded leaf
+    final ArrayList<LongBitSet> segmentBits = new ArrayList<>(); // parallel to segmentOrds
+
+    @Override
+    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
+      SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
+      long segmentValueCount = docTermOrds.getValueCount();
+      if (segmentValueCount == 0) { // no values here: return a no-op collector, record nothing
+        return new LeafCollector() {
+          @Override
+          public void setScorer(Scorable scorer) {}
+
+          @Override
+          public void collect(int doc) {}
+        };
+      }
+      LongBitSet bits = new LongBitSet(segmentValueCount);
+      segmentOrds.add(context.ord);
+      segmentBits.add(bits);
+      return new LeafCollector() {
+        @Override
+        public void setScorer(Scorable scorer) {}
+
+        @Override
+        public void collect(int doc) throws IOException {
+          if (docTermOrds.advanceExact(doc)) {
+            bits.set(docTermOrds.ordValue());
+          }
+        }
+      };
+    }
+
+    @Override
+    public ScoreMode scoreMode() {
+      return ScoreMode.COMPLETE_NO_SCORES;
+    }
+  }
+}
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java
index
e30aab554d07..1991d733a71a 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java @@ -550,11 +550,10 @@ public static Query createJoinQuery( break; case None: if (min <= 1 && max == Integer.MAX_VALUE) { - GlobalOrdinalsCollector globalOrdinalsCollector = - new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount); - searcher.search(rewrittenFromQuery, globalOrdinalsCollector); return new GlobalOrdinalsQuery( - globalOrdinalsCollector.getCollectorOrdinals(), + searcher.search( + rewrittenFromQuery, + new GlobalOrdinalsCollectorManager(joinField, ordinalMap, valueCount)), joinField, ordinalMap, rewrittenToQuery, diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java index 6ec736089ed5..005532c3270c 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java @@ -283,7 +283,7 @@ public void testSimpleOrdinalsJoin() throws Exception { doc.add(new SortedDocValuesField(joinField, new BytesRef("2"))); w.addDocument(doc); - IndexSearcher indexSearcher = new IndexSearcher(w.getReader()); + IndexSearcher indexSearcher = newSearcher(w.getReader()); w.close(); IndexReader r = indexSearcher.getIndexReader(); @@ -398,7 +398,7 @@ public void testOrdinalsJoinExplainNoMatches() throws Exception { w.addDocument(doc); IndexReader r = DirectoryReader.open(w); - IndexSearcher indexSearcher = new IndexSearcher(r); + IndexSearcher indexSearcher = newSearcher(r); SortedDocValues[] values = new SortedDocValues[r.leaves().size()]; for (int i = 0; i < values.length; i++) { LeafReader leafReader = r.leaves().get(i).reader();