diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 42a6e8073576..42dca09d7fe3 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -287,7 +287,9 @@ New Features Improvements --------------------- -(No changes) +* GITHUB#16292: Introduce TermGroupFacetCollectorManager to enable concurrent search with grouped + faceting, removing usages of the deprecated IndexSearcher#search(Query, Collector). + (Luca Cavanna) Optimizations --------------------- diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupFacetCollector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupFacetCollector.java index c2f613f3631e..fcad0a701931 100644 --- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupFacetCollector.java +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupFacetCollector.java @@ -33,7 +33,11 @@ * Base class for computing grouped facets. * * @lucene.experimental + * @deprecated Use {@code TermGroupFacetCollectorManager} instead, which supports concurrent search. + * {@code GroupedFacetResult} and {@code GroupedFacetResult.FacetEntry} are now top-level + * classes. */ +@Deprecated public abstract class GroupFacetCollector extends SimpleCollector { protected final String groupField; @@ -64,7 +68,13 @@ protected GroupFacetCollector(String groupField, String facetField, BytesRef fac * then the facets are sorted lexicographically in ascending order. * @return grouped facet results * @throws IOException If I/O related errors occur during merging segment grouped facet counts. + * @deprecated Use {@link TermGroupFacetCollectorManager} instead. The {@code size}, {@code + * minCount}, and {@code orderByCount} parameters are now passed to its constructor, and + * merging happens automatically inside {@link + * org.apache.lucene.search.IndexSearcher#search(org.apache.lucene.search.Query, + * org.apache.lucene.search.CollectorManager)}. */ + @Deprecated public GroupedFacetResult mergeSegmentResults(int size, int minCount, boolean orderByCount) throws IOException { int totalCount = 0; diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupedFacetResult.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupedFacetResult.java new file mode 100644 index 000000000000..29d110495444 --- /dev/null +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupedFacetResult.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.grouping; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.NavigableSet; +import java.util.TreeSet; +import org.apache.lucene.util.BytesRef; + +/** + * The grouped facet result. Containing grouped facet entries, total count and total missing count. + * + * @lucene.experimental + */ +public class GroupedFacetResult { + + private final int maxSize; + private final NavigableSet facetEntries; + private final int totalMissingCount; + private final int totalCount; + + private int currentMin; + + public GroupedFacetResult( + int size, int minCount, boolean orderByCount, int totalCount, int totalMissingCount) { + this.facetEntries = + new TreeSet<>( + orderByCount + ? (a, b) -> { + int cmp = b.count - a.count; // Highest count first! + if (cmp != 0) { + return cmp; + } + return a.value.compareTo(b.value); + } + : (a, b) -> a.value.compareTo(b.value)); + this.totalMissingCount = totalMissingCount; + this.totalCount = totalCount; + maxSize = size; + currentMin = minCount; + } + + public void addFacetCount(BytesRef facetValue, int count) { + if (count < currentMin) { + return; + } + + FacetEntry facetEntry = new FacetEntry(facetValue, count); + if (facetEntries.size() == maxSize) { + if (facetEntries.higher(facetEntry) == null) { + return; + } + facetEntries.pollLast(); + } + facetEntries.add(facetEntry); + + if (facetEntries.size() == maxSize) { + currentMin = facetEntries.last().count; + } + } + + /** + * Returns a list of facet entries to be rendered based on the specified offset and limit. The + * facet entries are retrieved from the facet entries collected during merging. + * + * @param offset The offset in the collected facet entries during merging + * @param limit The number of facets to return starting from the offset. + * @return a list of facet entries to be rendered based on the specified offset and limit + */ + public List getFacetEntries(int offset, int limit) { + if (offset >= facetEntries.size()) { + return Collections.emptyList(); + } + + List entries = new ArrayList<>(Math.min(limit, facetEntries.size() - offset)); + + int skipped = 0; + int included = 0; + for (FacetEntry facetEntry : facetEntries) { + if (skipped < offset) { + skipped++; + continue; + } + if (included++ >= limit) { + break; + } + entries.add(facetEntry); + } + return entries; + } + + /** + * Returns the sum of all facet entries counts. + * + * @return the sum of all facet entries counts + */ + public int getTotalCount() { + return totalCount; + } + + /** + * Returns the number of groups that didn't have a facet value. + * + * @return the number of groups that didn't have a facet value + */ + public int getTotalMissingCount() { + return totalMissingCount; + } + + /** Represents a facet entry with a value and a count. */ + public record FacetEntry(BytesRef value, int count) { + + @Override + public String toString() { + return "FacetEntry{" + "value=" + value.utf8ToString() + ", count=" + count + '}'; + } + } +} diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupFacetCollector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupFacetCollector.java index 33052db43905..6a4831b59c07 100644 --- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupFacetCollector.java +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupFacetCollector.java @@ -34,7 +34,9 @@ * indexed terms from DocValues. * * @lucene.experimental + * @deprecated Use {@code TermGroupFacetCollectorManager} instead, which supports concurrent search. */ +@Deprecated public abstract class TermGroupFacetCollector extends GroupFacetCollector { final List groupedFacetHits; @@ -54,7 +56,9 @@ public abstract class TermGroupFacetCollector extends GroupFacetCollector { * which should roughly match the total number of expected unique groups. Be aware that the * heap usage is 4 bytes * initialSize. * @return TermGroupFacetCollector implementation + * @deprecated Use {@code TermGroupFacetCollectorManager} instead. */ + @Deprecated public static TermGroupFacetCollector createTermGroupFacetCollector( String groupField, String facetField, diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupFacetCollectorManager.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupFacetCollectorManager.java new file mode 100644 index 000000000000..e35c9f2e13cf --- /dev/null +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupFacetCollectorManager.java @@ -0,0 +1,369 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search.grouping; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.internal.hppc.LongHashSet; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.LeafCollector; +import org.apache.lucene.search.Scorable; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.UnicodeUtil; + +/** + * A {@link CollectorManager} for computing grouped facet counts based on term DocValues. + * + *

For each facet value, the result counts the number of distinct group values that have at least + * one document with that facet value. Groups spanning multiple search slices are counted exactly + * once (cross-slice deduplication is handled in {@link #reduce}). + * + * @lucene.experimental + */ +public class TermGroupFacetCollectorManager + implements CollectorManager { + + private final String groupField; + private final String facetField; + private final boolean facetFieldMultivalued; + private final BytesRef facetPrefix; + private final int size; + private final int minCount; + private final boolean orderByCount; + + /** + * Creates a new TermGroupFacetCollectorManager. + * + * @param groupField the group field + * @param facetField the facet field + * @param facetFieldMultivalued whether the facet field has multiple values per document + * @param facetPrefix only include facet entries with this prefix; may be null + * @param size the maximum number of facet entries to include in the result (offset + limit) + * @param minCount minimum count for a facet entry to be included + * @param orderByCount whether to sort facet entries by count descending (vs. lexicographic) + */ + public TermGroupFacetCollectorManager( + String groupField, + String facetField, + boolean facetFieldMultivalued, + BytesRef facetPrefix, + int size, + int minCount, + boolean orderByCount) { + this.groupField = groupField; + this.facetField = facetField; + this.facetFieldMultivalued = facetFieldMultivalued; + this.facetPrefix = facetPrefix; + this.size = size; + this.minCount = minCount; + this.orderByCount = orderByCount; + } + + @Override + public FacetCollector newCollector() { + return new FacetCollector( + groupField, facetField, facetFieldMultivalued, facetPrefix, minCount == 0); + } + + @Override + public GroupedFacetResult reduce(Collection collectors) throws IOException { + // Globally deduplicate (group, facet) pairs across all per-slice collectors. + Set globalPairs = new HashSet<>(); + for (FacetCollector collector : collectors) { + globalPairs.addAll(collector.groupFacetPairs); + } + + Map facetCounts = new HashMap<>(); + int totalCount = 0; + int missingCount = 0; + for (FacetCollector.GroupFacetPair pair : globalPairs) { + if (pair.facetValue() == null) { + missingCount++; + } else { + totalCount++; + facetCounts.merge(pair.facetValue(), 1, Integer::sum); + } + } + + // Terms must be presented to addFacetCount in ascending byte order: GroupedFacetResult's + // internal currentMin optimization assumes sorted input (as mergeSegmentResults guarantees + // via its priority queue). TreeSet gives us that order for free. + Set sortedTerms = new TreeSet<>(facetCounts.keySet()); + if (minCount == 0) { + // Union in-range terms from all collectors so that zero-count terms are included. + for (FacetCollector collector : collectors) { + sortedTerms.addAll(collector.allFacetTermsInRange); + } + } + + GroupedFacetResult result = + new GroupedFacetResult(size, minCount, orderByCount, totalCount, missingCount); + for (BytesRef term : sortedTerms) { + result.addFacetCount(term, facetCounts.getOrDefault(term, 0)); + } + return result; + } + + /** + * Per-slice collector. During collection, (groupOrd, facetOrd) pairs are tracked as packed longs + * to avoid {@link BytesRef} allocations in the hot path. Ordinals are translated to term values + * once per segment at {@code finish()} time. Cross-slice deduplication is performed by {@link + * TermGroupFacetCollectorManager#reduce}. + */ + static class FacetCollector implements Collector { + + private final String groupField; + private final String facetField; + private final boolean facetFieldMultivalued; + private final BytesRef facetPrefix; + private final boolean needAllFacetTerms; + + // Accumulated across all segments processed by this collector's slice. + final Set groupFacetPairs = new HashSet<>(); + // Populated only when needAllFacetTerms=true (minCount==0), for zero-count term support. + final Set allFacetTermsInRange = new HashSet<>(); + + FacetCollector( + String groupField, + String facetField, + boolean facetFieldMultivalued, + BytesRef facetPrefix, + boolean needAllFacetTerms) { + this.groupField = groupField; + this.facetField = facetField; + this.facetFieldMultivalued = facetFieldMultivalued; + this.facetPrefix = facetPrefix; + this.needAllFacetTerms = needAllFacetTerms; + } + + @Override + public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { + return facetFieldMultivalued ? new MVLeafCollector(context) : new SVLeafCollector(context); + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE_NO_SCORES; + } + + // Packs (groupOrd, facetOrd) into a single long. Adding 1 to each maps the missing sentinel + // (-1) to 0, making the encoding unambiguous for non-negative ordinal values. + private static long encodePair(int groupOrd, int facetOrd) { + return ((long) (groupOrd + 1) << 32) | (facetOrd + 1); + } + + // Single-valued facet field implementation. + private class SVLeafCollector implements LeafCollector { + + private final SortedDocValues groupDV; + private final SortedDocValues facetDV; + private final int startFacetOrd; // -1 when no prefix, to allow missing facet value through + private final int endFacetOrd; + private final LongHashSet segmentPairs = new LongHashSet(); + + SVLeafCollector(LeafReaderContext context) throws IOException { + groupDV = DocValues.getSorted(context.reader(), groupField); + facetDV = DocValues.getSorted(context.reader(), facetField); + int sf, ef; + if (facetPrefix != null) { + sf = facetDV.lookupTerm(facetPrefix); + if (sf < 0) { + sf = -sf - 1; + } + BytesRefBuilder end = new BytesRefBuilder(); + end.append(facetPrefix); + end.append(UnicodeUtil.BIG_TERM); + ef = facetDV.lookupTerm(end.get()); + assert ef < 0; + ef = -ef - 1; + } else { + sf = -1; + ef = facetDV.getValueCount(); + } + startFacetOrd = sf; + endFacetOrd = ef; + } + + @Override + public void setScorer(Scorable scorer) {} + + @Override + public void collect(int doc) throws IOException { + if (doc > facetDV.docID()) { + facetDV.advance(doc); + } + int facetOrd = doc == facetDV.docID() ? facetDV.ordValue() : -1; + if (facetOrd < startFacetOrd || facetOrd >= endFacetOrd) { + return; + } + if (doc > groupDV.docID()) { + groupDV.advance(doc); + } + int groupOrd = doc == groupDV.docID() ? groupDV.ordValue() : -1; + segmentPairs.add(encodePair(groupOrd, facetOrd)); + } + + @Override + public void finish() throws IOException { + for (var cursor : segmentPairs) { + int groupOrd = (int) (cursor.value >>> 32) - 1; + int facetOrd = (int) (cursor.value & 0xFFFFFFFFL) - 1; + BytesRef groupValue = + groupOrd < 0 ? null : BytesRef.deepCopyOf(groupDV.lookupOrd(groupOrd)); + BytesRef facetValue = + facetOrd < 0 ? null : BytesRef.deepCopyOf(facetDV.lookupOrd(facetOrd)); + groupFacetPairs.add(new GroupFacetPair(groupValue, facetValue)); + } + segmentPairs.clear(); + if (needAllFacetTerms) { + int start = startFacetOrd < 0 ? 0 : startFacetOrd; + if (start < endFacetOrd) { + TermsEnum tenum = facetDV.termsEnum(); + tenum.seekExact(start); + for (int i = start; i < endFacetOrd; i++) { + BytesRef term = tenum.term(); + if (!allFacetTermsInRange.contains(term)) { + allFacetTermsInRange.add(BytesRef.deepCopyOf(term)); + } + if (i + 1 < endFacetOrd) { + tenum.next(); + } + } + } + } + } + } + + // Multi-valued facet field implementation. + private class MVLeafCollector implements LeafCollector { + + private final SortedDocValues groupDV; + private final SortedSetDocValues facetDV; + private final int facetFieldNumTerms; + private final int startFacetOrd; + private final int endFacetOrd; + private final LongHashSet segmentPairs = new LongHashSet(); + + MVLeafCollector(LeafReaderContext context) throws IOException { + groupDV = DocValues.getSorted(context.reader(), groupField); + facetDV = DocValues.getSortedSet(context.reader(), facetField); + facetFieldNumTerms = (int) facetDV.getValueCount(); + int sf, ef; + if (facetPrefix != null) { + if (facetFieldNumTerms == 0) { + sf = 0; + ef = 0; + } else { + TermsEnum tenum = facetDV.termsEnum(); + TermsEnum.SeekStatus status = tenum.seekCeil(facetPrefix); + if (status == TermsEnum.SeekStatus.END) { + sf = 0; + ef = 0; + } else { + sf = (int) tenum.ord(); + BytesRefBuilder end = new BytesRefBuilder(); + end.append(facetPrefix); + end.append(UnicodeUtil.BIG_TERM); + status = tenum.seekCeil(end.get()); + ef = (status == TermsEnum.SeekStatus.END) ? facetFieldNumTerms : (int) tenum.ord(); + } + } + } else { + sf = 0; + ef = facetFieldNumTerms; + } + startFacetOrd = sf; + endFacetOrd = ef; + } + + @Override + public void setScorer(Scorable scorer) {} + + @Override + public void collect(int doc) throws IOException { + if (doc > facetDV.docID()) { + facetDV.advance(doc); + } + boolean hasFacetDocValues = doc == facetDV.docID(); + if (hasFacetDocValues) { + int groupOrd = Integer.MIN_VALUE; // lazily fetched on first in-range facet ord + for (int i = 0; i < facetDV.docValueCount(); i++) { + int facetOrd = (int) facetDV.nextOrd(); + if (facetOrd >= startFacetOrd && facetOrd < endFacetOrd) { + if (groupOrd == Integer.MIN_VALUE) { + if (doc > groupDV.docID()) { + groupDV.advance(doc); + } + groupOrd = doc == groupDV.docID() ? groupDV.ordValue() : -1; + } + segmentPairs.add(encodePair(groupOrd, facetOrd)); + } + } + } else if (facetPrefix == null) { + // No facet values at all and no prefix: count as missing (facetOrd = -1). + if (doc > groupDV.docID()) { + groupDV.advance(doc); + } + int groupOrd = doc == groupDV.docID() ? groupDV.ordValue() : -1; + segmentPairs.add(encodePair(groupOrd, -1)); + } + } + + @Override + public void finish() throws IOException { + for (var cursor : segmentPairs) { + int groupOrd = (int) (cursor.value >>> 32) - 1; + int facetOrd = (int) (cursor.value & 0xFFFFFFFFL) - 1; + BytesRef groupValue = + groupOrd < 0 ? null : BytesRef.deepCopyOf(groupDV.lookupOrd(groupOrd)); + BytesRef facetValue = + facetOrd < 0 ? null : BytesRef.deepCopyOf(facetDV.lookupOrd((long) facetOrd)); + groupFacetPairs.add(new GroupFacetPair(groupValue, facetValue)); + } + segmentPairs.clear(); + if (needAllFacetTerms && startFacetOrd < endFacetOrd) { + TermsEnum tenum = facetDV.termsEnum(); + tenum.seekExact(startFacetOrd); + for (int i = startFacetOrd; i < endFacetOrd; i++) { + BytesRef term = tenum.term(); + if (!allFacetTermsInRange.contains(term)) { + allFacetTermsInRange.add(BytesRef.deepCopyOf(term)); + } + if (i + 1 < endFacetOrd) { + tenum.next(); + } + } + } + } + } + + record GroupFacetPair(BytesRef groupValue, BytesRef facetValue) {} + } +} diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGroupFacetCollector.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGroupFacetCollector.java index 5e8d1ca9cd8f..1a0d4df56822 100644 --- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGroupFacetCollector.java +++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGroupFacetCollector.java @@ -99,21 +99,24 @@ public void testSimple() throws Exception { IndexSearcher indexSearcher = newSearcher(w.getReader()); - List entries; - GroupFacetCollector groupedAirportFacetCollector; - TermGroupFacetCollector.GroupedFacetResult airportResult; + List entries; + GroupedFacetResult airportResult; for (int limit : new int[] {2, 10, 100, Integer.MAX_VALUE}) { // any of these limits is plenty for the data we have - groupedAirportFacetCollector = - createRandomCollector( - useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, false); - indexSearcher.search(MatchAllDocsQuery.INSTANCE, groupedAirportFacetCollector); int maxOffset = 5; airportResult = - groupedAirportFacetCollector.mergeSegmentResults( - Integer.MAX_VALUE == limit ? limit : maxOffset + limit, 0, false); + indexSearcher.search( + MatchAllDocsQuery.INSTANCE, + createRandomCollectorManager( + useDv ? "hotel_dv" : "hotel", + useDv ? "airport_dv" : "airport", + null, + false, + Integer.MAX_VALUE == limit ? limit : maxOffset + limit, + 0, + false)); assertEquals(3, airportResult.getTotalCount()); assertEquals(0, airportResult.getTotalMissingCount()); @@ -134,12 +137,17 @@ public void testSimple() throws Exception { assertEquals(1, entries.get(0).count()); } - GroupFacetCollector groupedDurationFacetCollector = - createRandomCollector( - useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", null, false); - indexSearcher.search(MatchAllDocsQuery.INSTANCE, groupedDurationFacetCollector); - TermGroupFacetCollector.GroupedFacetResult durationResult = - groupedDurationFacetCollector.mergeSegmentResults(10, 0, false); + GroupedFacetResult durationResult = + indexSearcher.search( + MatchAllDocsQuery.INSTANCE, + createRandomCollectorManager( + useDv ? "hotel_dv" : "hotel", + useDv ? "duration_dv" : "duration", + null, + false, + 10, + 0, + false)); assertEquals(4, durationResult.getTotalCount()); assertEquals(0, durationResult.getTotalMissingCount()); @@ -183,11 +191,17 @@ public void testSimple() throws Exception { indexSearcher.getIndexReader().close(); indexSearcher = newSearcher(w.getReader()); - groupedAirportFacetCollector = - createRandomCollector( - useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, !useDv); - indexSearcher.search(MatchAllDocsQuery.INSTANCE, groupedAirportFacetCollector); - airportResult = groupedAirportFacetCollector.mergeSegmentResults(3, 0, true); + airportResult = + indexSearcher.search( + MatchAllDocsQuery.INSTANCE, + createRandomCollectorManager( + useDv ? "hotel_dv" : "hotel", + useDv ? "airport_dv" : "airport", + null, + !useDv, + 3, + 0, + true)); entries = airportResult.getFacetEntries(1, 2); assertEquals(2, entries.size()); if (useDv) { @@ -206,11 +220,17 @@ public void testSimple() throws Exception { assertEquals(1, entries.get(1).count()); } - groupedDurationFacetCollector = - createRandomCollector( - useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", null, false); - indexSearcher.search(MatchAllDocsQuery.INSTANCE, groupedDurationFacetCollector); - durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 2, true); + durationResult = + indexSearcher.search( + MatchAllDocsQuery.INSTANCE, + createRandomCollectorManager( + useDv ? "hotel_dv" : "hotel", + useDv ? "duration_dv" : "duration", + null, + false, + 10, + 2, + true)); assertEquals(5, durationResult.getTotalCount()); assertEquals(0, durationResult.getTotalMissingCount()); @@ -235,11 +255,17 @@ public void testSimple() throws Exception { indexSearcher.getIndexReader().close(); indexSearcher = newSearcher(w.getReader()); - groupedAirportFacetCollector = - createRandomCollector( - useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, false); - indexSearcher.search(MatchAllDocsQuery.INSTANCE, groupedAirportFacetCollector); - airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false); + airportResult = + indexSearcher.search( + MatchAllDocsQuery.INSTANCE, + createRandomCollectorManager( + useDv ? "hotel_dv" : "hotel", + useDv ? "airport_dv" : "airport", + null, + false, + 10, + 0, + false)); entries = airportResult.getFacetEntries(0, 10); if (useDv) { assertEquals(8, airportResult.getTotalCount()); @@ -265,11 +291,17 @@ public void testSimple() throws Exception { assertEquals(2, entries.get(2).count()); } - groupedDurationFacetCollector = - createRandomCollector( - useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", "1", false); - indexSearcher.search(MatchAllDocsQuery.INSTANCE, groupedDurationFacetCollector); - durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 0, true); + durationResult = + indexSearcher.search( + MatchAllDocsQuery.INSTANCE, + createRandomCollectorManager( + useDv ? "hotel_dv" : "hotel", + useDv ? "duration_dv" : "duration", + "1", + false, + 10, + 0, + true)); assertEquals(5, durationResult.getTotalCount()); assertEquals(0, durationResult.getTotalMissingCount()); @@ -355,15 +387,14 @@ public void testMVGroupedFacetingWithDeletes() throws Exception { w.close(); IndexSearcher indexSearcher = newSearcher(DirectoryReader.open(dir)); - GroupFacetCollector groupedAirportFacetCollector = - createRandomCollector(groupField + "_dv", "airport", null, true); - indexSearcher.search(MatchAllDocsQuery.INSTANCE, groupedAirportFacetCollector); - TermGroupFacetCollector.GroupedFacetResult airportResult = - groupedAirportFacetCollector.mergeSegmentResults(10, 0, false); + GroupedFacetResult airportResult = + indexSearcher.search( + MatchAllDocsQuery.INSTANCE, + createRandomCollectorManager(groupField + "_dv", "airport", null, true, 10, 0, false)); assertEquals(3, airportResult.getTotalCount()); assertEquals(1, airportResult.getTotalMissingCount()); - List entries = airportResult.getFacetEntries(0, 10); + List entries = airportResult.getFacetEntries(0, 10); assertEquals(2, entries.size()); assertEquals("ams", entries.get(0).value().utf8ToString()); assertEquals(2, entries.get(0).count()); @@ -418,22 +449,28 @@ public void testRandom() throws Exception { } } - GroupedFacetResult expectedFacetResult = + ExpectedFacetResult expectedFacetResult = createExpectedFacetResult( searchTerm, context, offset, limit, minCount, orderByCount, facetPrefix); - GroupFacetCollector groupFacetCollector = - createRandomCollector("group", "facet", facetPrefix, multipleFacetsPerDocument); - searcher.search(new TermQuery(new Term("content", searchTerm)), groupFacetCollector); - TermGroupFacetCollector.GroupedFacetResult actualFacetResult = - groupFacetCollector.mergeSegmentResults(size, minCount, orderByCount); - - List expectedFacetEntries = + GroupedFacetResult actualFacetResult = + searcher.search( + new TermQuery(new Term("content", searchTerm)), + createRandomCollectorManager( + "group", + "facet", + facetPrefix, + multipleFacetsPerDocument, + size, + minCount, + orderByCount)); + + List expectedFacetEntries = expectedFacetResult.facetEntries(); - List actualFacetEntries = + List actualFacetEntries = actualFacetResult.getFacetEntries(offset, limit); if (VERBOSE) { - System.out.println("Collector: " + groupFacetCollector.getClass().getSimpleName()); + System.out.println("Collector: TermGroupFacetCollectorManager"); System.out.println("Num group: " + context.numGroups); System.out.println("Num doc: " + context.numDocs); System.out.println("Index iter: " + indexIter); @@ -451,7 +488,7 @@ public void testRandom() throws Exception { System.out.println("Total count " + expectedFacetResult.totalCount()); System.out.println("Total missing count " + expectedFacetResult.totalMissingCount()); int counter = 0; - for (TermGroupFacetCollector.FacetEntry expectedFacetEntry : expectedFacetEntries) { + for (GroupedFacetResult.FacetEntry expectedFacetEntry : expectedFacetEntries) { System.out.printf( Locale.ROOT, "%d. Expected facet value %s with count %d%n", @@ -464,7 +501,7 @@ public void testRandom() throws Exception { System.out.println("Total count " + actualFacetResult.getTotalCount()); System.out.println("Total missing count " + actualFacetResult.getTotalMissingCount()); counter = 0; - for (TermGroupFacetCollector.FacetEntry actualFacetEntry : actualFacetEntries) { + for (GroupedFacetResult.FacetEntry actualFacetEntry : actualFacetEntries) { System.out.printf( Locale.ROOT, "%d. Actual facet value %s with count %d%n", @@ -481,8 +518,8 @@ public void testRandom() throws Exception { expectedFacetResult.totalMissingCount(), actualFacetResult.getTotalMissingCount()); assertEquals(expectedFacetEntries.size(), actualFacetEntries.size()); for (int i = 0; i < expectedFacetEntries.size(); i++) { - TermGroupFacetCollector.FacetEntry expectedFacetEntry = expectedFacetEntries.get(i); - TermGroupFacetCollector.FacetEntry actualFacetEntry = actualFacetEntries.get(i); + GroupedFacetResult.FacetEntry expectedFacetEntry = expectedFacetEntries.get(i); + GroupedFacetResult.FacetEntry actualFacetEntry = actualFacetEntries.get(i); assertEquals( "i=" + i @@ -678,7 +715,7 @@ private IndexContext createIndexContext(boolean multipleFacetValuesPerDocument) uniqueFacetValues); } - private GroupedFacetResult createExpectedFacetResult( + private ExpectedFacetResult createExpectedFacetResult( String searchTerm, IndexContext context, int offset, @@ -705,7 +742,7 @@ private GroupedFacetResult createExpectedFacetResult( facetValues = context.facetValues; } - List entries = new ArrayList<>(facetGroups.size()); + List entries = new ArrayList<>(facetGroups.size()); // also includes facets with count 0 for (String facetValue : facetValues) { if (facetValue == null) { @@ -715,7 +752,7 @@ private GroupedFacetResult createExpectedFacetResult( Set groups = facetGroups.get(facetValue); int count = groups != null ? groups.size() : 0; if (count >= minCount) { - entries.add(new TermGroupFacetCollector.FacetEntry(new BytesRef(facetValue), count)); + entries.add(new GroupedFacetResult.FacetEntry(new BytesRef(facetValue), count)); } totalCount += count; } @@ -740,7 +777,7 @@ private GroupedFacetResult createExpectedFacetResult( }); int endOffset = offset + limit; - List entriesResult; + List entriesResult; if (offset >= entries.size()) { entriesResult = Collections.emptyList(); } else if (endOffset >= entries.size()) { @@ -748,14 +785,26 @@ private GroupedFacetResult createExpectedFacetResult( } else { entriesResult = entries.subList(offset, endOffset); } - return new GroupedFacetResult(totalCount, totalMissCount, entriesResult); + return new ExpectedFacetResult(totalCount, totalMissCount, entriesResult); } - private GroupFacetCollector createRandomCollector( - String groupField, String facetField, String facetPrefix, boolean multipleFacetsPerDocument) { + private TermGroupFacetCollectorManager createRandomCollectorManager( + String groupField, + String facetField, + String facetPrefix, + boolean multipleFacetsPerDocument, + int size, + int minCount, + boolean orderByCount) { BytesRef facetPrefixBR = facetPrefix == null ? null : new BytesRef(facetPrefix); - return TermGroupFacetCollector.createTermGroupFacetCollector( - groupField, facetField, multipleFacetsPerDocument, facetPrefixBR, random().nextInt(1024)); + return new TermGroupFacetCollectorManager( + groupField, + facetField, + multipleFacetsPerDocument, + facetPrefixBR, + size, + minCount, + orderByCount); } private String getFromSet(Set set, int index) { @@ -800,8 +849,6 @@ public IndexContext( } } - private record GroupedFacetResult( - int totalCount, - int totalMissingCount, - List facetEntries) {} + private record ExpectedFacetResult( + int totalCount, int totalMissingCount, List facetEntries) {} }