Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,9 @@ New Features

Improvements
---------------------
(No changes)
* GITHUB#16292: Introduce TermGroupFacetCollectorManager to enable concurrent search with grouped
faceting, removing usages of the deprecated IndexSearcher#search(Query, Collector).
(Luca Cavanna)

Optimizations
---------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@
* Base class for computing grouped facets.
*
* @lucene.experimental
* @deprecated Use {@code TermGroupFacetCollectorManager} instead, which supports concurrent search.
* {@code GroupedFacetResult} and {@code GroupedFacetResult.FacetEntry} are now top-level
* classes.
*/
@Deprecated
public abstract class GroupFacetCollector extends SimpleCollector {

protected final String groupField;
Expand Down Expand Up @@ -64,7 +68,13 @@ protected GroupFacetCollector(String groupField, String facetField, BytesRef fac
* </code> then the facets are sorted lexicographically in ascending order.
* @return grouped facet results
* @throws IOException If I/O related errors occur during merging segment grouped facet counts.
* @deprecated Use {@link TermGroupFacetCollectorManager} instead. The {@code size}, {@code
* minCount}, and {@code orderByCount} parameters are now passed to its constructor, and
* merging happens automatically inside {@link
* org.apache.lucene.search.IndexSearcher#search(org.apache.lucene.search.Query,
* org.apache.lucene.search.CollectorManager)}.
*/
@Deprecated
public GroupedFacetResult mergeSegmentResults(int size, int minCount, boolean orderByCount)
throws IOException {
int totalCount = 0;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.grouping;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.NavigableSet;
import java.util.TreeSet;
import org.apache.lucene.util.BytesRef;

/**
* The grouped facet result, containing the grouped facet entries, the total count, and the total
* missing count.
*
* @lucene.experimental
*/
public class GroupedFacetResult {

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is temporarily duplicated: the same code also lives within the now-deprecated GroupFacetCollector. The idea is to backport this change cleanly to 10.x, then remove the deprecated classes from main.


// Size at which the entry set is considered full; taken from the constructor's size argument.
private final int maxSize;
// Retained entries, kept sorted by the comparator chosen in the constructor.
private final NavigableSet<FacetEntry> facetEntries;
// Value handed to the constructor and reported by getTotalMissingCount().
private final int totalMissingCount;
// Value handed to the constructor and reported by getTotalCount().
private final int totalCount;

// Counts below this are rejected by addFacetCount(); starts at the constructor's minCount and is
// overwritten with the last entry's count whenever the set is full after an insertion.
private int currentMin;

/**
 * Creates an empty result that retains facet entries in sorted order.
 *
 * @param size the number of entries at which the result is full; once full, adding an entry
 *     that sorts before an existing one evicts the last entry
 * @param minCount the initial minimum count an entry needs in order to be accepted
 * @param orderByCount if {@code true} entries are sorted by descending count with ties broken
 *     by ascending value; otherwise entries are sorted by ascending value only
 * @param totalCount the value reported by {@link #getTotalCount()}
 * @param totalMissingCount the value reported by {@link #getTotalMissingCount()}
 */
public GroupedFacetResult(
    int size, int minCount, boolean orderByCount, int totalCount, int totalMissingCount) {
  this.facetEntries =
      new TreeSet<>(
          orderByCount
              ? (a, b) -> {
                // Highest count first. Integer.compare is used instead of subtraction because
                // b.count - a.count can overflow and yield an inconsistent ordering.
                int cmp = Integer.compare(b.count, a.count);
                if (cmp != 0) {
                  return cmp;
                }
                return a.value.compareTo(b.value);
              }
              : (a, b) -> a.value.compareTo(b.value));
  this.totalMissingCount = totalMissingCount;
  this.totalCount = totalCount;
  maxSize = size;
  currentMin = minCount;
}

/**
 * Offers a facet value and its count to this result.
 *
 * <p>The entry is dropped when its count is below the current minimum, or when the result is
 * already full and no retained entry sorts after it. Otherwise it is inserted, evicting the last
 * retained entry first if the result was full.
 *
 * @param facetValue the facet value
 * @param count the count associated with the facet value
 */
public void addFacetCount(BytesRef facetValue, int count) {
  if (count >= currentMin) {
    final FacetEntry candidate = new FacetEntry(facetValue, count);
    final boolean alreadyFull = facetEntries.size() == maxSize;
    if (alreadyFull) {
      // Nothing sorts after the candidate, so it would itself be the last entry: reject it.
      if (facetEntries.higher(candidate) == null) {
        return;
      }
      facetEntries.pollLast();
    }
    facetEntries.add(candidate);

    // NOTE(review): the threshold is taken from the last entry even when the set is ordered by
    // value only, where the last entry need not carry the smallest count -- confirm callers
    // add values in sorted order in that mode.
    if (facetEntries.size() == maxSize) {
      currentMin = facetEntries.last().count;
    }
  }
}

/**
 * Returns one page of the facet entries collected during merging, in their sorted order.
 *
 * @param offset The number of leading collected entries to skip
 * @param limit The maximum number of entries to return after the skipped ones
 * @return the selected facet entries, or an empty list if {@code offset} is at or beyond the
 *     number of collected entries
 */
public List<FacetEntry> getFacetEntries(int offset, int limit) {
  final int available = facetEntries.size();
  if (available <= offset) {
    return Collections.emptyList();
  }

  final List<FacetEntry> page = new ArrayList<>(Math.min(limit, available - offset));

  // offset < available here, so skipping offset entries never runs off the end.
  var cursor = facetEntries.iterator();
  for (int toSkip = offset; toSkip > 0; toSkip--) {
    cursor.next();
  }
  while (page.size() < limit && cursor.hasNext()) {
    page.add(cursor.next());
  }
  return page;
}

/**
 * Returns the total count supplied when this result was created.
 *
 * @return the sum of all facet entry counts
 */
public int getTotalCount() {
  return this.totalCount;
}

/**
 * Returns the total missing count supplied when this result was created.
 *
 * @return the number of groups that didn't have a facet value
 */
public int getTotalMissingCount() {
  return this.totalMissingCount;
}

/** Pairs a facet value with its count. */
public record FacetEntry(BytesRef value, int count) {

  /** Renders the value as a UTF-8 decoded string together with the count. */
  @Override
  public String toString() {
    final StringBuilder text = new StringBuilder("FacetEntry{");
    text.append("value=").append(value.utf8ToString());
    text.append(", count=").append(count);
    text.append('}');
    return text.toString();
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@
* indexed terms from DocValues.
*
* @lucene.experimental
* @deprecated Use {@code TermGroupFacetCollectorManager} instead, which supports concurrent search.
*/
@Deprecated
public abstract class TermGroupFacetCollector extends GroupFacetCollector {

final List<GroupedFacetHit> groupedFacetHits;
Expand All @@ -54,7 +56,9 @@ public abstract class TermGroupFacetCollector extends GroupFacetCollector {
* which should roughly match the total number of expected unique groups. Be aware that the
* heap usage is 4 bytes * initialSize.
* @return <code>TermGroupFacetCollector</code> implementation
* @deprecated Use {@code TermGroupFacetCollectorManager} instead.
*/
@Deprecated
public static TermGroupFacetCollector createTermGroupFacetCollector(
String groupField,
String facetField,
Expand Down
Loading
Loading