Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.calcite.udf.udaf;

import org.opensearch.sql.calcite.udf.UserDefinedAggFunction;

/**
* Logical marker UDAF for {@code DISTINCT_COUNT_APPROX}. Lets the PPL parser produce a RelNode that
* contains the operator without committing to a JVM execution path; backends are expected to push
* it down or rewrite it before execution:
*
* <ul>
* <li>OpenSearch V3 path: {@code OpenSearchExecutionEngine#registerOpenSearchFunctions} registers
* a real HyperLogLog++ implementation in {@code PPLFuncImpTable.aggExternalFunctionRegistry},
* which overrides this marker (external registry has lookup precedence in {@code
* getImplementation}). {@code AggregateAnalyzer} then translates the operator to OpenSearch
* cardinality DSL.
* <li>Unified-query / DataFusion / analytics-engine path: backend planner rewrites the RexOver to
* {@code APPROX_COUNT_DISTINCT} (Calcite stdop) before substrait emission; the DataFusion
* substrait reader consumes that natively.
* </ul>
*
* <p>This class deliberately throws on every method. Reaching a method body means a backend either
* failed to push down or did not register an adapter — that is a configuration bug, not a runtime
* fallback. {@code RelevanceQueryFunction.RelevanceQueryImplementor} (used by {@code match}, {@code
* match_phrase}, etc.) follows the same pattern for relevance search functions that have no JVM
* execution semantics.
*/
public class DistinctCountApproxLogicalAggFunction
implements UserDefinedAggFunction<DistinctCountApproxLogicalAggFunction.MarkerAccumulator> {

private static final String NOT_EXECUTABLE =
"DISTINCT_COUNT_APPROX logical marker reached Enumerable execution; "
+ "an engine-specific implementation must be registered or rewritten before execution.";

@Override
public MarkerAccumulator init() {
throw new UnsupportedOperationException(NOT_EXECUTABLE);
}

@Override
public MarkerAccumulator add(MarkerAccumulator acc, Object... values) {
throw new UnsupportedOperationException(NOT_EXECUTABLE);
}

@Override
public Object result(MarkerAccumulator accumulator) {
throw new UnsupportedOperationException(NOT_EXECUTABLE);
}

/** Placeholder accumulator. Never actually constructed because {@link #init()} throws. */
public static class MarkerAccumulator implements Accumulator {
@Override
public Object value(Object... argList) {
throw new UnsupportedOperationException(NOT_EXECUTABLE);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.calcite.sql.type.SqlTypeTransforms;
import org.apache.calcite.sql.util.ReflectiveSqlOperatorTable;
import org.apache.calcite.util.BuiltInMethod;
import org.opensearch.sql.calcite.udf.udaf.DistinctCountApproxLogicalAggFunction;
import org.opensearch.sql.calcite.udf.udaf.FirstAggFunction;
import org.opensearch.sql.calcite.udf.udaf.LastAggFunction;
import org.opensearch.sql.calcite.udf.udaf.ListAggFunction;
Expand Down Expand Up @@ -508,6 +509,22 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
PPLReturnTypes.STRING_ARRAY,
PPLOperandTypes.ANY_SCALAR_OPTIONAL_INTEGER);

/**
* Logical marker for {@code DISTINCT_COUNT_APPROX} (also exposed as {@code dc} and {@code
* distinct_count} aliases). PPL parser uses this to produce a RelNode; backends override or
* rewrite it before execution. {@code OpenSearchExecutionEngine} registers a real HyperLogLog++
* implementation in the external registry of {@code PPLFuncImpTable}, which has lookup precedence
* and serves the OpenSearch V3 path. Other backends (DataFusion / analytics-engine) rewrite the
* operator on their own. Operand metadata is {@code null} to match the existing external
* registration's permissive policy and avoid introducing new type rejections.
*/
public static final SqlAggFunction DISTINCT_COUNT_APPROX =
createUserDefinedAggFunction(
DistinctCountApproxLogicalAggFunction.class,
"DISTINCT_COUNT_APPROX",
ReturnTypes.BIGINT_FORCE_NULLABLE,
null);

public static final SqlOperator ENHANCED_COALESCE =
new EnhancedCoalesceFunction().toUDF("COALESCE");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAY_OF_WEEK;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAY_OF_YEAR;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DEGREES;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DISTINCT_COUNT_APPROX;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDEFUNCTION;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DUR2SEC;
Expand Down Expand Up @@ -1426,6 +1427,13 @@ void populate() {
registerOperator(INTERNAL_PATTERN, PPLBuiltinOperators.INTERNAL_PATTERN);
registerOperator(LIST, PPLBuiltinOperators.LIST);
registerOperator(VALUES, PPLBuiltinOperators.VALUES);
// Logical marker so PPL parser succeeds on dc()/distinct_count()/distinct_count_approx()
// regardless of which execution path the query takes. OpenSearchExecutionEngine registers
// a real HyperLogLog++ implementation in aggExternalFunctionRegistry which overrides this
// marker via the external-first lookup precedence in getImplementation(). Other backends
// (DataFusion / analytics-engine) rewrite the operator before substrait emission and never
// execute the marker.
registerOperator(DISTINCT_COUNT_APPROX, PPLBuiltinOperators.DISTINCT_COUNT_APPROX);

register(
AVG,
Expand Down
Loading