diff --git a/core/src/main/java/org/opensearch/sql/calcite/udf/udaf/DistinctCountApproxLogicalAggFunction.java b/core/src/main/java/org/opensearch/sql/calcite/udf/udaf/DistinctCountApproxLogicalAggFunction.java new file mode 100644 index 00000000000..4a18fe35a5d --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/udf/udaf/DistinctCountApproxLogicalAggFunction.java @@ -0,0 +1,61 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.udf.udaf; + +import org.opensearch.sql.calcite.udf.UserDefinedAggFunction; + +/** + * Logical marker UDAF for {@code DISTINCT_COUNT_APPROX}. Lets the PPL parser produce a RelNode that + * contains the operator without committing to a JVM execution path; backends are expected to push + * it down or rewrite it before execution: + * + * + * + *

This class deliberately throws on every method. Reaching a method body means a backend either + * failed to push down or did not register an adapter — that is a configuration bug, not a runtime + * fallback. {@code RelevanceQueryFunction.RelevanceQueryImplementor} (used by {@code match}, {@code + * match_phrase}, etc.) follows the same pattern for relevance search functions that have no JVM + * execution semantics. + */ +public class DistinctCountApproxLogicalAggFunction + implements UserDefinedAggFunction { + + private static final String NOT_EXECUTABLE = + "DISTINCT_COUNT_APPROX logical marker reached Enumerable execution; " + + "an engine-specific implementation must be registered or rewritten before execution."; + + @Override + public MarkerAccumulator init() { + throw new UnsupportedOperationException(NOT_EXECUTABLE); + } + + @Override + public MarkerAccumulator add(MarkerAccumulator acc, Object... values) { + throw new UnsupportedOperationException(NOT_EXECUTABLE); + } + + @Override + public Object result(MarkerAccumulator accumulator) { + throw new UnsupportedOperationException(NOT_EXECUTABLE); + } + + /** Placeholder accumulator. Never actually constructed because {@link #init()} throws. */ + public static class MarkerAccumulator implements Accumulator { + @Override + public Object value(Object... argList) { + throw new UnsupportedOperationException(NOT_EXECUTABLE); + } + } +} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java index 4b7ee41f016..60172e70c84 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java @@ -29,6 +29,7 @@ import org.apache.calcite.sql.type.SqlTypeTransforms; import org.apache.calcite.sql.util.ReflectiveSqlOperatorTable; import org.apache.calcite.util.BuiltInMethod; +import org.opensearch.sql.calcite.udf.udaf.DistinctCountApproxLogicalAggFunction; import org.opensearch.sql.calcite.udf.udaf.FirstAggFunction; import org.opensearch.sql.calcite.udf.udaf.LastAggFunction; import org.opensearch.sql.calcite.udf.udaf.ListAggFunction; @@ -508,6 +509,22 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable { PPLReturnTypes.STRING_ARRAY, PPLOperandTypes.ANY_SCALAR_OPTIONAL_INTEGER); + /** + * Logical marker for {@code DISTINCT_COUNT_APPROX} (also exposed as {@code dc} and {@code + * distinct_count} aliases). PPL parser uses this to produce a RelNode; backends override or + * rewrite it before execution. {@code OpenSearchExecutionEngine} registers a real HyperLogLog++ + * implementation in the external registry of {@code PPLFuncImpTable}, which has lookup precedence + * and serves the OpenSearch V3 path. Other backends (DataFusion / analytics-engine) rewrite the + * operator on their own. Operand metadata is {@code null} to match the existing external + * registration's permissive policy and avoid introducing new type rejections. + */ + public static final SqlAggFunction DISTINCT_COUNT_APPROX = + createUserDefinedAggFunction( + DistinctCountApproxLogicalAggFunction.class, + "DISTINCT_COUNT_APPROX", + ReturnTypes.BIGINT_FORCE_NULLABLE, + null); + public static final SqlOperator ENHANCED_COALESCE = new EnhancedCoalesceFunction().toUDF("COALESCE"); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index c8f50c60596..6c8aecffec1 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -60,6 +60,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAY_OF_WEEK; import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAY_OF_YEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.DEGREES; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.DISTINCT_COUNT_APPROX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDEFUNCTION; import static org.opensearch.sql.expression.function.BuiltinFunctionName.DUR2SEC; @@ -1426,6 +1427,13 @@ void populate() { registerOperator(INTERNAL_PATTERN, PPLBuiltinOperators.INTERNAL_PATTERN); registerOperator(LIST, PPLBuiltinOperators.LIST); registerOperator(VALUES, PPLBuiltinOperators.VALUES); + // Logical marker so PPL parser succeeds on dc()/distinct_count()/distinct_count_approx() + // regardless of which execution path the query takes. OpenSearchExecutionEngine registers + // a real HyperLogLog++ implementation in aggExternalFunctionRegistry which overrides this + // marker via the external-first lookup precedence in getImplementation(). Other backends + // (DataFusion / analytics-engine) rewrite the operator before substrait emission and never + // execute the marker. + registerOperator(DISTINCT_COUNT_APPROX, PPLBuiltinOperators.DISTINCT_COUNT_APPROX); register( AVG,