diff --git a/core/src/main/java/org/opensearch/sql/calcite/udf/udaf/DistinctCountApproxLogicalAggFunction.java b/core/src/main/java/org/opensearch/sql/calcite/udf/udaf/DistinctCountApproxLogicalAggFunction.java
new file mode 100644
index 00000000000..4a18fe35a5d
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/calcite/udf/udaf/DistinctCountApproxLogicalAggFunction.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.calcite.udf.udaf;
+
+import org.opensearch.sql.calcite.udf.UserDefinedAggFunction;
+
+/**
+ * Logical marker UDAF for {@code DISTINCT_COUNT_APPROX}. Lets the PPL parser produce a RelNode that
+ * contains the operator without committing to a JVM execution path; backends are expected to push
+ * it down or rewrite it before execution:
+ *
+ * <ul>
+ *   <li>OpenSearch V3 path: {@code OpenSearchExecutionEngine#registerOpenSearchFunctions} registers
+ *       a real HyperLogLog++ implementation in {@code PPLFuncImpTable.aggExternalFunctionRegistry},
+ *       which overrides this marker (external registry has lookup precedence in {@code
+ *       getImplementation}). {@code AggregateAnalyzer} then translates the operator to OpenSearch
+ *       cardinality DSL.
+ *   <li>Unified-query / DataFusion / analytics-engine path: backend planner rewrites the RexOver to
+ *       {@code APPROX_COUNT_DISTINCT} (Calcite stdop) before substrait emission; the DataFusion
+ *       substrait reader consumes that natively.
+ * </ul>
+ *
+ * <p>This class deliberately throws on every method. Reaching a method body means a backend either
+ * failed to push down or did not register an adapter — that is a configuration bug, not a runtime
+ * fallback. {@code RelevanceQueryFunction.RelevanceQueryImplementor} (used by {@code match}, {@code
+ * match_phrase}, etc.) follows the same pattern for relevance search functions that have no JVM
+ * execution semantics.
+ */
+public class DistinctCountApproxLogicalAggFunction
+    implements UserDefinedAggFunction<DistinctCountApproxLogicalAggFunction.MarkerAccumulator> {
+
+  /** Message carried by every exception thrown from this marker and its accumulator. */
+  private static final String NOT_EXECUTABLE =
+      "DISTINCT_COUNT_APPROX logical marker reached Enumerable execution; "
+          + "an engine-specific implementation must be registered or rewritten before execution.";
+
+  /**
+   * Always fails; the marker never creates accumulator state.
+   *
+   * @return never returns normally
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public MarkerAccumulator init() {
+    throw new UnsupportedOperationException(NOT_EXECUTABLE);
+  }
+
+  /**
+   * Always fails; the marker never accumulates input values.
+   *
+   * @param acc ignored
+   * @param values ignored
+   * @return never returns normally
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public MarkerAccumulator add(MarkerAccumulator acc, Object... values) {
+    throw new UnsupportedOperationException(NOT_EXECUTABLE);
+  }
+
+  /**
+   * Always fails; the marker never produces an aggregate result.
+   *
+   * @param accumulator ignored
+   * @return never returns normally
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public Object result(MarkerAccumulator accumulator) {
+    throw new UnsupportedOperationException(NOT_EXECUTABLE);
+  }
+
+  /**
+   * Placeholder accumulator that serves as the type argument of the enclosing marker. Never
+   * actually constructed because {@link DistinctCountApproxLogicalAggFunction#init()} throws.
+   */
+  public static class MarkerAccumulator implements Accumulator {
+    /**
+     * Always fails, for the same reason as the enclosing marker's methods.
+     *
+     * @param argList ignored
+     * @return never returns normally
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public Object value(Object... argList) {
+      throw new UnsupportedOperationException(NOT_EXECUTABLE);
+    }
+  }
+}
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java
index 4b7ee41f016..60172e70c84 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java
@@ -29,6 +29,7 @@
import org.apache.calcite.sql.type.SqlTypeTransforms;
import org.apache.calcite.sql.util.ReflectiveSqlOperatorTable;
import org.apache.calcite.util.BuiltInMethod;
+import org.opensearch.sql.calcite.udf.udaf.DistinctCountApproxLogicalAggFunction;
import org.opensearch.sql.calcite.udf.udaf.FirstAggFunction;
import org.opensearch.sql.calcite.udf.udaf.LastAggFunction;
import org.opensearch.sql.calcite.udf.udaf.ListAggFunction;
@@ -508,6 +509,22 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
PPLReturnTypes.STRING_ARRAY,
PPLOperandTypes.ANY_SCALAR_OPTIONAL_INTEGER);
+ /**
+ * Logical marker aggregate for {@code DISTINCT_COUNT_APPROX} (also exposed through the {@code dc}
+ * and {@code distinct_count} aliases). The PPL parser uses it to produce a RelNode; backends
+ * override or rewrite it before execution. {@code OpenSearchExecutionEngine} registers a real
+ * HyperLogLog++ implementation in the external registry of {@code PPLFuncImpTable}, which has
+ * lookup precedence and serves the OpenSearch V3 path. Other backends (DataFusion /
+ * analytics-engine) rewrite the operator on their own.
+ *
+ * <p>The declared return type is {@code ReturnTypes.BIGINT_FORCE_NULLABLE}. Operand metadata is
+ * {@code null} to match the existing external registration's permissive policy and avoid
+ * introducing new type rejections.
+ */
+ public static final SqlAggFunction DISTINCT_COUNT_APPROX =
+ createUserDefinedAggFunction(
+ DistinctCountApproxLogicalAggFunction.class,
+ "DISTINCT_COUNT_APPROX",
+ ReturnTypes.BIGINT_FORCE_NULLABLE,
+ null);
+
public static final SqlOperator ENHANCED_COALESCE =
new EnhancedCoalesceFunction().toUDF("COALESCE");
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java
index c8f50c60596..6c8aecffec1 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java
@@ -60,6 +60,7 @@
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAY_OF_WEEK;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAY_OF_YEAR;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DEGREES;
+import static org.opensearch.sql.expression.function.BuiltinFunctionName.DISTINCT_COUNT_APPROX;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDEFUNCTION;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DUR2SEC;
@@ -1426,6 +1427,13 @@ void populate() {
registerOperator(INTERNAL_PATTERN, PPLBuiltinOperators.INTERNAL_PATTERN);
registerOperator(LIST, PPLBuiltinOperators.LIST);
registerOperator(VALUES, PPLBuiltinOperators.VALUES);
+ // Logical marker so PPL parser succeeds on dc()/distinct_count()/distinct_count_approx()
+ // regardless of which execution path the query takes. OpenSearchExecutionEngine registers
+ // a real HyperLogLog++ implementation in aggExternalFunctionRegistry which overrides this
+ // marker via the external-first lookup precedence in getImplementation(). Other backends
+ // (DataFusion / analytics-engine) rewrite the operator before substrait emission and never
+ // execute the marker.
+ registerOperator(DISTINCT_COUNT_APPROX, PPLBuiltinOperators.DISTINCT_COUNT_APPROX);
register(
AVG,