From b7fd95b0c1661c502e91870e2faaad4de960764e Mon Sep 17 00:00:00 2001 From: Vinay Krishna Pudyodu Date: Fri, 22 May 2026 16:58:12 +0000 Subject: [PATCH 1/2] Added UDT for IP and Binary support Signed-off-by: Vinay Krishna Pudyodu --- .../calcite/utils/OpenSearchTypeFactory.java | 25 ++++++ .../analytics/AnalyticsExecutionEngine.java | 48 +++++++++++- .../expression/function/PPLFuncImpTable.java | 49 ++---------- .../utils/OpenSearchTypeFactoryTest.java | 58 ++++++++++++++ .../AnalyticsExecutionEngineTest.java | 77 +++++++++++++++++++ 5 files changed, 213 insertions(+), 44 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index e796d346449..72470dbefe4 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -46,6 +46,8 @@ import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeUtil; import org.checkerframework.checker.nullness.qual.Nullable; +import org.opensearch.analytics.schema.BinaryType; +import org.opensearch.analytics.schema.IpType; import org.opensearch.sql.calcite.type.AbstractExprRelDataType; import org.opensearch.sql.calcite.type.ExprBinaryType; import org.opensearch.sql.calcite.type.ExprDateType; @@ -275,6 +277,29 @@ public static ExprType convertRelDataTypeToExprType(RelDataType type) { return exprType; } + /** + * Same as {@link #convertRelDataTypeToExprType(RelDataType)} but additionally recognizes the + * analytics-engine {@link IpType} / {@link BinaryType} markers and returns {@link + * ExprCoreType#IP} / {@link ExprCoreType#BINARY} for them. + * + *

This is intentionally not done in the general {@link #convertRelDataTypeToExprType} + * path: that function is consulted by Calcite's planner- internal type coercion, which would then + * call {@link #convertExprTypeToRelDataType} to generate {@code CAST(... AS ExprIPType)} casts + * that the analytics-engine substrait converter can't handle. Restrict the UDT recognition to the + * result-schema build site (currently {@code AnalyticsExecutionEngine.buildSchema}) so the + * planner sees plain {@code BINARY} the way it did before, while the response schema still + * reports {@code "type":"ip"} / {@code "type":"binary"}. + */ + public static ExprType convertResultColumnRelDataTypeToExprType(RelDataType type) { + if (type instanceof IpType) { + return IP; + } + if (type instanceof BinaryType) { + return BINARY; + } + return convertRelDataTypeToExprType(type); + } + public static ExprValue getExprValueByExprType(ExprType type, Object value) { switch (type) { case UNDEFINED: diff --git a/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java b/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java index ddfe5fd3556..9fc90d09c42 100644 --- a/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java +++ b/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java @@ -5,7 +5,10 @@ package org.opensearch.sql.executor.analytics; +import java.net.InetAddress; +import java.net.UnknownHostException; import java.util.ArrayList; +import java.util.Base64; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -14,6 +17,9 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.opensearch.analytics.exec.QueryPlanExecutor; +import org.opensearch.analytics.schema.BinaryType; +import org.opensearch.analytics.schema.IpType; +import org.opensearch.common.network.InetAddresses; import org.opensearch.core.action.ActionListener; import org.opensearch.sql.ast.statement.ExplainMode; import org.opensearch.sql.calcite.CalcitePlanContext; @@ -123,15 +129,48 @@ private List convertRows(Iterable rows, List valueMap = new LinkedHashMap<>(); for (int i = 0; i < fields.size(); i++) { - String columnName = fields.get(i).getName(); + RelDataTypeField field = fields.get(i); Object value = (i < row.length) ? row[i] : null; - valueMap.put(columnName, ExprValueUtils.fromObjectValue(value)); + valueMap.put(field.getName(), toExprValue(value, field.getType())); } results.add(ExprTupleValue.fromExprValueMap(valueMap)); } return results; } + /** + * Converts a single result cell to an {@link ExprValue}, dispatching on the column's UDT when + * present so {@code byte[]} payloads are rendered correctly: + * + *

+ * + *

Without this dispatch, {@code fromObjectValue} throws {@code unsupported object class [B} on + * byte[] cells, and IP buffers leak through as raw 16-byte ipv4-mapped-ipv6 garbage. + */ + private static ExprValue toExprValue(Object value, RelDataType type) { + if (value instanceof byte[] bytes) { + if (type instanceof IpType) { + try { + return ExprValueUtils.stringValue( + InetAddresses.toAddrString(InetAddress.getByAddress(bytes))); + } catch (UnknownHostException e) { + // Defensive: backend gave us a buffer that isn't 4 or 16 bytes. Fall through to + // the default handling so the user sees a clear error rather than a malformed + // address string. + } + } else if (type instanceof BinaryType) { + return ExprValueUtils.stringValue(Base64.getEncoder().encodeToString(bytes)); + } + } + return ExprValueUtils.fromObjectValue(value); + } + private Schema buildSchema(List fields) { List columns = new ArrayList<>(); for (RelDataTypeField field : fields) { @@ -143,7 +182,10 @@ private Schema buildSchema(List fields) { private ExprType convertType(RelDataType type) { try { - return OpenSearchTypeFactory.convertRelDataTypeToExprType(type); + // Use the result-column variant so the response schema reports `ip` / `binary` for + // analytics-engine UDTs, while the planner's coercion path keeps using the basic + // {@link OpenSearchTypeFactory#convertRelDataTypeToExprType} that maps VARBINARY → BINARY. + return OpenSearchTypeFactory.convertResultColumnRelDataTypeToExprType(type); } catch (IllegalArgumentException e) { return org.opensearch.sql.data.type.ExprCoreType.UNKNOWN; } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index c8f50c60596..e13bb9eb2ca 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -269,7 +269,6 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEARWEEK; import com.google.common.collect.ImmutableMap; -import inet.ipaddr.IPAddress; import java.math.BigDecimal; import java.util.ArrayList; import java.util.Arrays; @@ -284,7 +283,6 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nullable; -import org.apache.calcite.avatica.util.ByteString; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexLambda; @@ -314,10 +312,8 @@ import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils; import org.opensearch.sql.exception.ExpressionEvaluationException; -import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.executor.QueryType; import org.opensearch.sql.expression.function.CollectionUDF.MVIndexFunctionImp; -import org.opensearch.sql.utils.IPUtils; public class PPLFuncImpTable { private static final Logger logger = LogManager.getLogger(PPLFuncImpTable.class); @@ -912,29 +908,18 @@ void populate() { registerDivideFunction(DIVIDE); registerDivideFunction(DIVIDEFUNCTION); registerOperator(SHA2, PPLBuiltinOperators.SHA2); + // CIDRMATCH typecheck-only registration. The runtime UDF (CidrMatchFunction) + // accepts (IP, STRING) and (STRING, STRING); the second register(...) below + // adds (BINARY, STRING) so calls against ip/binary columns (VARBINARY in the + // analytics-engine schema) typecheck and survive. The actual dispatch — literal + // const-fold and byte-range rewrite — happens downstream in + // analytics-backend-datafusion's CidrMatchFunctionAdapter, which runs in the + // analytics-engine plan walk and intercepts the call before Substrait conversion. registerOperator(CIDRMATCH, PPLBuiltinOperators.CIDRMATCH); - // (VARBINARY, VARCHAR) overload for ip / binary columns. The lambda parses the cidr - // literal at plan time and emits AND(col >= low, col <= high) directly. - // Only literal cidrs are expanded. register( CIDRMATCH, (FunctionImp2) - (builder, col, cidr) -> { - if (cidr instanceof RexLiteral lit - && col.getType().getSqlTypeName() == SqlTypeName.VARBINARY) { - byte[][] range = parseCidrToIpv6Range(lit.getValueAs(String.class)); - RelDataType varbinary = - builder.getTypeFactory().createSqlType(SqlTypeName.VARBINARY); - RexNode low = builder.makeLiteral(new ByteString(range[0]), varbinary, false); - RexNode high = builder.makeLiteral(new ByteString(range[1]), varbinary, false); - // makeCall(AND, ...) auto-flattens at construction, so no Filter.isFlat issue. - return builder.makeCall( - SqlStdOperatorTable.AND, - builder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, col, low), - builder.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, col, high)); - } - return builder.makeCall(PPLBuiltinOperators.CIDRMATCH, col, cidr); - }, + (builder, col, cidr) -> builder.makeCall(PPLBuiltinOperators.CIDRMATCH, col, cidr), PPLTypeChecker.family(SqlTypeFamily.BINARY, SqlTypeFamily.STRING)); registerOperator(INTERNAL_GROK, PPLBuiltinOperators.GROK); registerOperator(INTERNAL_PARSE, PPLBuiltinOperators.PARSE); @@ -1619,22 +1604,4 @@ private static SqlOperandTypeChecker extractTypeCheckerFromUDF(SqlOperator opera } return typeChecker; } - - /** - * Parses a CIDR string and returns its lower and upper bounds in canonical 16-byte IPv6-mapped - * form. Used by the (BINARY, STRING) {@code cidrmatch} overload to expand into a byte-range - * conjunction at plan time. - * - *

Delegates to {@link IPUtils#toRange(String)} for parsing; converts both bounds to IPv6 to - * guarantee 16-byte output regardless of whether the input cidr is IPv4 or IPv6. - */ - private static byte[][] parseCidrToIpv6Range(String cidr) { - if (cidr == null) { - throw new SemanticCheckException("cidrmatch range argument is null"); - } - IPAddress range = IPUtils.toRange(cidr); - byte[] low = range.getLower().toIPv6().getBytes(); - byte[] high = range.getUpper().toIPv6().getBytes(); - return new byte[][] {low, high}; - } } diff --git a/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java b/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java index 69ded62ed5c..93eef89922f 100644 --- a/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java +++ b/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java @@ -16,9 +16,12 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.sql.type.SqlTypeName; import org.junit.jupiter.api.Test; +import org.opensearch.analytics.schema.BinaryType; +import org.opensearch.analytics.schema.IpType; import org.opensearch.sql.calcite.type.AbstractExprRelDataType; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; public class OpenSearchTypeFactoryTest { @@ -282,4 +285,59 @@ public void testConvertExprTypeBinaryToNullableVarbinary() { assertEquals(SqlTypeName.VARBINARY, result.getSqlTypeName()); assertTrue(result.isNullable()); } + + // ---------- convertResultColumnRelDataTypeToExprType ---------- + // + // The result-column variant is the one called from + // {@code AnalyticsExecutionEngine.buildSchema} so the response reports + // {@code "type": "ip"} / {@code "binary"} for projected analytics-engine UDTs. + // It must add UDT recognition without disturbing the planner-internal + // {@link OpenSearchTypeFactory#convertRelDataTypeToExprType} path that + // Calcite's coercion machinery uses (a previous attempt to merge them broke + // {@code where host = '1.2.3.4'} with synthetic {@code IP(string)} casts). + + @Test + public void testConvertResultColumnIpTypeReturnsIpExprType() { + ExprType result = + OpenSearchTypeFactory.convertResultColumnRelDataTypeToExprType(new IpType(true)); + assertEquals(ExprCoreType.IP, result); + } + + @Test + public void testConvertResultColumnBinaryTypeReturnsBinaryExprType() { + ExprType result = + OpenSearchTypeFactory.convertResultColumnRelDataTypeToExprType(new BinaryType(true)); + assertEquals(ExprCoreType.BINARY, result); + } + + @Test + public void testConvertResultColumnPlainVarbinaryFallsBackToBinary() { + // Plain VARBINARY (no UDT marker) must keep returning BINARY ExprType — verifies + // the new function delegates to the original convertRelDataTypeToExprType for + // non-UDT inputs rather than diverging. + RelDataType varbinary = TYPE_FACTORY.createSqlType(SqlTypeName.VARBINARY); + ExprType result = OpenSearchTypeFactory.convertResultColumnRelDataTypeToExprType(varbinary); + assertEquals(ExprCoreType.BINARY, result); + } + + @Test + public void testConvertResultColumnDelegatesParityForNonUdtTypes() { + // For every non-UDT RelDataType the result-column variant must produce the + // same ExprType as the planner-internal variant. Drift here would mean the + // response schema labels diverge from what Calcite's coercion sees. + RelDataType[] samples = + new RelDataType[] { + TYPE_FACTORY.createSqlType(SqlTypeName.BIGINT), + TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR), + TYPE_FACTORY.createSqlType(SqlTypeName.BOOLEAN), + TYPE_FACTORY.createSqlType(SqlTypeName.DOUBLE), + TYPE_FACTORY.createSqlType(SqlTypeName.TIMESTAMP), + }; + for (RelDataType t : samples) { + assertEquals( + OpenSearchTypeFactory.convertRelDataTypeToExprType(t), + OpenSearchTypeFactory.convertResultColumnRelDataTypeToExprType(t), + "Result-column variant must agree with the general variant for " + t.getSqlTypeName()); + } + } } diff --git a/core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java b/core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java index 4de596fb375..9a9a5c299f3 100644 --- a/core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java @@ -28,6 +28,8 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.opensearch.analytics.exec.QueryPlanExecutor; +import org.opensearch.analytics.schema.BinaryType; +import org.opensearch.analytics.schema.IpType; import org.opensearch.core.action.ActionListener; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.SysLimit; @@ -180,6 +182,67 @@ void executeRelNode_temporalTypes() { // Query size limit is now enforced in the RelNode plan (LogicalSystemLimit) before it reaches // AnalyticsExecutionEngine. The engine trusts the executor to honor the limit. + /** + * IP columns ship from the analytics-engine backend as raw 16-byte ipv6-mapped buffers. {@code + * AnalyticsExecutionEngine.toExprValue} must format them as canonical IP strings (matching {@code + * InetAddresses.toAddrString}) and the schema must report {@code "type": "ip"} via the {@code + * IpType} UDT recognition path. + */ + @Test + void executeRelNode_ipColumnRendersAsAddressString() { + RelNode relNode = mockRelNodeWithType("host", new IpType(true)); + // 1.2.3.4 in ipv4-mapped-ipv6 form: 10 zero bytes + ff ff + 4 IPv4 bytes. + byte[] ipv4 = new byte[16]; + ipv4[10] = (byte) 0xff; + ipv4[11] = (byte) 0xff; + ipv4[12] = 1; + ipv4[13] = 2; + ipv4[14] = 3; + ipv4[15] = 4; + // ::1 in pure ipv6 form. + byte[] ipv6 = new byte[16]; + ipv6[15] = 1; + Iterable rows = Arrays.asList(new Object[] {ipv4}, new Object[] {ipv6}); + stubExecutorWith(relNode, rows); + + QueryResponse response = executeAndCapture(relNode); + String dump = dumpResponse(response); + + // Schema: column reports "ip", not "binary". + assertEquals(ExprCoreType.IP, response.getSchema().getColumns().get(0).getExprType(), dump); + // Cells: byte[] → formatted address string. + assertEquals( + "1.2.3.4", + response.getResults().get(0).tupleValue().get("host").value(), + "ipv4-mapped IPv6 buffer should render as dotted quad. " + dump); + assertEquals( + "::1", + response.getResults().get(1).tupleValue().get("host").value(), + "pure IPv6 buffer should render as RFC 5952 compressed form. " + dump); + } + + /** + * Binary columns ship from the analytics-engine backend as raw byte buffers. The dispatch must + * base64-encode them (matching the OpenSearch {@code binary} field wire contract) and the schema + * must report {@code "type": "binary"} via the {@code BinaryType} UDT recognition path. + */ + @Test + void executeRelNode_binaryColumnRendersAsBase64() { + RelNode relNode = mockRelNodeWithType("blob", new BinaryType(true)); + Iterable rows = + Collections.singletonList(new Object[] {"Some binary blob".getBytes()}); + stubExecutorWith(relNode, rows); + + QueryResponse response = executeAndCapture(relNode); + String dump = dumpResponse(response); + + assertEquals(ExprCoreType.BINARY, response.getSchema().getColumns().get(0).getExprType(), dump); + assertEquals( + "U29tZSBiaW5hcnkgYmxvYg==", + response.getResults().get(0).tupleValue().get("blob").value(), + "byte[] should base64-encode to match OpenSearch binary wire format. " + dump); + } + @Test void executeRelNode_emptyResults() { RelNode relNode = mockRelNode("name", SqlTypeName.VARCHAR); @@ -380,6 +443,20 @@ private RelNode mockRelNode(Object... nameTypePairs) { return relNode; } + /** + * Same shape as {@link #mockRelNode} but accepts a fully-built {@link RelDataType} per column, so + * callers can pass analytics-engine UDTs ({@link IpType}, {@link BinaryType}) — those don't have + * a matching {@link SqlTypeName} the type factory can construct. + */ + private RelNode mockRelNodeWithType(String name, RelDataType type) { + SqlTypeFactoryImpl typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT); + RelDataType rowType = typeFactory.builder().add(name, type).build(); + + RelNode relNode = mock(RelNode.class); + when(relNode.getRowType()).thenReturn(rowType); + return relNode; + } + private ResponseListener captureListener(AtomicReference ref) { return new ResponseListener() { @Override From 0918d6b0bc4c69509b4c42d1d311a8bc431ee582 Mon Sep 17 00:00:00 2001 From: Vinay Krishna Pudyodu Date: Fri, 22 May 2026 20:22:02 +0000 Subject: [PATCH 2/2] refactored cidr and updated comments Signed-off-by: Vinay Krishna Pudyodu --- .../calcite/utils/OpenSearchTypeFactory.java | 18 ++++----- .../analytics/AnalyticsExecutionEngine.java | 9 +---- .../expression/function/PPLFuncImpTable.java | 12 ------ .../function/udf/ip/CidrMatchFunction.java | 6 ++- .../utils/OpenSearchTypeFactoryTest.java | 38 +++++++------------ .../AnalyticsExecutionEngineTest.java | 19 ++-------- 6 files changed, 31 insertions(+), 71 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index 72470dbefe4..9c15c1485c1 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -278,19 +278,15 @@ public static ExprType convertRelDataTypeToExprType(RelDataType type) { } /** - * Same as {@link #convertRelDataTypeToExprType(RelDataType)} but additionally recognizes the - * analytics-engine {@link IpType} / {@link BinaryType} markers and returns {@link - * ExprCoreType#IP} / {@link ExprCoreType#BINARY} for them. + * Result-schema-only variant of {@link #convertRelDataTypeToExprType} that recognizes the + * analytics-engine {@link IpType} / {@link BinaryType} markers as {@link ExprCoreType#IP} / + * {@link ExprCoreType#BINARY}. * - *

This is intentionally not done in the general {@link #convertRelDataTypeToExprType} - * path: that function is consulted by Calcite's planner- internal type coercion, which would then - * call {@link #convertExprTypeToRelDataType} to generate {@code CAST(... AS ExprIPType)} casts - * that the analytics-engine substrait converter can't handle. Restrict the UDT recognition to the - * result-schema build site (currently {@code AnalyticsExecutionEngine.buildSchema}) so the - * planner sees plain {@code BINARY} the way it did before, while the response schema still - * reports {@code "type":"ip"} / {@code "type":"binary"}. + *

Kept off the general path because Calcite's planner-internal coercion would round-trip + * through {@link #convertExprTypeToRelDataType} and synthesize {@code CAST(... AS ExprIPType)} + * casts the substrait converter can't handle. */ - public static ExprType convertResultColumnRelDataTypeToExprType(RelDataType type) { + public static ExprType convertAnalyticsEngineRelDataTypeToExprType(RelDataType type) { if (type instanceof IpType) { return IP; } diff --git a/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java b/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java index 9fc90d09c42..1e997b15677 100644 --- a/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java +++ b/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java @@ -160,9 +160,7 @@ private static ExprValue toExprValue(Object value, RelDataType type) { return ExprValueUtils.stringValue( InetAddresses.toAddrString(InetAddress.getByAddress(bytes))); } catch (UnknownHostException e) { - // Defensive: backend gave us a buffer that isn't 4 or 16 bytes. Fall through to - // the default handling so the user sees a clear error rather than a malformed - // address string. + throw new IllegalStateException("invalid IP buffer length: " + bytes.length, e); } } else if (type instanceof BinaryType) { return ExprValueUtils.stringValue(Base64.getEncoder().encodeToString(bytes)); @@ -182,10 +180,7 @@ private Schema buildSchema(List fields) { private ExprType convertType(RelDataType type) { try { - // Use the result-column variant so the response schema reports `ip` / `binary` for - // analytics-engine UDTs, while the planner's coercion path keeps using the basic - // {@link OpenSearchTypeFactory#convertRelDataTypeToExprType} that maps VARBINARY → BINARY. - return OpenSearchTypeFactory.convertResultColumnRelDataTypeToExprType(type); + return OpenSearchTypeFactory.convertAnalyticsEngineRelDataTypeToExprType(type); } catch (IllegalArgumentException e) { return org.opensearch.sql.data.type.ExprCoreType.UNKNOWN; } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index e13bb9eb2ca..9255fd622d8 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -908,19 +908,7 @@ void populate() { registerDivideFunction(DIVIDE); registerDivideFunction(DIVIDEFUNCTION); registerOperator(SHA2, PPLBuiltinOperators.SHA2); - // CIDRMATCH typecheck-only registration. The runtime UDF (CidrMatchFunction) - // accepts (IP, STRING) and (STRING, STRING); the second register(...) below - // adds (BINARY, STRING) so calls against ip/binary columns (VARBINARY in the - // analytics-engine schema) typecheck and survive. The actual dispatch — literal - // const-fold and byte-range rewrite — happens downstream in - // analytics-backend-datafusion's CidrMatchFunctionAdapter, which runs in the - // analytics-engine plan walk and intercepts the call before Substrait conversion. registerOperator(CIDRMATCH, PPLBuiltinOperators.CIDRMATCH); - register( - CIDRMATCH, - (FunctionImp2) - (builder, col, cidr) -> builder.makeCall(PPLBuiltinOperators.CIDRMATCH, col, cidr), - PPLTypeChecker.family(SqlTypeFamily.BINARY, SqlTypeFamily.STRING)); registerOperator(INTERNAL_GROK, PPLBuiltinOperators.GROK); registerOperator(INTERNAL_PARSE, PPLBuiltinOperators.PARSE); registerOperator(MATCH, PPLBuiltinOperators.MATCH); diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CidrMatchFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CidrMatchFunction.java index c3a4fe4efe6..11d1bad8b43 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CidrMatchFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/ip/CidrMatchFunction.java @@ -30,6 +30,9 @@ *

*/ public class CidrMatchFunction extends ImplementorUDF { @@ -50,7 +53,8 @@ public UDFOperandMetadata getOperandMetadata() { return UDFOperandMetadata.wrapUDT( List.of( List.of(ExprCoreType.IP, ExprCoreType.STRING), - List.of(ExprCoreType.STRING, ExprCoreType.STRING))); + List.of(ExprCoreType.STRING, ExprCoreType.STRING), + List.of(ExprCoreType.BINARY, ExprCoreType.STRING))); } public static class CidrMatchImplementor implements NotNullImplementor { diff --git a/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java b/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java index 93eef89922f..7e57aaa9bf9 100644 --- a/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java +++ b/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java @@ -286,45 +286,35 @@ public void testConvertExprTypeBinaryToNullableVarbinary() { assertTrue(result.isNullable()); } - // ---------- convertResultColumnRelDataTypeToExprType ---------- - // - // The result-column variant is the one called from - // {@code AnalyticsExecutionEngine.buildSchema} so the response reports - // {@code "type": "ip"} / {@code "binary"} for projected analytics-engine UDTs. - // It must add UDT recognition without disturbing the planner-internal - // {@link OpenSearchTypeFactory#convertRelDataTypeToExprType} path that - // Calcite's coercion machinery uses (a previous attempt to merge them broke - // {@code where host = '1.2.3.4'} with synthetic {@code IP(string)} casts). + // ---------- convertAnalyticsEngineRelDataTypeToExprType ---------- + // UDT-aware variant for the response-schema path. Must agree with the + // planner-internal convertRelDataTypeToExprType on every non-UDT input. @Test - public void testConvertResultColumnIpTypeReturnsIpExprType() { + public void testConvertAnalyticsEngineIpTypeReturnsIpExprType() { ExprType result = - OpenSearchTypeFactory.convertResultColumnRelDataTypeToExprType(new IpType(true)); + OpenSearchTypeFactory.convertAnalyticsEngineRelDataTypeToExprType(new IpType(true)); assertEquals(ExprCoreType.IP, result); } @Test - public void testConvertResultColumnBinaryTypeReturnsBinaryExprType() { + public void testConvertAnalyticsEngineBinaryTypeReturnsBinaryExprType() { ExprType result = - OpenSearchTypeFactory.convertResultColumnRelDataTypeToExprType(new BinaryType(true)); + OpenSearchTypeFactory.convertAnalyticsEngineRelDataTypeToExprType(new BinaryType(true)); assertEquals(ExprCoreType.BINARY, result); } @Test - public void testConvertResultColumnPlainVarbinaryFallsBackToBinary() { - // Plain VARBINARY (no UDT marker) must keep returning BINARY ExprType — verifies - // the new function delegates to the original convertRelDataTypeToExprType for - // non-UDT inputs rather than diverging. + public void testConvertAnalyticsEnginePlainVarbinaryFallsBackToBinary() { + // Plain VARBINARY (no UDT) must still resolve to BINARY via the delegated path. RelDataType varbinary = TYPE_FACTORY.createSqlType(SqlTypeName.VARBINARY); - ExprType result = OpenSearchTypeFactory.convertResultColumnRelDataTypeToExprType(varbinary); + ExprType result = OpenSearchTypeFactory.convertAnalyticsEngineRelDataTypeToExprType(varbinary); assertEquals(ExprCoreType.BINARY, result); } @Test - public void testConvertResultColumnDelegatesParityForNonUdtTypes() { - // For every non-UDT RelDataType the result-column variant must produce the - // same ExprType as the planner-internal variant. Drift here would mean the - // response schema labels diverge from what Calcite's coercion sees. + public void testConvertAnalyticsEngineDelegatesParityForNonUdtTypes() { + // Parity check: drift would mean response-schema labels diverge from Calcite's view. RelDataType[] samples = new RelDataType[] { TYPE_FACTORY.createSqlType(SqlTypeName.BIGINT), @@ -336,8 +326,8 @@ public void testConvertResultColumnDelegatesParityForNonUdtTypes() { for (RelDataType t : samples) { assertEquals( OpenSearchTypeFactory.convertRelDataTypeToExprType(t), - OpenSearchTypeFactory.convertResultColumnRelDataTypeToExprType(t), - "Result-column variant must agree with the general variant for " + t.getSqlTypeName()); + OpenSearchTypeFactory.convertAnalyticsEngineRelDataTypeToExprType(t), + "Analytics-engine variant must agree with the general variant for " + t.getSqlTypeName()); } } } diff --git a/core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java b/core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java index 9a9a5c299f3..2fa48489df8 100644 --- a/core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java @@ -182,12 +182,7 @@ void executeRelNode_temporalTypes() { // Query size limit is now enforced in the RelNode plan (LogicalSystemLimit) before it reaches // AnalyticsExecutionEngine. The engine trusts the executor to honor the limit. - /** - * IP columns ship from the analytics-engine backend as raw 16-byte ipv6-mapped buffers. {@code - * AnalyticsExecutionEngine.toExprValue} must format them as canonical IP strings (matching {@code - * InetAddresses.toAddrString}) and the schema must report {@code "type": "ip"} via the {@code - * IpType} UDT recognition path. - */ + /** Raw 16-byte ipv6-mapped buffer + IpType → canonical IP string + schema reports "ip". */ @Test void executeRelNode_ipColumnRendersAsAddressString() { RelNode relNode = mockRelNodeWithType("host", new IpType(true)); @@ -221,11 +216,7 @@ void executeRelNode_ipColumnRendersAsAddressString() { "pure IPv6 buffer should render as RFC 5952 compressed form. " + dump); } - /** - * Binary columns ship from the analytics-engine backend as raw byte buffers. The dispatch must - * base64-encode them (matching the OpenSearch {@code binary} field wire contract) and the schema - * must report {@code "type": "binary"} via the {@code BinaryType} UDT recognition path. - */ + /** Raw byte buffer + BinaryType → base64 string + schema reports "binary". */ @Test void executeRelNode_binaryColumnRendersAsBase64() { RelNode relNode = mockRelNodeWithType("blob", new BinaryType(true)); @@ -443,11 +434,7 @@ private RelNode mockRelNode(Object... nameTypePairs) { return relNode; } - /** - * Same shape as {@link #mockRelNode} but accepts a fully-built {@link RelDataType} per column, so - * callers can pass analytics-engine UDTs ({@link IpType}, {@link BinaryType}) — those don't have - * a matching {@link SqlTypeName} the type factory can construct. - */ + /** Variant of {@link #mockRelNode} that accepts a pre-built RelDataType (e.g. UDTs). */ private RelNode mockRelNodeWithType(String name, RelDataType type) { SqlTypeFactoryImpl typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT); RelDataType rowType = typeFactory.builder().add(name, type).build();