diff --git a/auron-flink-extension/auron-flink-planner/pom.xml b/auron-flink-extension/auron-flink-planner/pom.xml index bce3ae821..4f2a9b18b 100644 --- a/auron-flink-extension/auron-flink-planner/pom.xml +++ b/auron-flink-extension/auron-flink-planner/pom.xml @@ -303,6 +303,7 @@ **/*ITCase.java + **/*Test.java diff --git a/auron-flink-extension/auron-flink-planner/src/main/java/org/apache/auron/flink/table/planner/converter/RexCallConverter.java b/auron-flink-extension/auron-flink-planner/src/main/java/org/apache/auron/flink/table/planner/converter/RexCallConverter.java new file mode 100644 index 000000000..ca5954759 --- /dev/null +++ b/auron-flink-extension/auron-flink-planner/src/main/java/org/apache/auron/flink/table/planner/converter/RexCallConverter.java @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.auron.flink.table.planner.converter; + +import java.util.EnumSet; +import java.util.Optional; +import java.util.Set; +import org.apache.auron.flink.utils.SchemaConverters; +import org.apache.auron.protobuf.PhysicalBinaryExprNode; +import org.apache.auron.protobuf.PhysicalExprNode; +import org.apache.auron.protobuf.PhysicalNegativeNode; +import org.apache.auron.protobuf.PhysicalTryCastNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeSystem; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.SqlTypeFactoryImpl; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeUtil; +import org.apache.flink.table.planner.calcite.FlinkTypeFactory; +import org.apache.flink.table.types.logical.LogicalType; + +/** + * Converts a Calcite {@link RexCall} (operator expression) to an Auron native + * {@link PhysicalExprNode}. + * + *

Handles arithmetic operators ({@code +}, {@code -}, {@code *}, {@code /}, + * {@code %}), unary minus/plus, and {@code CAST}. Binary arithmetic operands + * are promoted to a common type before conversion, and the result is cast to + * the output type if it differs from the common type. + */ +public class RexCallConverter implements FlinkRexNodeConverter { + + private static final RelDataTypeFactory TYPE_FACTORY = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT); + + /** Binary arithmetic kinds that require numeric result type. */ + private static final Set BINARY_ARITHMETIC_KINDS = + EnumSet.of(SqlKind.PLUS, SqlKind.MINUS, SqlKind.TIMES, SqlKind.DIVIDE, SqlKind.MOD); + + /** All supported SqlKinds including unary and cast. */ + private static final Set SUPPORTED_KINDS = EnumSet.of( + SqlKind.PLUS, + SqlKind.MINUS, + SqlKind.TIMES, + SqlKind.DIVIDE, + SqlKind.MOD, + SqlKind.MINUS_PREFIX, + SqlKind.PLUS_PREFIX, + SqlKind.CAST); + + private final FlinkNodeConverterFactory factory; + + /** + * Creates a new converter that delegates operand conversion to the given + * factory. + * + * @param factory the factory used for recursive operand conversion + */ + public RexCallConverter(FlinkNodeConverterFactory factory) { + this.factory = factory; + } + + /** {@inheritDoc} */ + @Override + public Class getNodeClass() { + return RexCall.class; + } + + /** + * Returns {@code true} if the call's {@link SqlKind} is supported. + * + *

For binary arithmetic kinds, the call's result type must also be + * numeric to reject non-arithmetic uses (e.g., TIMESTAMP + INTERVAL). + */ + @Override + public boolean isSupported(RexNode node, ConverterContext context) { + RexCall call = (RexCall) node; + SqlKind kind = call.getKind(); + if (!SUPPORTED_KINDS.contains(kind)) { + return false; + } + if (BINARY_ARITHMETIC_KINDS.contains(kind)) { + return SqlTypeUtil.isNumeric(call.getType()); + } + return true; + } + + /** + * Converts the given {@link RexCall} to a native {@link PhysicalExprNode}. + * + *

Dispatches by {@link SqlKind}: + *

+ * + * @throws IllegalArgumentException if the SqlKind is not supported + */ + @Override + public PhysicalExprNode convert(RexNode node, ConverterContext context) { + RexCall call = (RexCall) node; + SqlKind kind = call.getKind(); + switch (kind) { + case PLUS: + return buildBinaryExpr(call, "Plus", context); + case MINUS: + return buildBinaryExpr(call, "Minus", context); + case TIMES: + return buildBinaryExpr(call, "Multiply", context); + case DIVIDE: + return buildBinaryExpr(call, "Divide", context); + case MOD: + return buildBinaryExpr(call, "Modulo", context); + case MINUS_PREFIX: + return buildNegative(call, context); + case PLUS_PREFIX: + return convertOperand(call.getOperands().get(0), context); + case CAST: + return buildTryCast(call, context); + default: + throw new IllegalArgumentException("Unsupported SqlKind: " + kind); + } + } + + /** + * Builds a binary expression with type promotion between operands. + * + *

Operands are promoted to a common type. If the call's output type + * differs from the common type, the result is wrapped in a TryCast. + */ + private PhysicalExprNode buildBinaryExpr(RexCall call, String op, ConverterContext context) { + RexNode left = call.getOperands().get(0); + RexNode right = call.getOperands().get(1); + RelDataType outputType = call.getType(); + + RelDataType compatibleType = getCommonTypeForComparison(left.getType(), right.getType(), TYPE_FACTORY); + if (compatibleType == null) { + throw new IllegalStateException("Incompatible types: " + + left.getType().getSqlTypeName() + + " and " + + right.getType().getSqlTypeName()); + } + + PhysicalExprNode leftExpr = castIfNecessary(left, compatibleType, context); + PhysicalExprNode rightExpr = castIfNecessary(right, compatibleType, context); + + PhysicalExprNode binaryExpr = PhysicalExprNode.newBuilder() + .setBinaryExpr(PhysicalBinaryExprNode.newBuilder() + .setL(leftExpr) + .setR(rightExpr) + .setOp(op)) + .build(); + + if (!outputType.getSqlTypeName().equals(compatibleType.getSqlTypeName())) { + return wrapInTryCast(binaryExpr, outputType); + } + return binaryExpr; + } + + /** + * Computes the common type for two operand types during arithmetic + * promotion. + * + *

Rules: + *

+ * + * @param type1 left operand type + * @param type2 right operand type + * @param typeFactory factory for creating result types + * @return the common type, or {@code null} if incompatible + */ + static RelDataType getCommonTypeForComparison( + RelDataType type1, RelDataType type2, RelDataTypeFactory typeFactory) { + if (type1.getSqlTypeName().equals(type2.getSqlTypeName())) { + return type1; + } + if (SqlTypeUtil.isNumeric(type1) && SqlTypeUtil.isNumeric(type2)) { + SqlTypeName t1 = type1.getSqlTypeName(); + SqlTypeName t2 = type2.getSqlTypeName(); + if (t1 == SqlTypeName.DECIMAL || t2 == SqlTypeName.DECIMAL) { + return typeFactory.createSqlType(SqlTypeName.DECIMAL); + } + if (notApproxType(t1) && notApproxType(t2)) { + return typeFactory.createSqlType(SqlTypeName.BIGINT); + } + return typeFactory.createSqlType(SqlTypeName.DOUBLE); + } + if (SqlTypeUtil.inCharFamily(type1) && SqlTypeUtil.inCharFamily(type2)) { + return typeFactory.createSqlType(SqlTypeName.VARCHAR); + } + return null; + } + + private static boolean notApproxType(SqlTypeName typeName) { + return typeName != SqlTypeName.FLOAT && typeName != SqlTypeName.DOUBLE; + } + + /** + * Wraps the converted operand in a TryCast if its type differs from the + * target type. + */ + private PhysicalExprNode castIfNecessary(RexNode expr, RelDataType targetType, ConverterContext context) { + PhysicalExprNode converted = convertOperand(expr, context); + if (expr.getType().getSqlTypeName().equals(targetType.getSqlTypeName())) { + return converted; + } + return wrapInTryCast(converted, targetType); + } + + /** + * Delegates operand conversion to the factory. 
+ * + * @throws IllegalStateException if no converter is registered for + * the operand + */ + private PhysicalExprNode convertOperand(RexNode operand, ConverterContext context) { + Optional result = factory.convertRexNode(operand, context); + if (!result.isPresent()) { + throw new IllegalStateException("Failed to convert operand: " + operand + " (no converter registered)"); + } + return result.get(); + } + + private static PhysicalExprNode wrapInTryCast(PhysicalExprNode expr, RelDataType targetType) { + LogicalType logicalType = FlinkTypeFactory.toLogicalType(targetType); + org.apache.auron.protobuf.ArrowType arrowType = SchemaConverters.convertToAuronArrowType(logicalType); + return PhysicalExprNode.newBuilder() + .setTryCast(PhysicalTryCastNode.newBuilder().setExpr(expr).setArrowType(arrowType)) + .build(); + } + + private PhysicalExprNode buildNegative(RexCall call, ConverterContext context) { + PhysicalExprNode operand = convertOperand(call.getOperands().get(0), context); + return PhysicalExprNode.newBuilder() + .setNegative(PhysicalNegativeNode.newBuilder().setExpr(operand)) + .build(); + } + + private PhysicalExprNode buildTryCast(RexCall call, ConverterContext context) { + PhysicalExprNode operand = convertOperand(call.getOperands().get(0), context); + return wrapInTryCast(operand, call.getType()); + } +} diff --git a/auron-flink-extension/auron-flink-planner/src/main/java/org/apache/auron/flink/table/planner/converter/RexInputRefConverter.java b/auron-flink-extension/auron-flink-planner/src/main/java/org/apache/auron/flink/table/planner/converter/RexInputRefConverter.java new file mode 100644 index 000000000..b65a8b4d7 --- /dev/null +++ b/auron-flink-extension/auron-flink-planner/src/main/java/org/apache/auron/flink/table/planner/converter/RexInputRefConverter.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.auron.flink.table.planner.converter;

import org.apache.auron.protobuf.PhysicalColumn;
import org.apache.auron.protobuf.PhysicalExprNode;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;

/**
 * Converts a Calcite {@link RexInputRef} (column reference) to an Auron native {@link
 * PhysicalExprNode} containing a {@link PhysicalColumn}.
 *
 * <p>A column reference is convertible whenever its index lies inside the input schema bounds;
 * each valid {@code RexInputRef} corresponds one-to-one with a named, indexed field of the input
 * schema carried by the {@link ConverterContext}.
 */
public class RexInputRefConverter implements FlinkRexNodeConverter {

    /** {@inheritDoc} */
    @Override
    public Class<? extends RexNode> getNodeClass() {
        return RexInputRef.class;
    }

    /**
     * Checks that the referenced column index falls within the input schema.
     *
     * @param node the RexNode to check (must be a {@link RexInputRef})
     * @param context shared conversion state
     * @return {@code true} if the index is valid
     */
    @Override
    public boolean isSupported(RexNode node, ConverterContext context) {
        RexInputRef ref = (RexInputRef) node;
        int fieldCount = context.getInputType().getFieldCount();
        return ref.getIndex() < fieldCount;
    }

    /**
     * Converts the given {@link RexInputRef} to a {@link PhysicalExprNode} with a {@link
     * PhysicalColumn}.
     *
     * <p>The column name is looked up by index in the input schema obtained from {@link
     * ConverterContext#getInputType()}.
     *
     * @param node the RexNode to convert (must be a {@link RexInputRef})
     * @param context shared conversion state containing the input schema
     * @return a {@link PhysicalExprNode} wrapping a {@link PhysicalColumn} with name and index
     * @throws IllegalArgumentException if the node is not a {@link RexInputRef}
     */
    @Override
    public PhysicalExprNode convert(RexNode node, ConverterContext context) {
        RexInputRef ref = (RexInputRef) node;
        int columnIndex = ref.getIndex();
        String columnName = context.getInputType().getFieldNames().get(columnIndex);

        PhysicalColumn.Builder column =
                PhysicalColumn.newBuilder().setName(columnName).setIndex(columnIndex);
        return PhysicalExprNode.newBuilder().setColumn(column).build();
    }
}
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.auron.flink.table.planner.converter; + +import com.google.protobuf.ByteString; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.EnumSet; +import java.util.Set; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.DecimalVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TinyIntVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.auron.protobuf.PhysicalExprNode; +import org.apache.auron.protobuf.ScalarValue; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.type.SqlTypeName; + +/** + * Converts a Calcite {@link RexLiteral} to an Auron native {@link PhysicalExprNode} + * containing a {@link ScalarValue} with Arrow IPC bytes. + * + *

The literal value is serialized as a single-element Arrow vector in IPC stream format, + * following the same pattern as the Spark implementation in {@code NativeConverters}. + * + *

Supported types: {@code TINYINT}, {@code SMALLINT}, {@code INTEGER}, {@code BIGINT}, + * {@code FLOAT}, {@code DOUBLE}, {@code DECIMAL}, {@code BOOLEAN}, {@code CHAR}, + * {@code VARCHAR}, and {@code NULL} (of a supported type). + */ +public class RexLiteralConverter implements FlinkRexNodeConverter { + + private static final Set SUPPORTED_TYPES = EnumSet.of( + SqlTypeName.TINYINT, + SqlTypeName.SMALLINT, + SqlTypeName.INTEGER, + SqlTypeName.BIGINT, + SqlTypeName.FLOAT, + SqlTypeName.DOUBLE, + SqlTypeName.DECIMAL, + SqlTypeName.BOOLEAN, + SqlTypeName.CHAR, + SqlTypeName.VARCHAR); + + /** {@inheritDoc} */ + @Override + public Class getNodeClass() { + return RexLiteral.class; + } + + /** + * Returns {@code true} if the literal's SQL type is supported for native conversion. + * + *

For null literals, the underlying type is still checked — a null of an unsupported + * type (e.g., TIMESTAMP) returns {@code false}. + */ + @Override + public boolean isSupported(RexNode node, ConverterContext context) { + RexLiteral literal = (RexLiteral) node; + SqlTypeName typeName = literal.getType().getSqlTypeName(); + return isSupportedType(typeName); + } + + /** + * Converts the given {@link RexLiteral} to a {@link PhysicalExprNode} with Arrow IPC bytes. + * + * @throws IllegalArgumentException if the literal type is not supported + */ + @Override + public PhysicalExprNode convert(RexNode node, ConverterContext context) { + RexLiteral literal = (RexLiteral) node; + byte[] ipcBytes = serializeToIpc(literal); + return PhysicalExprNode.newBuilder() + .setLiteral(ScalarValue.newBuilder().setIpcBytes(ByteString.copyFrom(ipcBytes))) + .build(); + } + + private static boolean isSupportedType(SqlTypeName typeName) { + return SUPPORTED_TYPES.contains(typeName); + } + + /** + * Serializes the literal value as a single-element Arrow vector in IPC stream format. + */ + private static byte[] serializeToIpc(RexLiteral literal) { + Field field = arrowFieldForType(literal); + Schema schema = new Schema(Collections.singletonList(field)); + + try (BufferAllocator allocator = new RootAllocator(); + VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + + root.allocateNew(); + FieldVector vector = root.getVector(0); + + if (literal.isNull()) { + vector.setNull(0); + } else { + setVectorValue(literal, vector); + } + + vector.setValueCount(1); + root.setRowCount(1); + + return writeIpcBytes(root); + } catch (IOException e) { + throw new IllegalStateException("Failed to serialize literal to Arrow IPC", e); + } + } + + /** + * Returns the Arrow {@link Field} corresponding to the literal's Calcite type. 
+ */ + private static Field arrowFieldForType(RexLiteral literal) { + SqlTypeName typeName = literal.getType().getSqlTypeName(); + switch (typeName) { + case TINYINT: + return Field.nullable("v", new ArrowType.Int(8, true)); + case SMALLINT: + return Field.nullable("v", new ArrowType.Int(16, true)); + case INTEGER: + return Field.nullable("v", new ArrowType.Int(32, true)); + case BIGINT: + return Field.nullable("v", new ArrowType.Int(64, true)); + case FLOAT: + return Field.nullable("v", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)); + case DOUBLE: + return Field.nullable("v", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)); + case DECIMAL: + int precision = literal.getType().getPrecision(); + int scale = literal.getType().getScale(); + return Field.nullable("v", new ArrowType.Decimal(precision, scale, 128)); + case BOOLEAN: + return Field.nullable("v", ArrowType.Bool.INSTANCE); + case CHAR: + case VARCHAR: + return Field.nullable("v", ArrowType.Utf8.INSTANCE); + default: + throw new IllegalArgumentException("Unsupported type: " + typeName); + } + } + + /** + * Sets the value at index 0 of the given vector based on the literal's type. 
+ */ + private static void setVectorValue(RexLiteral literal, FieldVector vector) { + SqlTypeName typeName = literal.getType().getSqlTypeName(); + switch (typeName) { + case TINYINT: + ((TinyIntVector) vector).set(0, literal.getValueAs(Byte.class)); + break; + case SMALLINT: + ((SmallIntVector) vector).set(0, literal.getValueAs(Short.class)); + break; + case INTEGER: + ((IntVector) vector).set(0, literal.getValueAs(Integer.class)); + break; + case BIGINT: + ((BigIntVector) vector).set(0, literal.getValueAs(Long.class)); + break; + case FLOAT: + ((Float4Vector) vector).set(0, literal.getValueAs(Float.class)); + break; + case DOUBLE: + ((Float8Vector) vector).set(0, literal.getValueAs(Double.class)); + break; + case DECIMAL: + ((DecimalVector) vector).set(0, literal.getValueAs(BigDecimal.class)); + break; + case BOOLEAN: + ((BitVector) vector).set(0, literal.getValueAs(Boolean.class) ? 1 : 0); + break; + case CHAR: + case VARCHAR: + ((VarCharVector) vector).set(0, literal.getValueAs(String.class).getBytes(StandardCharsets.UTF_8)); + break; + default: + throw new IllegalArgumentException("Unsupported type: " + typeName); + } + } + + /** + * Writes the given {@link VectorSchemaRoot} to Arrow IPC stream format. 
+ */ + private static byte[] writeIpcBytes(VectorSchemaRoot root) throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try (ArrowStreamWriter writer = new ArrowStreamWriter(root, null, out)) { + writer.start(); + writer.writeBatch(); + writer.end(); + } + return out.toByteArray(); + } +} diff --git a/auron-flink-extension/auron-flink-planner/src/test/java/org/apache/auron/flink/table/planner/converter/RexCallConverterTest.java b/auron-flink-extension/auron-flink-planner/src/test/java/org/apache/auron/flink/table/planner/converter/RexCallConverterTest.java new file mode 100644 index 000000000..9a4c8a8e8 --- /dev/null +++ b/auron-flink-extension/auron-flink-planner/src/test/java/org/apache/auron/flink/table/planner/converter/RexCallConverterTest.java @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.auron.flink.table.planner.converter; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.math.BigDecimal; +import java.util.Arrays; +import org.apache.auron.protobuf.PhysicalExprNode; +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.table.types.logical.BigIntType; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.RowType; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** Tests for {@link RexCallConverter}. 
*/ +class RexCallConverterTest { + + private static final RelDataTypeFactory TYPE_FACTORY = new JavaTypeFactoryImpl(); + private static final RexBuilder REX_BUILDER = new RexBuilder(TYPE_FACTORY); + + private FlinkNodeConverterFactory factory; + private RexCallConverter converter; + private ConverterContext context; + + @BeforeEach + void setUp() { + factory = new FlinkNodeConverterFactory(); + converter = new RexCallConverter(factory); + factory.registerRexConverter(new RexInputRefConverter()); + factory.registerRexConverter(new RexLiteralConverter()); + factory.registerRexConverter(converter); + + RowType inputType = RowType.of(new LogicalType[] {new IntType(), new BigIntType()}, new String[] {"f0", "f1"}); + context = new ConverterContext(new Configuration(), null, getClass().getClassLoader(), inputType); + } + + @Test + void testGetNodeClass() { + assertEquals(RexCall.class, converter.getNodeClass()); + } + + @Test + void testConvertPlus() { + RexNode plus = makeCall(intType(), SqlStdOperatorTable.PLUS, makeIntRef(0), makeIntRef(0)); + + PhysicalExprNode result = converter.convert(plus, context); + + assertTrue(result.hasBinaryExpr()); + assertEquals("Plus", result.getBinaryExpr().getOp()); + } + + @Test + void testConvertMinus() { + RexNode minus = makeCall(intType(), SqlStdOperatorTable.MINUS, makeIntRef(0), makeIntRef(0)); + + PhysicalExprNode result = converter.convert(minus, context); + + assertTrue(result.hasBinaryExpr()); + assertEquals("Minus", result.getBinaryExpr().getOp()); + } + + @Test + void testConvertTimes() { + RexNode times = makeCall(intType(), SqlStdOperatorTable.MULTIPLY, makeIntRef(0), makeIntRef(0)); + + PhysicalExprNode result = converter.convert(times, context); + + assertTrue(result.hasBinaryExpr()); + assertEquals("Multiply", result.getBinaryExpr().getOp()); + } + + @Test + void testConvertDivide() { + RexNode divide = makeCall(intType(), SqlStdOperatorTable.DIVIDE, makeIntRef(0), makeIntRef(0)); + + PhysicalExprNode result = 
converter.convert(divide, context); + + assertTrue(result.hasBinaryExpr()); + assertEquals("Divide", result.getBinaryExpr().getOp()); + } + + @Test + void testConvertMod() { + RexNode mod = makeCall(intType(), SqlStdOperatorTable.MOD, makeIntRef(0), makeIntRef(0)); + + PhysicalExprNode result = converter.convert(mod, context); + + assertTrue(result.hasBinaryExpr()); + assertEquals("Modulo", result.getBinaryExpr().getOp()); + } + + @Test + void testConvertUnaryMinus() { + RexNode neg = REX_BUILDER.makeCall(SqlStdOperatorTable.UNARY_MINUS, makeIntRef(0)); + + PhysicalExprNode result = converter.convert(neg, context); + + assertTrue(result.hasNegative()); + assertTrue(result.getNegative().getExpr().hasColumn()); + } + + @Test + void testConvertUnaryPlus() { + RexNode pos = REX_BUILDER.makeCall(SqlStdOperatorTable.UNARY_PLUS, makeIntRef(0)); + + PhysicalExprNode result = converter.convert(pos, context); + + // Unary plus is identity — passthrough to operand + assertTrue(result.hasColumn()); + assertEquals("f0", result.getColumn().getName()); + } + + @Test + void testConvertCast() { + RexNode cast = makeCall(bigintType(), SqlStdOperatorTable.CAST, makeIntRef(0)); + + PhysicalExprNode result = converter.convert(cast, context); + + assertTrue(result.hasTryCast()); + assertTrue(result.getTryCast().getExpr().hasColumn()); + assertTrue(result.getTryCast().hasArrowType()); + } + + @Test + void testConvertMixedTypePromotion() { + // INT (f0) + BIGINT (f1) — left operand should be promoted + RexNode intRef = makeIntRef(0); + RexNode bigintRef = REX_BUILDER.makeInputRef(bigintType(), 1); + RexNode mixedPlus = makeCall(bigintType(), SqlStdOperatorTable.PLUS, intRef, bigintRef); + + PhysicalExprNode result = converter.convert(mixedPlus, context); + + assertTrue(result.hasBinaryExpr()); + assertEquals("Plus", result.getBinaryExpr().getOp()); + // Left operand (INT) should be wrapped in TryCast to BIGINT + PhysicalExprNode left = result.getBinaryExpr().getL(); + 
assertTrue(left.hasTryCast(), "Left operand should be cast from INT to BIGINT"); + // Right operand (BIGINT) should be plain column + PhysicalExprNode right = result.getBinaryExpr().getR(); + assertTrue(right.hasColumn(), "Right operand should be plain column (already BIGINT)"); + } + + @Test + void testConvertOutputTypeCast() { + // INT + INT where output type is BIGINT → result wrapped in TryCast + RexNode plus = makeCall(bigintType(), SqlStdOperatorTable.PLUS, makeIntRef(0), makeIntRef(0)); + + PhysicalExprNode result = converter.convert(plus, context); + + // Both operands are INT, compatible type is INT, + // but output type is BIGINT → outer TryCast + assertTrue(result.hasTryCast(), "Result should be wrapped in TryCast when output " + "!= compatible type"); + assertTrue(result.getTryCast().getExpr().hasBinaryExpr()); + } + + @Test + void testConvertNestedExpr() { + // (f0 + 1) * f0 + RexNode f0 = makeIntRef(0); + RexNode one = REX_BUILDER.makeExactLiteral(BigDecimal.ONE, intType()); + RexNode innerPlus = makeCall(intType(), SqlStdOperatorTable.PLUS, f0, one); + RexNode outer = makeCall(intType(), SqlStdOperatorTable.MULTIPLY, innerPlus, makeIntRef(0)); + + PhysicalExprNode result = converter.convert(outer, context); + + assertTrue(result.hasBinaryExpr()); + assertEquals("Multiply", result.getBinaryExpr().getOp()); + // Left child is the inner (f0 + 1) + PhysicalExprNode leftChild = result.getBinaryExpr().getL(); + assertTrue(leftChild.hasBinaryExpr()); + assertEquals("Plus", leftChild.getBinaryExpr().getOp()); + } + + @Test + void testIsSupportedNumericArithmetic() { + RexNode plus = makeCall(intType(), SqlStdOperatorTable.PLUS, makeIntRef(0), makeIntRef(0)); + + assertTrue(converter.isSupported(plus, context)); + } + + @Test + void testIsNotSupportedNonNumericKind() { + // EQUALS is not in the supported set + RexNode eq = REX_BUILDER.makeCall(SqlStdOperatorTable.EQUALS, makeIntRef(0), makeIntRef(0)); + + assertFalse(converter.isSupported(eq, context)); + } + 
+ // ---- getCommonTypeForComparison direct tests ---- + + @Test + void testCommonTypeDecimalWins() { + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType decType = TYPE_FACTORY.createSqlType(SqlTypeName.DECIMAL, 10, 2); + RelDataType result = RexCallConverter.getCommonTypeForComparison(intType, decType, TYPE_FACTORY); + + assertEquals(SqlTypeName.DECIMAL, result.getSqlTypeName()); + } + + @Test + void testCommonTypeExactIntegerPromotesToBigint() { + // TINYINT + INTEGER → BIGINT (both exact, promoted to widest exact type) + RelDataType tinyintType = TYPE_FACTORY.createSqlType(SqlTypeName.TINYINT); + RelDataType integerType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType result = RexCallConverter.getCommonTypeForComparison(tinyintType, integerType, TYPE_FACTORY); + + assertEquals(SqlTypeName.BIGINT, result.getSqlTypeName()); + } + + @Test + void testCommonTypeApproxFallbackToDouble() { + // INT + FLOAT → DOUBLE (FLOAT is approx, so exact integer rule skipped) + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType floatType = TYPE_FACTORY.createSqlType(SqlTypeName.FLOAT); + RelDataType result = RexCallConverter.getCommonTypeForComparison(intType, floatType, TYPE_FACTORY); + + assertEquals(SqlTypeName.DOUBLE, result.getSqlTypeName()); + } + + @Test + void testCommonTypeIncompatible() { + // BOOLEAN + INTEGER → null (incompatible) + RelDataType boolType = TYPE_FACTORY.createSqlType(SqlTypeName.BOOLEAN); + RelDataType intType = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType result = RexCallConverter.getCommonTypeForComparison(boolType, intType, TYPE_FACTORY); + + assertNull(result, "Incompatible types should return null"); + } + + // ---- Helpers ---- + + private static RexNode makeIntRef(int index) { + return REX_BUILDER.makeInputRef(intType(), index); + } + + private static RelDataType intType() { + return TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + } + + private 
static RelDataType bigintType() { + return TYPE_FACTORY.createSqlType(SqlTypeName.BIGINT); + } + + /** + * Creates a {@link org.apache.calcite.rex.RexCall} with an explicit + * return type using the List-based {@code makeCall} overload. + */ + private static RexNode makeCall( + RelDataType returnType, org.apache.calcite.sql.SqlOperator op, RexNode... operands) { + return REX_BUILDER.makeCall(returnType, op, Arrays.asList(operands)); + } +} diff --git a/auron-flink-extension/auron-flink-planner/src/test/java/org/apache/auron/flink/table/planner/converter/RexInputRefConverterTest.java b/auron-flink-extension/auron-flink-planner/src/test/java/org/apache/auron/flink/table/planner/converter/RexInputRefConverterTest.java new file mode 100644 index 000000000..db22cf5c4 --- /dev/null +++ b/auron-flink-extension/auron-flink-planner/src/test/java/org/apache/auron/flink/table/planner/converter/RexInputRefConverterTest.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.auron.flink.table.planner.converter; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.apache.auron.protobuf.PhysicalExprNode; +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.table.types.logical.BigIntType; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.RowType; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** Tests for {@link RexInputRefConverter}. */ +class RexInputRefConverterTest { + + private static final RelDataTypeFactory TYPE_FACTORY = new JavaTypeFactoryImpl(); + private static final RexBuilder REX_BUILDER = new RexBuilder(TYPE_FACTORY); + + private RexInputRefConverter converter; + private ConverterContext context; + + @BeforeEach + void setUp() { + converter = new RexInputRefConverter(); + RowType inputType = RowType.of(new LogicalType[] {new IntType(), new BigIntType()}, new String[] {"f0", "f1"}); + context = new ConverterContext(new Configuration(), null, getClass().getClassLoader(), inputType); + } + + @Test + void testGetNodeClass() { + assertEquals(RexInputRef.class, converter.getNodeClass()); + } + + @Test + void testIsSupportedValidIndex() { + RexNode inputRef = REX_BUILDER.makeInputRef(TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER), 0); + assertTrue(converter.isSupported(inputRef, context)); + } + + @Test + void testIsNotSupportedOutOfRangeIndex() { + // Schema has 2 fields (f0, f1) — index 5 is out of range + RexNode 
inputRef = REX_BUILDER.makeInputRef(TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER), 5); + assertFalse(converter.isSupported(inputRef, context)); + } + + @Test + void testConvertFirstColumn() { + RexNode inputRef = REX_BUILDER.makeInputRef(TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER), 0); + + PhysicalExprNode result = converter.convert(inputRef, context); + + assertTrue(result.hasColumn()); + assertEquals("f0", result.getColumn().getName()); + assertEquals(0, result.getColumn().getIndex()); + } + + @Test + void testConvertSecondColumn() { + RexNode inputRef = REX_BUILDER.makeInputRef(TYPE_FACTORY.createSqlType(SqlTypeName.BIGINT), 1); + + PhysicalExprNode result = converter.convert(inputRef, context); + + assertTrue(result.hasColumn()); + assertEquals("f1", result.getColumn().getName()); + assertEquals(1, result.getColumn().getIndex()); + } +} diff --git a/auron-flink-extension/auron-flink-planner/src/test/java/org/apache/auron/flink/table/planner/converter/RexLiteralConverterTest.java b/auron-flink-extension/auron-flink-planner/src/test/java/org/apache/auron/flink/table/planner/converter/RexLiteralConverterTest.java new file mode 100644 index 000000000..b408272ba --- /dev/null +++ b/auron-flink-extension/auron-flink-planner/src/test/java/org/apache/auron/flink/table/planner/converter/RexLiteralConverterTest.java @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.auron.flink.table.planner.converter; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.math.BigDecimal; +import org.apache.auron.protobuf.PhysicalExprNode; +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.RowType; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** Tests for {@link RexLiteralConverter}. 
*/ +class RexLiteralConverterTest { + + private static final RelDataTypeFactory TYPE_FACTORY = new JavaTypeFactoryImpl(); + private static final RexBuilder REX_BUILDER = new RexBuilder(TYPE_FACTORY); + + private RexLiteralConverter converter; + private ConverterContext context; + + @BeforeEach + void setUp() { + converter = new RexLiteralConverter(); + context = + new ConverterContext(new Configuration(), null, getClass().getClassLoader(), RowType.of(new IntType())); + } + + @Test + void testGetNodeClass() { + assertEquals(RexLiteral.class, converter.getNodeClass()); + } + + @Test + void testConvertTinyIntLiteral() { + RexLiteral lit = (RexLiteral) + REX_BUILDER.makeExactLiteral(BigDecimal.valueOf(7), TYPE_FACTORY.createSqlType(SqlTypeName.TINYINT)); + + assertTrue(converter.isSupported(lit, context)); + PhysicalExprNode result = converter.convert(lit, context); + assertTrue(result.hasLiteral()); + assertTrue(result.getLiteral().getIpcBytes().size() > 0); + } + + @Test + void testConvertSmallIntLiteral() { + RexLiteral lit = (RexLiteral) + REX_BUILDER.makeExactLiteral(BigDecimal.valueOf(256), TYPE_FACTORY.createSqlType(SqlTypeName.SMALLINT)); + + assertTrue(converter.isSupported(lit, context)); + PhysicalExprNode result = converter.convert(lit, context); + assertTrue(result.hasLiteral()); + assertTrue(result.getLiteral().getIpcBytes().size() > 0); + } + + @Test + void testConvertIntLiteral() { + RexLiteral intLit = (RexLiteral) + REX_BUILDER.makeExactLiteral(BigDecimal.valueOf(42), TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER)); + + assertTrue(converter.isSupported(intLit, context)); + PhysicalExprNode result = converter.convert(intLit, context); + assertTrue(result.hasLiteral()); + assertTrue(result.getLiteral().getIpcBytes().size() > 0); + } + + @Test + void testConvertLongLiteral() { + RexLiteral longLit = (RexLiteral) REX_BUILDER.makeExactLiteral( + BigDecimal.valueOf(123456789L), TYPE_FACTORY.createSqlType(SqlTypeName.BIGINT)); + + 
assertTrue(converter.isSupported(longLit, context)); + PhysicalExprNode result = converter.convert(longLit, context); + assertTrue(result.hasLiteral()); + assertTrue(result.getLiteral().getIpcBytes().size() > 0); + } + + @Test + void testConvertFloatLiteral() { + RexLiteral lit = (RexLiteral) + REX_BUILDER.makeApproxLiteral(BigDecimal.valueOf(2.5f), TYPE_FACTORY.createSqlType(SqlTypeName.FLOAT)); + + assertTrue(converter.isSupported(lit, context)); + PhysicalExprNode result = converter.convert(lit, context); + assertTrue(result.hasLiteral()); + assertTrue(result.getLiteral().getIpcBytes().size() > 0); + } + + @Test + void testConvertDoubleLiteral() { + RexLiteral doubleLit = (RexLiteral) + REX_BUILDER.makeApproxLiteral(BigDecimal.valueOf(3.14), TYPE_FACTORY.createSqlType(SqlTypeName.DOUBLE)); + + assertTrue(converter.isSupported(doubleLit, context)); + PhysicalExprNode result = converter.convert(doubleLit, context); + assertTrue(result.hasLiteral()); + assertTrue(result.getLiteral().getIpcBytes().size() > 0); + } + + @Test + void testConvertBooleanTrue() { + RexLiteral boolLit = (RexLiteral) REX_BUILDER.makeLiteral(true); + + assertTrue(converter.isSupported(boolLit, context)); + PhysicalExprNode result = converter.convert(boolLit, context); + assertTrue(result.hasLiteral()); + assertTrue(result.getLiteral().getIpcBytes().size() > 0); + } + + @Test + void testConvertStringLiteral() { + RexLiteral strLit = (RexLiteral) REX_BUILDER.makeLiteral("hello"); + + assertTrue(converter.isSupported(strLit, context)); + PhysicalExprNode result = converter.convert(strLit, context); + assertTrue(result.hasLiteral()); + assertTrue(result.getLiteral().getIpcBytes().size() > 0); + } + + @Test + void testConvertDecimalLiteral() { + RexLiteral decLit = (RexLiteral) REX_BUILDER.makeExactLiteral( + new BigDecimal("123.45"), TYPE_FACTORY.createSqlType(SqlTypeName.DECIMAL, 10, 2)); + + assertTrue(converter.isSupported(decLit, context)); + PhysicalExprNode result = 
converter.convert(decLit, context); + assertTrue(result.hasLiteral()); + assertTrue(result.getLiteral().getIpcBytes().size() > 0); + } + + @Test + void testConvertNullLiteral() { + RexLiteral nullLit = (RexLiteral) REX_BUILDER.makeNullLiteral(TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER)); + + assertTrue(converter.isSupported(nullLit, context)); + PhysicalExprNode result = converter.convert(nullLit, context); + assertTrue(result.hasLiteral()); + assertTrue(result.getLiteral().getIpcBytes().size() > 0); + } + + @Test + void testUnsupportedTypeNotSupported() { + RexNode tsLit = REX_BUILDER.makeNullLiteral(TYPE_FACTORY.createSqlType(SqlTypeName.TIMESTAMP)); + + assertFalse(converter.isSupported(tsLit, context)); + } +} diff --git a/docs/PR-AURON-1859/AURON-1859-DESIGN.md b/docs/PR-AURON-1859/AURON-1859-DESIGN.md new file mode 100644 index 000000000..d3c3a1278 --- /dev/null +++ b/docs/PR-AURON-1859/AURON-1859-DESIGN.md @@ -0,0 +1,503 @@ +# Design — AURON-1859: Convert Math Operators to Auron Native Operators + +**Rev 1** — 2026-04-01 +**Issue**: https://github.com/apache/auron/issues/1859 +**Prerequisite**: #1856 (Flink Node Converter Tools) — PR #2146 merged + +--- + +## 1. Problem Statement + +The Flink integration track needs concrete expression converters that translate Flink/Calcite `RexNode` expressions into Auron native `PhysicalExprNode` protobuf representations. Issue #1859 targets math operators (`+`, `-`, `*`, `/`, `%`) as the first converters, unlocking #1857 (FlinkAuronCalcOperator) and #1853 (StreamExecCalc rewrite). + +The converter framework from #1856 provides the dispatch infrastructure (`FlinkNodeConverterFactory`, `FlinkRexNodeConverter`, `ConverterContext`) but has zero concrete converter implementations. This PR delivers the first three. + +--- + +## 2. Approach Candidates + +### Candidate A: Single RexCallConverter with SqlKind Switch + +One `RexCallConverter` registered for `RexCall.class`, dispatching by `SqlKind` internally. 
+ +**Pros**: Matches the factory's class-based dispatch design. Simple, one registration call. +**Cons**: Class will grow as more operators are added in future PRs (#1860, #1861, #1864). + +### Candidate B: Per-Operator Converter Classes (Gluten-style) + +Separate converter class for each operator (e.g., `PlusConverter`, `MinusConverter`). + +**Pros**: Small focused classes. +**Cons**: Cannot work with our factory — it keys by `RexNode` subclass (`RexCall.class`), so only ONE converter can be registered for `RexCall`. Would require redesigning the factory. + +**Evidence**: The factory throws `IllegalArgumentException` on duplicate registration: +```java +// FlinkNodeConverterFactory.java:77-79 +if (rexConverterMap.containsKey(nodeClass)) { + throw new IllegalArgumentException("Duplicate RexNode converter for " + nodeClass.getName()); +} +``` + +### Candidate C: Sub-dispatch Registry Inside RexCallConverter (Gluten hybrid) + +One `RexCallConverter` for the factory, but internally uses a `Map` to delegate to handler methods. Future PRs add entries to this map. + +**Pros**: Extensible without modifying the switch statement. Separates concerns. +**Cons**: More infrastructure than needed for 7 operators. Over-engineered for Phase 1. + +### Decision: **Candidate A** — Single RexCallConverter with SqlKind Switch + +**Rationale**: +- Matches Spark Auron's pattern (single `convertExprWithFallback()` with pattern match — `NativeConverters.scala:395-1183`) +- The factory's class-based dispatch forces one converter per RexNode subclass anyway +- Simple switch statement is readable and maintainable up to ~20 cases +- Future PRs (#1860, #1861, #1864) just add new `case` clauses — additive, no structural changes +- If it grows beyond ~30 cases, we can refactor to Candidate C later + +--- + +## 3. 
Detailed Design + +### 3.1 Converter Classes + +Three new Java classes in `org.apache.auron.flink.table.planner.converter`: + +#### 3.1.1 RexInputRefConverter + +Converts column references to native `PhysicalColumn` protobuf. + +**Prior art**: +- **Gluten-Flink**: `RexNodeConverter.toTypedExpr()` handles `RexInputRef` by resolving the column name from `context.getInputAttributeNames()` using the index, then creates `FieldAccessTypedExpr(type, name)` — Source: `gluten-flink/planner/src/main/java/org/apache/gluten/rexnode/RexNodeConverter.java` +- **Spark Auron**: Column references are converted elsewhere (in plan-level conversion, not expression-level) — Source: `NativeConverters.scala:1319-1325` + +**Design**: +```java +public class RexInputRefConverter implements FlinkRexNodeConverter { + + @Override + public Class getNodeClass() { + return RexInputRef.class; + } + + @Override + public boolean isSupported(RexNode node, ConverterContext context) { + RexInputRef ref = (RexInputRef) node; + return ref.getIndex() < context.getInputType().getFieldCount(); + } + + @Override + public PhysicalExprNode convert(RexNode node, ConverterContext context) { + RexInputRef ref = (RexInputRef) node; + int index = ref.getIndex(); + String name = context.getInputType().getFieldNames().get(index); + return PhysicalExprNode.newBuilder() + .setColumn(PhysicalColumn.newBuilder() + .setName(name) + .setIndex(index) + .build()) + .build(); + } +} +``` + +**Target protobuf**: `PhysicalColumn` (name + index) — Source: `auron.proto:512-515` + +#### 3.1.2 RexLiteralConverter + +Converts scalar literal values to native `ScalarValue` protobuf using Arrow IPC serialization. + +**Prior art**: +- **Spark Auron**: Serializes literals as Arrow IPC bytes via `ArrowStreamWriter` → `ScalarValue.ipc_bytes` — Source: `NativeConverters.scala:409-430` +- **Gluten-Flink**: Uses individually typed `Variant` values (`IntegerValue`, `BigIntValue`, etc.) 
via `toVariant()` — Source: `gluten-flink/planner/src/main/java/org/apache/gluten/rexnode/RexNodeConverter.java` +- **Native engine (Rust)**: Deserializes `ScalarValue.ipc_bytes` via Arrow IPC `StreamReader` → `ScalarValue::try_from_array` — Source: `auron-planner/src/lib.rs:446-455` + +**Decision: Arrow IPC bytes (Spark pattern)** + +Auron's `ScalarValue` protobuf has only `ipc_bytes` (no typed fields) — Source: `auron.proto:873-875`. We must use Arrow IPC serialization. Gluten's approach (typed Variant objects) targets Velox, not DataFusion. + +**Design**: +```java +public class RexLiteralConverter implements FlinkRexNodeConverter { + + @Override + public Class getNodeClass() { + return RexLiteral.class; + } + + @Override + public boolean isSupported(RexNode node, ConverterContext context) { + RexLiteral literal = (RexLiteral) node; + if (literal.isNull()) { + return true; // null literals always supported + } + switch (literal.getType().getSqlTypeName()) { + case TINYINT: case SMALLINT: case INTEGER: case BIGINT: + case FLOAT: case DOUBLE: case DECIMAL: + case BOOLEAN: case CHAR: case VARCHAR: + return true; + default: + return false; + } + } + + @Override + public PhysicalExprNode convert(RexNode node, ConverterContext context) { + RexLiteral literal = (RexLiteral) node; + byte[] ipcBytes = serializeToArrowIpc(literal); + return PhysicalExprNode.newBuilder() + .setLiteral(ScalarValue.newBuilder() + .setIpcBytes(ByteString.copyFrom(ipcBytes)) + .build()) + .build(); + } + + // Creates a single-row Arrow IPC stream from the literal value + private byte[] serializeToArrowIpc(RexLiteral literal) { + // 1. Map SqlTypeName to Arrow Field + // 2. Create VectorSchemaRoot with one column + // 3. Set value into the appropriate vector (or leave null) + // 4. Serialize via ArrowStreamWriter to ByteArrayOutputStream + // 5. 
Return bytes + } +} +``` + +**Type mapping for value extraction**: + +| SqlTypeName | Arrow Vector | RexLiteral extraction | Evidence | +|---|---|---|---| +| `TINYINT` | `TinyIntVector` | `getValueAs(Byte.class)` | Gluten: `Integer.valueOf(literal.getValue().toString())` | +| `SMALLINT` | `SmallIntVector` | `getValueAs(Short.class)` | Gluten: same pattern | +| `INTEGER` | `IntVector` | `getValueAs(Integer.class)` | Gluten: same pattern | +| `BIGINT` | `BigIntVector` | `getValueAs(Long.class)` | Gluten: `Long.valueOf(literal.getValue().toString())` | +| `FLOAT` | `Float4Vector` | `getValueAs(Float.class)` | (Gluten lacks FLOAT — gap in their impl) | +| `DOUBLE` | `Float8Vector` | `getValueAs(Double.class)` | Gluten: `Double.valueOf(literal.getValue().toString())` | +| `DECIMAL` | `DecimalVector` | `getValueAs(BigDecimal.class)` | Gluten: `literal.getValueAs(BigDecimal.class)` | +| `BOOLEAN` | `BitVector` | `getValueAs(Boolean.class)` | Gluten: `(boolean) literal.getValue()` | +| `CHAR`/`VARCHAR` | `VarCharVector` | `getValueAs(String.class)` | Gluten: `literal.getValueAs(String.class)` | +| `NULL` | (any nullable) | N/A — leave vector null | (Gluten lacks null handling — gap) | + +**Arrow type construction**: Reuse `SchemaConverters.convertToAuronArrowType()` for proto ArrowType, but for Arrow Java `Field`, use the standard `ArrowType` mapping (e.g., `new ArrowType.Int(32, true)` for INT32). A helper method `sqlTypeNameToArrowField()` will map `SqlTypeName` → Arrow Java `Field`. + +**Null handling**: For null literals, create the vector but don't call `setSafe()`. The validity buffer defaults to null. This is the same pattern as Spark — Source: `NativeConverters.scala:412` (`e.eval(null)` returns null for null literals). + +#### 3.1.3 RexCallConverter + +Converts function/operator calls to native expressions. Dispatches by `SqlKind`. + +**Prior art**: +- **Gluten-Flink**: `RexCallConverterFactory` dispatches by operator name string. 
`BasicArithmeticOperatorRexCallConverter` handles `+`, `-`, `*`. Each converter calls `getParams()` which recursively converts operands via `RexNodeConverter.toTypedExpr()` — Source: `gluten-flink/planner/src/main/java/org/apache/gluten/rexnode/functions/BaseRexCallConverters.java` +- **Spark Auron**: Pattern match per expression class, calls `buildBinaryExprNode(lhs, rhs, opString)` which recursively calls `convertExprWithFallback()` — Source: `NativeConverters.scala:576-774` + +**Design**: +```java +public class RexCallConverter implements FlinkRexNodeConverter { + + private static final Set SUPPORTED_KINDS = EnumSet.of( + SqlKind.PLUS, SqlKind.MINUS, SqlKind.TIMES, + SqlKind.DIVIDE, SqlKind.MOD, + SqlKind.MINUS_PREFIX, // unary minus + SqlKind.PLUS_PREFIX, // unary plus (identity) + SqlKind.CAST // basic numeric cast + ); + + @Override + public Class getNodeClass() { + return RexCall.class; + } + + @Override + public boolean isSupported(RexNode node, ConverterContext context) { + RexCall call = (RexCall) node; + if (!SUPPORTED_KINDS.contains(call.getKind())) { + return false; + } + // For arithmetic ops, verify the result type is numeric. + // This prevents accepting TIMESTAMP + INTERVAL (also SqlKind.PLUS) + // or other non-numeric uses of arithmetic SqlKinds. 
+ switch (call.getKind()) { + case PLUS: case MINUS: case TIMES: case DIVIDE: case MOD: + case MINUS_PREFIX: + return isNumericType(call.getType()); + case CAST: + return isNumericType(call.getType()); + case PLUS_PREFIX: + return true; + default: + return false; + } + } + + private static boolean isNumericType(RelDataType type) { + switch (type.getSqlTypeName()) { + case TINYINT: case SMALLINT: case INTEGER: case BIGINT: + case FLOAT: case DOUBLE: case DECIMAL: + return true; + default: + return false; + } + } + + @Override + public PhysicalExprNode convert(RexNode node, ConverterContext context) { + RexCall call = (RexCall) node; + switch (call.getKind()) { + case PLUS: return buildBinaryExpr(call, "Plus", context); + case MINUS: return buildBinaryExpr(call, "Minus", context); + case TIMES: return buildBinaryExpr(call, "Multiply", context); + case DIVIDE: return buildDivide(call, context); + case MOD: return buildModulo(call, context); + case MINUS_PREFIX: return buildNegative(call, context); + case PLUS_PREFIX: return convertOperand(call.getOperands().get(0), context); + case CAST: return buildCast(call, context); + default: + throw new UnsupportedOperationException( + "Unsupported RexCall kind: " + call.getKind()); + } + } +} +``` + +**Recursive operand conversion**: + +Following Gluten-Flink's two-layer recursion pattern (converter calls back into factory for operands — Source: `RexNodeConverter.java` → `BaseRexCallConverters.getParams()` → `RexNodeConverter.toTypedExpr()`), the `RexCallConverter` converts operands via the factory. + +**Factory injection** (not singleton access): The `RexCallConverter` takes a `FlinkNodeConverterFactory` reference in its constructor rather than calling `getInstance()`. This aligns with #1856's testability design — the factory test (`FlinkNodeConverterFactoryTest.java:53`) uses a package-private constructor to create fresh instances for test isolation. 
Our tests do the same: create a fresh factory, register all 3 converters, test against that instance. + +```java +public class RexCallConverter implements FlinkRexNodeConverter { + private final FlinkNodeConverterFactory factory; + + public RexCallConverter(FlinkNodeConverterFactory factory) { + this.factory = Objects.requireNonNull(factory, "factory must not be null"); + } + + private PhysicalExprNode convertOperand(RexNode operand, ConverterContext context) { + return factory.convertRexNode(operand, context) + .orElseThrow(() -> new IllegalStateException( + "Failed to convert operand: " + operand)); + } +``` + +This handles nested expressions naturally: `(a + 1) * b` → the outer `TIMES` RexCall converts its left operand (a `PLUS` RexCall), which recursively converts `a` (RexInputRef) and `1` (RexLiteral). + +**Binary expression builder**: + +```java +private PhysicalExprNode buildBinaryExpr( + RexCall call, String op, ConverterContext context) { + PhysicalExprNode left = convertOperand(call.getOperands().get(0), context); + PhysicalExprNode right = convertOperand(call.getOperands().get(1), context); + return PhysicalExprNode.newBuilder() + .setBinaryExpr(PhysicalBinaryExprNode.newBuilder() + .setL(left) + .setR(right) + .setOp(op) + .build()) + .build(); +} +``` + +**Evidence**: Spark uses the same pattern — `buildBinaryExprNode(lhs, rhs, "Plus")` at `NativeConverters.scala:612`. + +### 3.2 Division by Zero Handling + +**Problem**: DataFusion errors on integer division by zero. Flink SQL expects NULL. + +**Prior art**: +- **Spark Auron**: Wraps the divisor in `Spark_NullIfZero` extended scalar function — Source: `NativeConverters.scala:735,748,771` +- **Gluten-Flink**: Does NOT add any div-by-zero protection. 
Relies on Velox returning NULL natively — Source: `gluten-flink/.../BaseRexCallConverters.java` +- **Native engine naming convention** (`datafusion-ext-functions/src/lib.rs:43-44`): + ```rust + // auron ext functions, if used for spark should be start with 'Spark_', + // if used for flink should be start with 'Flink_', + // same to other engines. + ``` + +**Critical finding**: The native engine's `create_auron_ext_function()` (`lib.rs:89`) has an exhaustive match that errors on unknown names: +```rust +_ => df_unimplemented_err!("spark ext function not implemented: {name}")?, +``` +Calling `Flink_NullIfZero` would hit this default and **error at runtime**. Using `Spark_NullIfZero` works but violates the project's naming convention. + +**Decision**: **Defer div-by-zero wrapping to #1857.** + +**Rationale**: +1. Adding `Flink_NullIfZero` requires a one-line Rust change (`lib.rs`), which expands this Java-only PR into a cross-language change +2. This PR is about the **converter framework** (RexNode → protobuf translation). Division-by-zero is a **runtime behavior** concern +3. The gap is documented and will be caught immediately in #1857 integration tests +4. #1857 (FlinkAuronCalcOperator) is the natural place to add `Flink_NullIfZero` since it owns the runtime pipeline and can include the Rust registration alongside the Java wrapping + +DIVIDE and MOD will be converted as straightforward `PhysicalBinaryExprNode` in this PR — same as PLUS/MINUS/TIMES. The `wrapNullIfZero` logic will be added in #1857. 
+ +**Design for DIVIDE (this PR)**: +```java +// Simple conversion — div-by-zero handling deferred to #1857 +private PhysicalExprNode buildBinaryExpr(RexCall call, String op, ConverterContext context) { + PhysicalExprNode left = convertOperand(call.getOperands().get(0), context); + PhysicalExprNode right = convertOperand(call.getOperands().get(1), context); + return PhysicalExprNode.newBuilder() + .setBinaryExpr(PhysicalBinaryExprNode.newBuilder() + .setL(left).setR(right).setOp(op).build()) + .build(); +} +``` + +### 3.3 CAST Handling + +**Prior art**: +- **Spark Auron**: CAST is in the same `convertExprWithFallback()` as arithmetic. Uses `PhysicalTryCastNode` (not `PhysicalCastNode`) as the default — Source: `NativeConverters.scala:473-503` +- **Gluten-Flink**: CAST is registered in the same `RexCallConverterFactory` map as arithmetic via `Map.entry("CAST", Arrays.asList(() -> new DefaultRexCallConverter("cast")))` — Source: `RexCallConverterFactory.java` +- **Flink/Calcite**: Optimizer inserts explicit CAST RexCall nodes for type promotion in arithmetic (via `StandardConvertletTable.convertOperands()` → `RexBuilder.ensureType()` → `makeCast()`) — Source: `apache/calcite StandardConvertletTable.java` + +**Decision**: Include basic numeric CAST in this PR. Use `PhysicalTryCastNode` (not `PhysicalCastNode`). + +**Rev 2 update** (2026-04-04): Originally chose `PhysicalCastNode` for Flink's strict CAST semantics. Revised to `PhysicalTryCastNode` per reviewer @Tartarus0zm's PoC, which uses TryCast for both operand promotion casts and explicit CAST expressions. For numeric-to-numeric widening (INT→BIGINT), both behave identically. Aligning with the PoC keeps the codebase consistent with the reviewer's architectural direction. 
+ +**Design**: +```java +private PhysicalExprNode buildTryCast(RexCall call, ConverterContext context) { + PhysicalExprNode operand = convertOperand(call.getOperands().get(0), context); + return wrapInTryCast(operand, call.getType()); +} +``` + +**Type conversion chain**: `RelDataType` (Calcite) → `LogicalType` (Flink) → `ArrowType` (Auron protobuf). + +The `RelDataType` → `LogicalType` conversion needs a helper. Flink provides `FlinkTypeFactory.toLogicalType(RelDataType)` but this requires a `FlinkTypeFactory` instance. Alternatively, we can map `SqlTypeName` directly to `LogicalType`: + +| SqlTypeName | Flink LogicalType | +|---|---| +| `TINYINT` | `new TinyIntType()` | +| `SMALLINT` | `new SmallIntType()` | +| `INTEGER` | `new IntType()` | +| `BIGINT` | `new BigIntType()` | +| `FLOAT` | `new FloatType()` | +| `DOUBLE` | `new DoubleType()` | +| `DECIMAL(p,s)` | `new DecimalType(p, s)` | +| `BOOLEAN` | `new BooleanType()` | +| `VARCHAR` | `new VarCharType()` | + +This mapping is straightforward and avoids a dependency on `FlinkTypeFactory`. A utility method `toLogicalType(RelDataType)` will be added to `RexCallConverter` (or a small `TypeConversionUtils` helper if shared). + +### 3.4 Type Coercion: Not Our Problem + +**Evidence that Flink inserts CASTs upstream**: + +Calcite's `StandardConvertletTable.convertOperands()` method calls `RexBuilder.ensureType()` which creates CAST RexCall nodes when the operand type doesn't match the validated return type. For `INT + BIGINT`, the validated return type is `BIGINT` (via `ReturnTypes.LEAST_RESTRICTIVE`), so the INT operand gets wrapped in `CAST(INT AS BIGINT)`. + +**Source**: `apache/calcite core/src/main/java/org/apache/calcite/sql2rel/StandardConvertletTable.java` — `convertOperands` method. + +**Result**: By the time our converter sees a `RexCall(PLUS, ...)`, operands already have matching types (or explicit CAST wrappers). Our arithmetic converter doesn't need type promotion logic. 
+ +**Gluten-Flink confirmation**: Gluten adds its own `TypeUtils.promoteTypeForArithmeticExpressions()` as a safety net. Our design is simpler — we trust Calcite's upstream CASTs and handle them via the CAST case in our converter. If edge cases surface during integration testing (#1857), we can add a safety net then. + +--- + +## 4. Prior Art Comparison Table + +| Aspect | Spark Auron | Gluten-Flink | **Auron Flink (this PR)** | +|---|---|---|---| +| **Dispatch** | Pattern match in Scala | Factory keyed by operator name | Factory keyed by RexNode class + SqlKind switch | +| **Literal format** | Arrow IPC bytes | Typed Variant values (Velox) | **Arrow IPC bytes** (same proto) | +| **Column ref** | Plan-level (not expr) | Field name from context | **PhysicalColumn(name, index)** | +| **Recursion** | Direct recursive call | `getParams()` → `toTypedExpr()` | **`convertOperand()` → factory** | +| **Div-by-zero** | `Spark_NullIfZero` wrapper | None (Velox handles) | **Deferred to #1857** (needs `Flink_NullIfZero` Rust registration) | +| **CAST** | `PhysicalTryCastNode` | `DefaultRexCallConverter("cast")` | **`PhysicalCastNode`** | +| **Type promotion** | Manual in converter | `TypeUtils.promoteTypes()` | **Trust Calcite CASTs** | +| **Registration** | None (static pattern match) | Static `Map.ofEntries(...)` | **Dynamic `registerRexConverter()`** | + +--- + +## 5. 
Alignment with #1856 Framework + +This design was verified against the #1856 SPEC, PLAN, and research artifacts: + +| #1856 Contract | #1859 Compliance | Evidence | +|---|---|---| +| `FlinkRexNodeConverter` implements `FlinkNodeConverter` | All 3 converters implement `FlinkRexNodeConverter` | Design §3.1.1–3.1.3 | +| Factory dispatches by `node.getClass()` | 3 distinct classes: `RexInputRef`, `RexLiteral`, `RexCall` — no conflicts | `FlinkNodeConverterFactory.java:109` | +| `convert()` returns `PhysicalExprNode` | All converters return `PhysicalExprNode` | Per Rev 3 reviewer feedback (March 23) | +| `ConverterContext` carries `RowType inputType` | `RexInputRefConverter` uses `getInputType()` for column resolution | `ConverterContext.java:77` | +| Factory singleton with `registerRexConverter()` | Converters are standalone; registration deferred to #1857 | #1856 SPEC "Out of Scope" lists concrete converters | +| Package: `o.a.auron.flink.table.planner.converter` | All new files in same package | Consistent with existing 5 files | +| Java only | All new code is Java | #1856 research resolved Q4 | + +**Factory injection for testability**: #1856's `FlinkNodeConverterFactoryTest` uses a package-private `FlinkNodeConverterFactory()` constructor (line 53) for test isolation. `RexCallConverter` follows this pattern by accepting the factory as a constructor parameter rather than calling `getInstance()`, enabling tests with fresh factory instances. + +**Concrete converters as explicit #1856 follow-up**: The #1856 SPEC lists "Concrete converter implementations (RexLiteralConverter, RexInputRefConverter, RexCallConverter, etc.)" as out-of-scope, confirming #1859 is the intended follow-up. + +--- + +## 6. 
Dependencies

### Existing (no new deps)

| Artifact | Used for | Evidence |
|---|---|---|
| `arrow-vector` (in planner pom.xml) | `ArrowStreamWriter`, vectors | `pom.xml:290-293` |
| `arrow-c-data` (in planner pom.xml) | `CDataDictionaryProvider` | `pom.xml:276-278` |
| `arrow-memory-unsafe` (in planner pom.xml) | Arrow memory allocation | `pom.xml:285-288` |
| `auron-flink-runtime` (in planner pom.xml) | `SchemaConverters`, `FlinkArrowUtils` | `pom.xml:250-253` |

### Native Engine (no changes)

All operators and functions already registered:
- Binary ops: `from_proto_binary_op()` — Source: `auron-planner/src/lib.rs:70-101`
- `Spark_NullIfZero`: exists at `datafusion-ext-functions/src/lib.rs:49` — NOT used in this PR (deferred to #1857 as `Flink_NullIfZero`)
- Expression parsing: `try_parse_physical_expr()` — Source: `auron-planner/src/planner.rs:829-1038`

---

## 7. Test Plan

| Test Class | # Tests | What it validates |
|---|---|---|
| `RexInputRefConverterTest` | 4 | Column ref conversion, getNodeClass, isSupported |
| `RexLiteralConverterTest` | 9 | Int/Long/Double/Boolean/Null/String/Decimal literals, getNodeClass, unsupported type |
| `RexCallConverterTest` | 11 | All 5 arithmetic ops, unary minus/plus, CAST, nested expr, unsupported, getNodeClass |
| **Total** | **24** | |

Tests will use `RexBuilder` (Calcite) to create RexNode instances and verify the output `PhysicalExprNode` protobuf structure. Following the pattern from `FlinkNodeConverterFactoryTest.java`.

For `RexCallConverterTest`, all three converters must be registered in the factory before testing (since operand conversion is recursive through the factory).

---

## 8. Alternatives Considered

### 8.1 Skip CAST — Let #1864 Handle It

**Rejected**. Mixed-type arithmetic won't work without CAST support. Flink's optimizer inserts CAST nodes upstream, so our converter must handle them. 
Including basic numeric CAST here (5 lines of code) is more pragmatic than shipping broken arithmetic.

### 8.2 Use PhysicalTryCastNode Instead of PhysicalCastNode

**Rejected for Flink**. Spark uses `TryCast` because Spark's CAST is lenient. Flink's CAST is strict by default — `PhysicalCastNode` is the correct match. `TRY_CAST` is a separate Flink function.

### 8.3 Implement Type Promotion in Converter (Like Gluten)

**Rejected**. Calcite already inserts CASTs. Adding our own promotion is redundant and risks conflicting with Calcite's decisions. If edge cases surface in integration testing (#1857), we can add a safety net then.

### 8.4 Use Spark_NullIfZero from Flink Code

**Rejected**. The native engine's naming convention (`lib.rs:43-44`) explicitly requires Flink functions to use `Flink_` prefix. Using `Spark_NullIfZero` would violate this convention. Registering `Flink_NullIfZero` requires a Rust change, expanding this Java-only PR's scope. Deferred to #1857 where the runtime pipeline is wired and the Rust+Java change belongs naturally.

---

## 9. Scope Boundaries

### In Scope
- `RexInputRefConverter`, `RexLiteralConverter`, `RexCallConverter`
- Arithmetic: `+`, `-`, `*`, `/`, `%`
- Unary: `-`, `+` (identity)
- Basic numeric CAST (INT, BIGINT, FLOAT, DOUBLE, DECIMAL)
- Unit tests (24 tests)

### Out of Scope
- Logical operators (#1860)
- Comparison operators (#1861)
- Full CAST support — string, date, timestamp (#1864)
- Math functions — ABS, SQRT, etc. 
(new issue) +- Converter registration wiring (#1857) +- Decimal precision/scale promotion (follow-up if needed) +- Integration tests (require #1857 FlinkAuronCalcOperator first) diff --git a/docs/reviewhelper/AURON-1859/01-rex-input-ref-converter.md b/docs/reviewhelper/AURON-1859/01-rex-input-ref-converter.md new file mode 100644 index 000000000..d60c3b71e --- /dev/null +++ b/docs/reviewhelper/AURON-1859/01-rex-input-ref-converter.md @@ -0,0 +1,19 @@ +# Commit 1: RexInputRefConverter — column reference conversion + +## What it does + +Implements `RexInputRefConverter`, the first concrete `FlinkRexNodeConverter` that converts Calcite `RexInputRef` (column reference by index) into Auron's native `PhysicalExprNode` containing a `PhysicalColumn` with name and index. Also fixes the surefire configuration to discover `*Test.java` files alongside existing `*ITCase.java`. + +## Files to review + +| File | Key details | +|------|-------------| +| `RexInputRefConverter.java` | 3 methods: `getNodeClass()` → `RexInputRef.class`, `isSupported()` → always true, `convert()` → resolves name from `ConverterContext.getInputType()`, builds `PhysicalColumn{name, index}` | +| `RexInputRefConverterTest.java` | 4 tests: getNodeClass, isSupported, convert index 0, convert index 1. Uses real Calcite `RexBuilder` (not mocks). | +| `pom.xml` | +1 line: adds `**/*Test.java` to surefire includes | + +## What to look for + +1. The cast `(RexInputRef) node` in `convert()` relies on factory dispatch guarantee (factory only calls convert with matching type). No explicit `instanceof` guard — matches the framework pattern. +2. The `@throws IllegalArgumentException` Javadoc is inherited from the base `FlinkNodeConverter.convert()` interface — the actual runtime would be `ClassCastException` if bypassing the factory. Accepted as framework-level guarantee. +3. The surefire fix also enables `FlinkNodeConverterFactoryTest` from PR #2146 which was previously silently excluded from CI — verified it passes. 
\ No newline at end of file diff --git a/docs/reviewhelper/AURON-1859/02-rex-literal-converter.md b/docs/reviewhelper/AURON-1859/02-rex-literal-converter.md new file mode 100644 index 000000000..561f2fb4c --- /dev/null +++ b/docs/reviewhelper/AURON-1859/02-rex-literal-converter.md @@ -0,0 +1,19 @@ +# Commit 2: RexLiteralConverter — scalar literal conversion via Arrow IPC + +## What it does + +Implements `RexLiteralConverter`, converting Calcite `RexLiteral` values into Auron native `PhysicalExprNode` containing `ScalarValue` with Arrow IPC bytes. Supports numeric types (TINYINT through DOUBLE, DECIMAL), BOOLEAN, CHAR/VARCHAR, and typed NULL. Follows the Spark `NativeConverters.scala:409-430` pattern. + +## Files to review + +| File | Key details | +|------|-------------| +| `RexLiteralConverter.java` | `serializeToIpc()` creates single-element Arrow vector, serializes via `ArrowStreamWriter`. `isSupported()` checks type even for null literals (V1 bug fix). Per-call `RootAllocator` in try-with-resources. | +| `RexLiteralConverterTest.java` | 12 tests: getNodeClass + all 10 supported types (TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DECIMAL, BOOLEAN, STRING, NULL) + unsupported type rejection. | + +## What to look for + +1. Arrow IPC serialization: `allocateNew()` → set value → `setValueCount(1)` → `setRowCount(1)` → write via `ArrowStreamWriter(start/writeBatch/end)`. Resource management via try-with-resources. +2. Null literal type safety: `isSupported()` checks `isSupportedType(typeName)` regardless of null status. A null TIMESTAMP returns `false`. +3. `DecimalVector` constructed with `ArrowType.Decimal(precision, scale, 128)` — 128-bit width is standard. +4. Per-call `RootAllocator` — accepted overhead, lifecycle management deferred to future PR. 
diff --git a/docs/reviewhelper/AURON-1859/03-rex-call-converter.md b/docs/reviewhelper/AURON-1859/03-rex-call-converter.md new file mode 100644 index 000000000..eba9753d7 --- /dev/null +++ b/docs/reviewhelper/AURON-1859/03-rex-call-converter.md @@ -0,0 +1,20 @@ +# Commit 3: RexCallConverter — arithmetic, unary minus, cast with type promotion + +## What it does + +Implements `RexCallConverter`, the most complex converter handling binary arithmetic (+,-,*,/,%), unary minus/plus, and CAST. Features explicit type promotion via `getCommonTypeForComparison()` + `castIfNecessary()` using `PhysicalTryCastNode`, following the reviewer's PoC pattern exactly. + +## Files to review + +| File | Key details | +|------|-------------| +| `RexCallConverter.java` | Constructor takes `FlinkNodeConverterFactory` for recursive operand conversion. `isSupported()` checks SqlKind + numeric guard. `buildBinaryExpr()` computes compatible type, casts operands, wraps result if output type differs. Uses `FlinkTypeFactory.toLogicalType()` + `SchemaConverters.convertToAuronArrowType()` for Arrow type mapping. | +| `RexCallConverterTest.java` | 17 tests: 5 binary ops, unary minus/plus, cast, mixed type promotion (INT+BIGINT), output type cast, nested expressions, isSupported guards, and 3 direct `getCommonTypeForComparison` tests (DECIMAL wins, DOUBLE fallback, incompatible returns null). | + +## What to look for + +1. **Type promotion logic** matches reviewer PoC blocks 1-3 exactly: `getCommonTypeForComparison()` (same-type shortcut, DECIMAL wins, BIGINT if both exact, else DOUBLE), `castIfNecessary()` (compare by SqlTypeName, wrap in TryCast), output type cast. +2. **V1 bug fix**: `getCommonTypeForComparison` returns `null` for incompatible types → `buildBinaryExpr` throws `IllegalStateException` (not silent fallback). +3. **`FlinkTypeFactory.toLogicalType()`** is Flink internal API (Scala class, static method callable from Java). Used per reviewer's PoC pattern. +4. 
**Static `TYPE_FACTORY`** — `SqlTypeFactoryImpl` is a static constant (V1 fix, not per-call). +5. **`isSupported()` numeric guard** — binary arithmetic kinds additionally check `SqlTypeUtil.isNumeric(call.getType())` to reject TIMESTAMP + INTERVAL.