Skip to content

Commit 72a7485

Browse files
committed
[FLINK-39233][table-runtime] Support cascaded delta join runtime
1 parent 45a04a7 commit 72a7485

File tree

11 files changed

+4075
-102
lines changed

11 files changed

+4075
-102
lines changed

flink-table/flink-table-planner/src/main/java/org/apache/flink/table/planner/plan/nodes/exec/stream/StreamExecDeltaJoin.java

Lines changed: 120 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import org.apache.flink.table.planner.plan.nodes.exec.utils.ExecNodeUtil;
5050
import org.apache.flink.table.planner.plan.schema.TableSourceTable;
5151
import org.apache.flink.table.planner.plan.utils.DeltaJoinUtil;
52+
import org.apache.flink.table.planner.plan.utils.FunctionCallUtil;
5253
import org.apache.flink.table.planner.plan.utils.FunctionCallUtil.AsyncOptions;
5354
import org.apache.flink.table.planner.plan.utils.KeySelectorUtil;
5455
import org.apache.flink.table.planner.plan.utils.LookupJoinUtil;
@@ -60,9 +61,12 @@
6061
import org.apache.flink.table.runtime.operators.join.FlinkJoinType;
6162
import org.apache.flink.table.runtime.operators.join.deltajoin.AsyncDeltaJoinRunner;
6263
import org.apache.flink.table.runtime.operators.join.deltajoin.BinaryLookupHandler;
64+
import org.apache.flink.table.runtime.operators.join.deltajoin.CascadedLookupHandler;
65+
import org.apache.flink.table.runtime.operators.join.deltajoin.DeltaJoinHandlerBase;
6366
import org.apache.flink.table.runtime.operators.join.deltajoin.DeltaJoinHandlerChain;
6467
import org.apache.flink.table.runtime.operators.join.deltajoin.DeltaJoinRuntimeTree;
6568
import org.apache.flink.table.runtime.operators.join.deltajoin.LookupHandlerBase;
69+
import org.apache.flink.table.runtime.operators.join.deltajoin.TailOutputDataHandler;
6670
import org.apache.flink.table.runtime.typeutils.InternalSerializers;
6771
import org.apache.flink.table.runtime.typeutils.InternalTypeInfo;
6872
import org.apache.flink.table.types.logical.RowType;
@@ -499,6 +503,7 @@ protected Transformation<RowData> translateToPlanInternal(
499503

500504
private static LookupHandlerBase generateLookupHandler(
501505
boolean isBinaryLookup,
506+
@Nullable Integer id, // used for debug. `null` if it is a binary lookup
502507
DeltaJoinLookupChain.Node node,
503508
Map<Integer, GeneratedFunction<AsyncFunction<RowData, Object>>>
504509
generatedFetcherCollector,
@@ -507,6 +512,9 @@ private static LookupHandlerBase generateLookupHandler(
507512
FlinkTypeFactory typeFactory,
508513
ClassLoader classLoader,
509514
ExecNodeConfig config) {
515+
Preconditions.checkArgument(
516+
isBinaryLookup == (id == null), "Id should be null if it is binary lookup");
517+
510518
final int[] sourceInputOrdinals = node.inputTableBinaryInputOrdinals;
511519
final int lookupTableOrdinal = node.lookupTableBinaryInputOrdinal;
512520
final RowType sourceStreamType =
@@ -584,8 +592,61 @@ private static LookupHandlerBase generateLookupHandler(
584592
node.lookupTableBinaryInputOrdinal);
585593
}
586594

587-
// TODO FLINK-39233 Support cascaded delta join in runtime
588-
throw new IllegalStateException("Support later");
595+
final RowType lookupResultPassThroughCalcRowType;
596+
if (node.isLeftLookupRight()) {
597+
lookupResultPassThroughCalcRowType =
598+
combineOutputRowType(
599+
sourceStreamType,
600+
lookupSidePassThroughCalcRowType,
601+
node.joinType,
602+
typeFactory);
603+
} else {
604+
lookupResultPassThroughCalcRowType =
605+
combineOutputRowType(
606+
lookupSidePassThroughCalcRowType,
607+
sourceStreamType,
608+
swapJoinType(node.joinType),
609+
typeFactory);
610+
}
611+
612+
GeneratedFilterCondition generatedRemainingCondition =
613+
node.deltaJoinSpec
614+
.getRemainingCondition()
615+
.map(
616+
remainCond ->
617+
FilterCodeGenerator.generateFilterCondition(
618+
config,
619+
planner.getFlinkContext().getClassLoader(),
620+
remainCond,
621+
lookupResultPassThroughCalcRowType,
622+
GENERATED_JOIN_CONDITION_CLASS_NAME))
623+
.orElse(null);
624+
625+
final RowDataKeySelector streamSideLookupKeySelector =
626+
KeySelectorUtil.getRowDataSelector(
627+
classLoader,
628+
lookupKeysOnInputSide.stream()
629+
.mapToInt(
630+
key -> {
631+
Preconditions.checkState(
632+
key instanceof FunctionCallUtil.FieldRef);
633+
return ((FunctionCallUtil.FieldRef) key).index;
634+
})
635+
.toArray(),
636+
InternalTypeInfo.of(sourceStreamType));
637+
638+
return new CascadedLookupHandler(
639+
id,
640+
TypeConversions.fromLogicalToDataType(sourceStreamType),
641+
lookupSideGeneratedFetcherWithType.dataType(),
642+
TypeConversions.fromLogicalToDataType(lookupSidePassThroughCalcRowType),
643+
InternalSerializers.create(lookupSidePassThroughCalcRowType),
644+
lookupSideGeneratedCalc,
645+
generatedRemainingCondition,
646+
streamSideLookupKeySelector,
647+
node.inputTableBinaryInputOrdinals,
648+
node.lookupTableBinaryInputOrdinal,
649+
node.isLeftLookupRight());
589650
}
590651

591652
private static RowDataKeySelector getUpsertKeySelector(
@@ -600,23 +661,6 @@ private static RowDataKeySelector getUpsertKeySelector(
600661
classLoader, finalUpsertKeys, InternalTypeInfo.of(rowType));
601662
}
602663

603-
private boolean enableCache(ReadableConfig config) {
604-
return config.get(ExecutionConfigOptions.TABLE_EXEC_DELTA_JOIN_CACHE_ENABLED);
605-
}
606-
607-
/** Get the left cache size and right size. */
608-
private Tuple2<Long, Long> getCacheSize(ReadableConfig config) {
609-
long leftCacheSize =
610-
config.get(ExecutionConfigOptions.TABLE_EXEC_DELTA_JOIN_LEFT_CACHE_SIZE);
611-
long rightCacheSize =
612-
config.get(ExecutionConfigOptions.TABLE_EXEC_DELTA_JOIN_RIGHT_CACHE_SIZE);
613-
if ((leftCacheSize <= 0 || rightCacheSize <= 0) && enableCache(config)) {
614-
throw new IllegalArgumentException(
615-
"Cache size in delta join must be positive when enabling cache.");
616-
}
617-
return Tuple2.of(leftCacheSize, rightCacheSize);
618-
}
619-
620664
private abstract static class DeltaJoinOperatorFactoryBuilder {
621665
protected final PlannerBase planner;
622666
protected final ExecNodeConfig config;
@@ -651,6 +695,23 @@ public DeltaJoinOperatorFactoryBuilder(
651695
}
652696

653697
protected abstract StreamOperatorFactory<RowData> build();
698+
699+
/** Get the left cache size and right size. */
700+
protected Tuple2<Long, Long> getCacheSize(ReadableConfig config) {
701+
long leftCacheSize =
702+
config.get(ExecutionConfigOptions.TABLE_EXEC_DELTA_JOIN_LEFT_CACHE_SIZE);
703+
long rightCacheSize =
704+
config.get(ExecutionConfigOptions.TABLE_EXEC_DELTA_JOIN_RIGHT_CACHE_SIZE);
705+
if ((leftCacheSize <= 0 || rightCacheSize <= 0) && enableCache(config)) {
706+
throw new IllegalArgumentException(
707+
"Cache size in delta join must be positive when enabling cache.");
708+
}
709+
return Tuple2.of(leftCacheSize, rightCacheSize);
710+
}
711+
712+
protected boolean enableCache(ReadableConfig config) {
713+
return config.get(ExecutionConfigOptions.TABLE_EXEC_DELTA_JOIN_CACHE_ENABLED);
714+
}
654715
}
655716

656717
private class DeltaJoinOperatorFactoryBuilderV1 extends DeltaJoinOperatorFactoryBuilder {
@@ -798,6 +859,7 @@ private DeltaJoinHandlerChain buildBinaryLookupHandlerChain(
798859
Collections.singletonList(
799860
generateLookupHandler(
800861
true, // isBinaryLookup
862+
null,
801863
node,
802864
generatedFetcherCollector,
803865
deltaJoinTree,
@@ -926,9 +988,10 @@ public StreamOperatorFactory<RowData> build() {
926988
Map<Integer, GeneratedFunction<AsyncFunction<RowData, Object>>>
927989
generatedFetcherCollector = new HashMap<>();
928990
DeltaJoinHandlerChain left2RightHandlerChain =
929-
generateDeltaJoinHandlerChain(true, generatedFetcherCollector);
991+
generateDeltaJoinHandlerChain(true, leftStreamType, generatedFetcherCollector);
930992
DeltaJoinHandlerChain right2LeftHandlerChain =
931-
generateDeltaJoinHandlerChain(false, generatedFetcherCollector);
993+
generateDeltaJoinHandlerChain(
994+
false, rightStreamType, generatedFetcherCollector);
932995
Preconditions.checkState(
933996
generatedFetcherCollector.size()
934997
== leftAllBinaryInputOrdinals.size()
@@ -1008,6 +1071,7 @@ public StreamOperatorFactory<RowData> build() {
10081071

10091072
private DeltaJoinHandlerChain generateDeltaJoinHandlerChain(
10101073
boolean lookupRight,
1074+
RowType streamRowType,
10111075
Map<Integer, GeneratedFunction<AsyncFunction<RowData, Object>>>
10121076
generatedFetcherCollector) {
10131077
int[] streamOwnedSourceOrdinals =
@@ -1029,6 +1093,7 @@ private DeltaJoinHandlerChain generateDeltaJoinHandlerChain(
10291093
Collections.singletonList(
10301094
generateLookupHandler(
10311095
true, // isBinaryLookup
1096+
null, // debug id
10321097
nodes.get(0),
10331098
generatedFetcherCollector,
10341099
deltaJoinTree,
@@ -1039,7 +1104,40 @@ private DeltaJoinHandlerChain generateDeltaJoinHandlerChain(
10391104
streamOwnedSourceOrdinals);
10401105
}
10411106

1042-
throw new UnsupportedOperationException("Support cascaded delta join operator later");
1107+
final List<DeltaJoinHandlerBase> lookupJoinHandlers = new ArrayList<>();
1108+
1109+
// build delta join handler chain
1110+
for (int i = 0; i < nodes.size(); i++) {
1111+
DeltaJoinLookupChain.Node node = nodes.get(i);
1112+
LookupHandlerBase lookupHandler =
1113+
generateLookupHandler(
1114+
false, // isBinaryLookup
1115+
i + 1, // debug id
1116+
node,
1117+
generatedFetcherCollector,
1118+
deltaJoinTree,
1119+
planner,
1120+
typeFactory,
1121+
classLoader,
1122+
config);
1123+
lookupJoinHandlers.add(lookupHandler);
1124+
}
1125+
List<Integer> lookupSideAllBinaryInputOrdinals =
1126+
lookupRight ? rightAllBinaryInputOrdinals : leftAllBinaryInputOrdinals;
1127+
int lookupSideTableOffset = lookupRight ? leftAllBinaryInputOrdinals.size() : 0;
1128+
lookupJoinHandlers.add(
1129+
new TailOutputDataHandler(
1130+
lookupSideAllBinaryInputOrdinals.stream()
1131+
.mapToInt(i -> i + lookupSideTableOffset)
1132+
.toArray()));
1133+
1134+
Preconditions.checkArgument(
1135+
streamRowType.getFieldCount()
1136+
== deltaJoinTree
1137+
.getOutputRowTypeOnNode(streamOwnedSourceOrdinals, typeFactory)
1138+
.getFieldCount());
1139+
1140+
return DeltaJoinHandlerChain.build(lookupJoinHandlers, streamOwnedSourceOrdinals);
10431141
}
10441142

10451143
private Set<Set<Integer>> getAllDrivenInputsWhenLookup(boolean lookupRight) {

flink-table/flink-table-planner/src/test/scala/org/apache/flink/table/planner/runtime/stream/sql/DeltaJoinITCase.scala renamed to flink-table/flink-table-planner/src/test/scala/org/apache/flink/table/planner/runtime/stream/sql/BinaryDeltaJoinITCase.scala

Lines changed: 6 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,16 @@
1818
package org.apache.flink.table.planner.runtime.stream.sql
1919

2020
import org.apache.flink.core.execution.CheckpointingMode
21-
import org.apache.flink.table.api.Schema
22-
import org.apache.flink.table.api.bridge.scala.internal.StreamTableEnvironmentImpl
23-
import org.apache.flink.table.api.config.{ExecutionConfigOptions, OptimizerConfigOptions}
24-
import org.apache.flink.table.api.config.OptimizerConfigOptions.DeltaJoinStrategy
25-
import org.apache.flink.table.catalog.{CatalogTable, ObjectPath, ResolvedCatalogTable}
2621
import org.apache.flink.table.planner.{JHashMap, JMap}
2722
import org.apache.flink.table.planner.factories.TestValuesRuntimeFunctions.AsyncTestValueLookupFunction
2823
import org.apache.flink.table.planner.factories.TestValuesTableFactory
2924
import org.apache.flink.table.planner.factories.TestValuesTableFactory.changelogRow
30-
import org.apache.flink.table.planner.runtime.utils.{FailingCollectionSource, StreamingTestBase}
31-
import org.apache.flink.testutils.junit.extensions.parameterized.{ParameterizedTestExtension, Parameters}
25+
import org.apache.flink.table.planner.runtime.utils.FailingCollectionSource
3226
import org.apache.flink.types.Row
3327

3428
import org.assertj.core.api.Assertions.assertThat
3529
import org.assertj.core.util.Maps
36-
import org.junit.jupiter.api.{BeforeEach, TestTemplate}
37-
import org.junit.jupiter.api.extension.ExtendWith
30+
import org.junit.jupiter.api.TestTemplate
3831

3932
import javax.annotation.Nullable
4033

@@ -46,23 +39,8 @@ import java.util.concurrent.TimeUnit
4639
import scala.collection.JavaConversions._
4740
import scala.collection.JavaConverters.mapAsScalaMapConverter
4841

49-
@ExtendWith(Array(classOf[ParameterizedTestExtension]))
50-
class DeltaJoinITCase(enableCache: Boolean) extends StreamingTestBase {
51-
52-
@BeforeEach
53-
override def before(): Unit = {
54-
super.before()
55-
56-
tEnv.getConfig.set(
57-
OptimizerConfigOptions.TABLE_OPTIMIZER_DELTA_JOIN_STRATEGY,
58-
DeltaJoinStrategy.FORCE)
59-
60-
tEnv.getConfig.set(
61-
ExecutionConfigOptions.TABLE_EXEC_DELTA_JOIN_CACHE_ENABLED,
62-
Boolean.box(enableCache))
63-
64-
AsyncTestValueLookupFunction.invokeCount.set(0)
65-
}
42+
/** Tests for binary delta join with two tables. */
43+
class BinaryDeltaJoinITCase(enableCache: Boolean) extends DeltaJoinITCaseBase(enableCache) {
6644

6745
@TestTemplate
6846
def testJoinKeyEqualsIndex(): Unit = {
@@ -886,38 +864,6 @@ class DeltaJoinITCase(enableCache: Boolean) extends StreamingTestBase {
886864
.build())
887865
}
888866

889-
/** TODO add index in DDL. */
890-
private def addIndex(tableName: String, indexColumns: List[String]): Unit = {
891-
if (indexColumns.isEmpty) {
892-
return
893-
}
894-
895-
val catalogName = tEnv.getCurrentCatalog
896-
val databaseName = tEnv.getCurrentDatabase
897-
val tablePath = new ObjectPath(databaseName, tableName)
898-
val catalog = tEnv.getCatalog(catalogName).get()
899-
val catalogManager = tEnv.asInstanceOf[StreamTableEnvironmentImpl].getCatalogManager
900-
val schemaResolver = catalogManager.getSchemaResolver
901-
902-
val resolvedTable = catalog.getTable(tablePath).asInstanceOf[ResolvedCatalogTable]
903-
val originTable = resolvedTable.getOrigin
904-
val originSchema = originTable.getUnresolvedSchema
905-
906-
val newSchema = Schema.newBuilder().fromSchema(originSchema).index(indexColumns).build()
907-
908-
val newTable = CatalogTable
909-
.newBuilder()
910-
.schema(newSchema)
911-
.comment(originTable.getComment)
912-
.partitionKeys(originTable.getPartitionKeys)
913-
.options(originTable.getOptions)
914-
.build()
915-
val newResolvedTable = new ResolvedCatalogTable(newTable, schemaResolver.resolve(newSchema))
916-
917-
catalog.dropTable(tablePath, false)
918-
catalog.createTable(tablePath, newResolvedTable, false)
919-
}
920-
921867
private def testUpsertResult(testSpec: TestSpec): Unit = {
922868
prepareTable(
923869
testSpec.leftIndex,
@@ -1046,7 +992,7 @@ class DeltaJoinITCase(enableCache: Boolean) extends StreamingTestBase {
1046992
| $leftExtraOptionsStr
1047993
|)
1048994
|""".stripMargin)
1049-
addIndex("testLeft", leftIndex)
995+
addIndexesAndImmutableCols("testLeft", List(leftIndex), List())
1050996

1051997
tEnv.executeSql("drop table if exists testRight")
1052998
val rightExtraOptionsStr =
@@ -1080,7 +1026,7 @@ class DeltaJoinITCase(enableCache: Boolean) extends StreamingTestBase {
10801026
| $rightExtraOptionsStr
10811027
|)
10821028
|""".stripMargin)
1083-
addIndex("testRight", rightIndex)
1029+
addIndexesAndImmutableCols("testRight", List(rightIndex), List())
10841030

10851031
tEnv.executeSql("drop table if exists testSnk")
10861032
tEnv.executeSql(s"""
@@ -1268,10 +1214,3 @@ class DeltaJoinITCase(enableCache: Boolean) extends StreamingTestBase {
12681214

12691215
}
12701216
}
1271-
1272-
object DeltaJoinITCase {
1273-
@Parameters(name = "EnableCache={0}")
1274-
def parameters(): java.util.Collection[Boolean] = {
1275-
Seq[Boolean](true, false)
1276-
}
1277-
}

0 commit comments

Comments
 (0)