Skip to content
  •  
  •  
  •  
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] with Com

COMET_NATIVE_SCAN_IMPL.get() match {
case SCAN_AUTO =>
// TODO add support for native_datafusion in the future
nativeIcebergCompatScan(session, scanExec, r, hadoopConf)
nativeDataFusionScan(session, scanExec, r, hadoopConf)
.orElse(nativeIcebergCompatScan(session, scanExec, r, hadoopConf))
.orElse(nativeCometScan(session, scanExec, r, hadoopConf))
.getOrElse(scanExec)
case SCAN_NATIVE_DATAFUSION =>
Expand All @@ -186,6 +186,10 @@ case class CometScanRule(session: SparkSession) extends Rule[SparkPlan] with Com
scanExec: FileSourceScanExec,
r: HadoopFsRelation,
hadoopConf: Configuration): Option[SparkPlan] = {
if (scanExec.bucketedScan) {
// bucketed scans are not supported by native_datafusion
return None
}
if (!CometNativeScan.isSupported(scanExec)) {
return None
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
: : : +- CometBroadcastExchange (6)
: : : +- CometProject (5)
: : : +- CometFilter (4)
: : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim (3)
: : : +- CometNativeScan parquet spark_catalog.default.date_dim (3)
: : +- CometBroadcastExchange (25)
: : +- CometFilter (24)
: : +- CometHashAggregate (23)
Expand All @@ -35,11 +35,11 @@
: +- CometBroadcastExchange (31)
: +- CometProject (30)
: +- CometFilter (29)
: +- CometScan [native_iceberg_compat] parquet spark_catalog.default.store (28)
: +- CometNativeScan parquet spark_catalog.default.store (28)
+- CometBroadcastExchange (37)
+- CometProject (36)
+- CometFilter (35)
+- CometScan [native_iceberg_compat] parquet spark_catalog.default.customer (34)
+- CometNativeScan parquet spark_catalog.default.customer (34)


(1) CometScan [native_iceberg_compat] parquet spark_catalog.default.store_returns
Expand All @@ -54,7 +54,7 @@ ReadSchema: struct<sr_customer_sk:int,sr_store_sk:int,sr_return_amt:decimal(7,2)
Input [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4]
Condition : (isnotnull(sr_store_sk#2) AND isnotnull(sr_customer_sk#1))

(3) CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
(3) CometNativeScan parquet spark_catalog.default.date_dim
Output [2]: [d_date_sk#6, d_year#7]
Batched: true
Location [not included in comparison]/{warehouse_dir}/date_dim]
Expand Down Expand Up @@ -169,7 +169,7 @@ Arguments: [ctr_store_sk#10], [ctr_store_sk#19], Inner, (cast(ctr_total_return#1
Input [5]: [ctr_customer_sk#9, ctr_store_sk#10, ctr_total_return#11, (avg(ctr_total_return) * 1.2)#23, ctr_store_sk#19]
Arguments: [ctr_customer_sk#9, ctr_store_sk#10], [ctr_customer_sk#9, ctr_store_sk#10]

(28) CometScan [native_iceberg_compat] parquet spark_catalog.default.store
(28) CometNativeScan parquet spark_catalog.default.store
Output [2]: [s_store_sk#24, s_state#25]
Batched: true
Location [not included in comparison]/{warehouse_dir}/store]
Expand Down Expand Up @@ -197,7 +197,7 @@ Arguments: [ctr_store_sk#10], [s_store_sk#24], Inner, BuildRight
Input [3]: [ctr_customer_sk#9, ctr_store_sk#10, s_store_sk#24]
Arguments: [ctr_customer_sk#9], [ctr_customer_sk#9]

(34) CometScan [native_iceberg_compat] parquet spark_catalog.default.customer
(34) CometNativeScan parquet spark_catalog.default.customer
Output [2]: [c_customer_sk#26, c_customer_id#27]
Batched: true
Location [not included in comparison]/{warehouse_dir}/customer]
Expand Down Expand Up @@ -239,10 +239,10 @@ BroadcastExchange (46)
+- * CometColumnarToRow (45)
+- CometProject (44)
+- CometFilter (43)
+- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim (42)
+- CometNativeScan parquet spark_catalog.default.date_dim (42)


(42) CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
(42) CometNativeScan parquet spark_catalog.default.date_dim
Output [2]: [d_date_sk#6, d_year#7]
Batched: true
Location [not included in comparison]/{warehouse_dir}/date_dim]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ CometColumnarToRow
: : : : +- CometColumnarToRow
: : : : +- CometProject
: : : : +- CometFilter
: : : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
: : : : +- CometNativeScan parquet spark_catalog.default.date_dim
: : : +- CometBroadcastExchange
: : : +- CometProject
: : : +- CometFilter
: : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
: : : +- CometNativeScan parquet spark_catalog.default.date_dim
: : +- CometBroadcastExchange
: : +- CometFilter
: : +- CometHashAggregate
Expand All @@ -40,14 +40,14 @@ CometColumnarToRow
: : +- CometBroadcastExchange
: : +- CometProject
: : +- CometFilter
: : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
: : +- CometNativeScan parquet spark_catalog.default.date_dim
: +- CometBroadcastExchange
: +- CometProject
: +- CometFilter
: +- CometScan [native_iceberg_compat] parquet spark_catalog.default.store
: +- CometNativeScan parquet spark_catalog.default.store
+- CometBroadcastExchange
+- CometProject
+- CometFilter
+- CometScan [native_iceberg_compat] parquet spark_catalog.default.customer
+- CometNativeScan parquet spark_catalog.default.customer

Comet accelerated 46 out of 49 eligible operators (93%). Final plan contains 2 transitions between Spark and Comet.
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ WholeStageCodegen (1)
InputAdapter
CometProject [d_date_sk]
CometFilter [d_date_sk,d_year]
CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim [d_date_sk,d_year]
CometNativeScan parquet spark_catalog.default.date_dim [d_date_sk,d_year]
CometBroadcastExchange [d_date_sk] #3
CometProject [d_date_sk]
CometFilter [d_date_sk,d_year]
CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim [d_date_sk,d_year]
CometNativeScan parquet spark_catalog.default.date_dim [d_date_sk,d_year]
CometBroadcastExchange [(avg(ctr_total_return) * 1.2),ctr_store_sk] #4
CometFilter [(avg(ctr_total_return) * 1.2),ctr_store_sk]
CometHashAggregate [sum,count] [(avg(ctr_total_return) * 1.2),ctr_store_sk,avg(ctr_total_return)]
Expand All @@ -45,8 +45,8 @@ WholeStageCodegen (1)
CometBroadcastExchange [s_store_sk] #7
CometProject [s_store_sk]
CometFilter [s_store_sk,s_state]
CometScan [native_iceberg_compat] parquet spark_catalog.default.store [s_store_sk,s_state]
CometNativeScan parquet spark_catalog.default.store [s_store_sk,s_state]
CometBroadcastExchange [c_customer_sk,c_customer_id] #8
CometProject [c_customer_id] [c_customer_sk,c_customer_id]
CometFilter [c_customer_sk,c_customer_id]
CometScan [native_iceberg_compat] parquet spark_catalog.default.customer [c_customer_sk,c_customer_id]
CometNativeScan parquet spark_catalog.default.customer [c_customer_sk,c_customer_id]
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ TakeOrderedAndProject (47)
: : : :- * CometColumnarToRow (12)
: : : : +- CometBroadcastHashJoin (11)
: : : : :- CometFilter (2)
: : : : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.customer (1)
: : : : : +- CometNativeScan parquet spark_catalog.default.customer (1)
: : : : +- CometBroadcastExchange (10)
: : : : +- CometProject (9)
: : : : +- CometBroadcastHashJoin (8)
: : : : :- CometScan [native_iceberg_compat] parquet spark_catalog.default.store_sales (3)
: : : : +- CometBroadcastExchange (7)
: : : : +- CometProject (6)
: : : : +- CometFilter (5)
: : : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim (4)
: : : : +- CometNativeScan parquet spark_catalog.default.date_dim (4)
: : : +- BroadcastExchange (18)
: : : +- * CometColumnarToRow (17)
: : : +- CometProject (16)
Expand All @@ -40,15 +40,15 @@ TakeOrderedAndProject (47)
: +- * CometColumnarToRow (32)
: +- CometProject (31)
: +- CometFilter (30)
: +- CometScan [native_iceberg_compat] parquet spark_catalog.default.customer_address (29)
: +- CometNativeScan parquet spark_catalog.default.customer_address (29)
+- BroadcastExchange (40)
+- * CometColumnarToRow (39)
+- CometProject (38)
+- CometFilter (37)
+- CometScan [native_iceberg_compat] parquet spark_catalog.default.customer_demographics (36)
+- CometNativeScan parquet spark_catalog.default.customer_demographics (36)


(1) CometScan [native_iceberg_compat] parquet spark_catalog.default.customer
(1) CometNativeScan parquet spark_catalog.default.customer
Output [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5]
Batched: true
Location [not included in comparison]/{warehouse_dir}/customer]
Expand All @@ -66,7 +66,7 @@ Location: InMemoryFileIndex []
PartitionFilters: [isnotnull(ss_sold_date_sk#7), dynamicpruningexpression(ss_sold_date_sk#7 IN dynamicpruning#8)]
ReadSchema: struct<ss_customer_sk:int>

(4) CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
(4) CometNativeScan parquet spark_catalog.default.date_dim
Output [3]: [d_date_sk#9, d_year#10, d_moy#11]
Batched: true
Location [not included in comparison]/{warehouse_dir}/date_dim]
Expand Down Expand Up @@ -178,7 +178,7 @@ Condition : (exists#2 OR exists#1)
Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5]
Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1]

(29) CometScan [native_iceberg_compat] parquet spark_catalog.default.customer_address
(29) CometNativeScan parquet spark_catalog.default.customer_address
Output [2]: [ca_address_sk#20, ca_county#21]
Batched: true
Location [not included in comparison]/{warehouse_dir}/customer_address]
Expand Down Expand Up @@ -210,7 +210,7 @@ Join condition: None
Output [1]: [c_current_cdemo_sk#4]
Input [3]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#20]

(36) CometScan [native_iceberg_compat] parquet spark_catalog.default.customer_demographics
(36) CometNativeScan parquet spark_catalog.default.customer_demographics
Output [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30]
Batched: true
Location [not included in comparison]/{warehouse_dir}/customer_demographics]
Expand Down Expand Up @@ -274,10 +274,10 @@ BroadcastExchange (52)
+- * CometColumnarToRow (51)
+- CometProject (50)
+- CometFilter (49)
+- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim (48)
+- CometNativeScan parquet spark_catalog.default.date_dim (48)


(48) CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
(48) CometNativeScan parquet spark_catalog.default.date_dim
Output [3]: [d_date_sk#9, d_year#10, d_moy#11]
Batched: true
Location [not included in comparison]/{warehouse_dir}/date_dim]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ TakeOrderedAndProject
: : : :- CometColumnarToRow
: : : : +- CometBroadcastHashJoin
: : : : :- CometFilter
: : : : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.customer
: : : : : +- CometNativeScan parquet spark_catalog.default.customer
: : : : +- CometBroadcastExchange
: : : : +- CometProject
: : : : +- CometBroadcastHashJoin
Expand All @@ -24,11 +24,11 @@ TakeOrderedAndProject
: : : : : +- CometColumnarToRow
: : : : : +- CometProject
: : : : : +- CometFilter
: : : : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
: : : : : +- CometNativeScan parquet spark_catalog.default.date_dim
: : : : +- CometBroadcastExchange
: : : : +- CometProject
: : : : +- CometFilter
: : : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
: : : : +- CometNativeScan parquet spark_catalog.default.date_dim
: : : +- BroadcastExchange
: : : +- CometColumnarToRow
: : : +- CometProject
Expand All @@ -38,7 +38,7 @@ TakeOrderedAndProject
: : : +- CometBroadcastExchange
: : : +- CometProject
: : : +- CometFilter
: : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
: : : +- CometNativeScan parquet spark_catalog.default.date_dim
: : +- BroadcastExchange
: : +- CometColumnarToRow
: : +- CometProject
Expand All @@ -48,16 +48,16 @@ TakeOrderedAndProject
: : +- CometBroadcastExchange
: : +- CometProject
: : +- CometFilter
: : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
: : +- CometNativeScan parquet spark_catalog.default.date_dim
: +- BroadcastExchange
: +- CometColumnarToRow
: +- CometProject
: +- CometFilter
: +- CometScan [native_iceberg_compat] parquet spark_catalog.default.customer_address
: +- CometNativeScan parquet spark_catalog.default.customer_address
+- BroadcastExchange
+- CometColumnarToRow
+- CometProject
+- CometFilter
+- CometScan [native_iceberg_compat] parquet spark_catalog.default.customer_demographics
+- CometNativeScan parquet spark_catalog.default.customer_demographics

Comet accelerated 35 out of 54 eligible operators (64%). Final plan contains 7 transitions between Spark and Comet.
Loading
Loading