Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,13 @@ object HeuristicTransform {
node =>
validator.validate(node) match {
case Validator.Passed =>
rule.offload(node)
val offloaded = rule.offload(node)
if (offloaded ne node) {
node.getTagValue(SparkPlan.LOGICAL_PLAN_TAG).foreach {
lp => offloaded.setTagValue(SparkPlan.LOGICAL_PLAN_TAG, lp)
}
}
offloaded
case Validator.Failed(reason) =>
logDebug(s"Validation failed by reason: $reason on query plan: ${node.nodeName}")
if (FallbackTags.maybeOffloadable(node)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,22 @@ import org.apache.spark.sql.execution.SparkPlan
class LegacyOffload(rules: Seq[OffloadSingleNode]) extends Rule[SparkPlan] with LogLevelUtil {
  /**
   * Applies each offload rule bottom-up over the physical plan. When a rule
   * replaces a node, the original node's logical-plan tag is carried over to
   * the replacement so downstream rules that rely on the logical link
   * (e.g. redundant-project removal) still see it.
   */
  def apply(plan: SparkPlan): SparkPlan = {
    rules.foldLeft(plan) {
      case (current, rule) =>
        current.transformUp {
          case node =>
            val replaced = rule.offload(node)
            if (replaced ne node) {
              // Propagate the logical plan tag from the original node to the
              // offloaded node (non-recursive). setTagValue is used directly to
              // avoid setLogicalLink's recursive propagation to children, which
              // would incorrectly tag Exchange nodes.
              node.getTagValue(SparkPlan.LOGICAL_PLAN_TAG).foreach {
                logical => replaced.setTagValue(SparkPlan.LOGICAL_PLAN_TAG, logical)
              }
            }
            replaced
        }
    }
  }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ object PushDownFilterToScan extends Rule[SparkPlan] with PredicateHelper {
scan) && scan.supportPushDownFilters =>
val newScan = scan.withNewPushdownFilters(splitConjunctivePredicates(filter.cond))
if (newScan.doValidate().ok()) {
newScan.copyTagsFrom(scan)
filter.withNewChildren(Seq(newScan))
} else {
filter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,27 +222,35 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenCodeGeneratorWithInterpretedFallbackSuite]
enableSuite[GlutenCollationExpressionSuite]
enableSuite[GlutenCollationRegexpExpressionsSuite]
// TODO: 4.x enableSuite[GlutenCsvExpressionsSuite] // failures with GlutenPlugin
enableSuite[GlutenCsvExpressionsSuite]
.exclude("unsupported mode")
enableSuite[GlutenDynamicPruningSubquerySuite]
enableSuite[GlutenExprIdSuite]
// TODO: 4.x enableSuite[GlutenExpressionEvalHelperSuite] // 2 failures
enableSuite[GlutenExpressionEvalHelperSuite]
.exclude("SPARK-16489: checkEvaluation should fail if expression reuses variable names")
.exclude("SPARK-25388: checkEvaluation should fail if nullable in DataType is incorrect")
enableSuite[GlutenExpressionImplUtilsSuite]
enableSuite[GlutenExpressionSQLBuilderSuite]
enableSuite[GlutenExpressionSetSuite]
enableSuite[GlutenExtractPredicatesWithinOutputSetSuite]
enableSuite[GlutenHexSuite]
enableSuite[GlutenMutableProjectionSuite]
enableSuite[GlutenNamedExpressionSuite]
// TODO: 4.x enableSuite[GlutenObjectExpressionsSuite] // 7 failures
enableSuite[GlutenObjectExpressionsSuite]
.excludeByPrefix("SPARK-2359")
.excludeByPrefix("SPARK-2358")
.exclude("LambdaVariable should support interpreted execution")
enableSuite[GlutenOrderingSuite]
// TODO: 4.x enableSuite[GlutenScalaUDFSuite] // 1 failure
enableSuite[GlutenScalaUDFSuite]
.exclude("variant basic output variant")
enableSuite[GlutenSchemaPruningSuite]
enableSuite[GlutenSelectedFieldSuite]
// GlutenSubExprEvaluationRuntimeSuite is removed because SubExprEvaluationRuntimeSuite
// is in test-jar without shaded Guava, while SubExprEvaluationRuntime is shaded.
enableSuite[GlutenSubexpressionEliminationSuite]
enableSuite[GlutenTimeWindowSuite]
// TODO: 4.x enableSuite[GlutenToPrettyStringSuite] // 1 failure
enableSuite[GlutenToPrettyStringSuite]
.exclude("Timestamp as pretty strings")
enableSuite[GlutenUnsafeRowConverterSuite]
enableSuite[GlutenUnwrapUDTExpressionSuite]
enableSuite[GlutenV2ExpressionUtilsSuite]
Expand All @@ -253,7 +261,8 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenDataSourceV2MetricsSuite]
enableSuite[GlutenDataSourceV2OptionSuite]
enableSuite[GlutenDataSourceV2UtilsSuite]
// TODO: 4.x enableSuite[GlutenGroupBasedUpdateTableSuite] // 1 failure
enableSuite[GlutenGroupBasedUpdateTableSuite]
.exclude("update with NOT NULL checks")
enableSuite[GlutenMergeIntoDataFrameSuite]
enableSuite[GlutenProcedureSuite]
enableSuite[GlutenPushablePredicateSuite]
Expand Down Expand Up @@ -392,8 +401,10 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("parquet widening conversion ShortType -> DoubleType")
enableSuite[GlutenParquetVariantShreddingSuite]
// Generated suites for org.apache.spark.sql.execution.datasources.text
// TODO: 4.x enableSuite[GlutenWholeTextFileV1Suite] // 1 failure
// TODO: 4.x enableSuite[GlutenWholeTextFileV2Suite] // 1 failure
enableSuite[GlutenWholeTextFileV1Suite]
.exclude("reading text file with option wholetext=true")
enableSuite[GlutenWholeTextFileV2Suite]
.exclude("reading text file with option wholetext=true")
// Generated suites for org.apache.spark.sql.execution.datasources.v2
enableSuite[GlutenFileWriterFactorySuite]
enableSuite[GlutenV2SessionCatalogNamespaceSuite]
Expand Down Expand Up @@ -696,39 +707,85 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenAggregatingAccumulatorSuite]
enableSuite[GlutenCoGroupedIteratorSuite]
enableSuite[GlutenColumnarRulesSuite]
// TODO: 4.x enableSuite[GlutenDataSourceScanExecRedactionSuite] // 2 failures
// TODO: 4.x enableSuite[GlutenDataSourceV2ScanExecRedactionSuite] // 2 failures
enableSuite[GlutenDataSourceScanExecRedactionSuite]
.exclude("explain is redacted using SQLConf")
.exclude("SPARK-31793: FileSourceScanExec metadata should contain limited file paths")
enableSuite[GlutenDataSourceV2ScanExecRedactionSuite]
.exclude("explain is redacted using SQLConf")
.exclude("FileScan description")
enableSuite[GlutenExecuteImmediateEndToEndSuite]
// TODO: 4.x enableSuite[GlutenExternalAppendOnlyUnsafeRowArraySuite] // 14 failures
enableSuite[GlutenExternalAppendOnlyUnsafeRowArraySuite]
enableSuite[GlutenGlobalTempViewSuite]
enableSuite[GlutenGlobalTempViewTestSuite]
enableSuite[GlutenGroupedIteratorSuite]
enableSuite[GlutenHiveResultSuite]
// TODO: 4.x enableSuite[GlutenInsertSortForLimitAndOffsetSuite] // 6 failures
enableSuite[GlutenInsertSortForLimitAndOffsetSuite]
.exclude("root LIMIT preserves data ordering with top-K sort")
.exclude("middle LIMIT preserves data ordering with top-K sort")
.exclude("root LIMIT preserves data ordering with CollectLimitExec")
.exclude("middle LIMIT preserves data ordering with the extra sort")
.exclude("root OFFSET preserves data ordering with CollectLimitExec")
.exclude("middle OFFSET preserves data ordering with the extra sort")
enableSuite[GlutenLocalTempViewTestSuite]
// TODO: 4.x enableSuite[GlutenLogicalPlanTagInSparkPlanSuite] // RUN ABORTED
enableSuite[GlutenLogicalPlanTagInSparkPlanSuite]
enableSuite[GlutenOptimizeMetadataOnlyQuerySuite]
enableSuite[GlutenPersistedViewTestSuite]
// TODO: 4.x enableSuite[GlutenPlannerSuite] // 1 failure
// TODO: 4.x enableSuite[GlutenProjectedOrderingAndPartitioningSuite] // 6 failures
enableSuite[GlutenPlannerSuite]
.excludeByPrefix("efficient terminal limit")
.excludeByPrefix("terminal limit -> project -> sort")
.excludeByPrefix("TakeOrderedAndProject can appear")
.excludeByPrefix("TakeOrderedAndProjectExec appears only")
.excludeByPrefix("SPARK-24242")
.excludeByPrefix("SPARK-24556")
.excludeByPrefix("SPARK-33399")
.excludeByPrefix("SPARK-33400")
.excludeByPrefix("sort order doesn't have repeated")
.excludeByPrefix("aliases to expressions should not be replaced")
.excludeByPrefix("aliases in the object hash")
.excludeByPrefix("SPARK-33758")
.excludeByPrefix("SPARK-40086")
enableSuite[GlutenProjectedOrderingAndPartitioningSuite]
.excludeByPrefix("SPARK-42049")
enableSuite[GlutenQueryPlanningTrackerEndToEndSuite]
// TODO: 4.x enableSuite[GlutenRemoveRedundantProjectsSuite] // 14 failures
// TODO: 4.x enableSuite[GlutenRemoveRedundantSortsSuite] // 1 failure
enableSuite[GlutenRemoveRedundantProjectsSuite]
.exclude("project with filter")
.exclude("project with specific column ordering")
.exclude("project with extra columns")
.exclude("project with fewer columns")
.exclude("aggregate without ordering requirement")
.exclude("aggregate with ordering requirement")
.exclude("join without ordering requirement")
.exclude("join with ordering requirement")
.exclude("window function")
.exclude("generate should require column ordering")
.exclude("subquery")
.exclude("SPARK-33697: UnionExec should require column ordering")
.exclude("SPARK-33697: remove redundant projects under expand")
.exclude("SPARK-36020: Project should not be removed when child's logical link is different")
enableSuite[GlutenRemoveRedundantSortsSuite]
.exclude("cached sorted data doesn't need to be re-sorted")
.exclude("SPARK-33472: shuffled join with different left and right side partition numbers")
.exclude("remove redundant sorts with limit")
.exclude("remove redundant sorts with broadcast hash join")
.exclude("remove redundant sorts with sort merge join")
enableSuite[GlutenRowToColumnConverterSuite]
// TODO: 4.x enableSuite[GlutenSQLExecutionSuite] // 1 failure
enableSuite[GlutenSQLExecutionSuite]
enableSuite[GlutenSQLFunctionSuite]
// TODO: 4.x enableSuite[GlutenSQLJsonProtocolSuite] // 1 failure
// TODO: 4.x enableSuite[GlutenShufflePartitionsUtilSuite] // 1 failure
// TODO: 4.x enableSuite[GlutenSimpleSQLViewSuite] // 1 failure
// TODO: 4.x enableSuite[GlutenSparkPlanSuite] // 1 failure
enableSuite[GlutenSQLJsonProtocolSuite]
enableSuite[GlutenShufflePartitionsUtilSuite]
enableSuite[GlutenSimpleSQLViewSuite]
.exclude("alter temporary view should follow current storeAnalyzedPlanForView config")
.exclude("SPARK-53968 reading the view after allowPrecisionLoss is changed")
enableSuite[GlutenSparkPlanSuite]
.exclude("SPARK-37779: ColumnarToRowExec should be canonicalizable after being (de)serialized")
enableSuite[GlutenSparkPlannerSuite]
enableSuite[GlutenSparkScriptTransformationSuite]
enableSuite[GlutenSparkSqlParserSuite]
enableSuite[GlutenUnsafeFixedWidthAggregationMapSuite]
enableSuite[GlutenUnsafeKVExternalSorterSuite]
// TODO: 4.x enableSuite[GlutenUnsafeRowSerializerSuite] // 1 failure
// TODO: 4.x enableSuite[GlutenWholeStageCodegenSparkSubmitSuite] // 1 failure
// TODO: 4.x enableSuite[GlutenWholeStageCodegenSuite] // 24 failures
enableSuite[GlutenUnsafeRowSerializerSuite]
// TODO: 4.x enableSuite[GlutenWholeStageCodegenSparkSubmitSuite] // depends on codegen path
// TODO: 4.x enableSuite[GlutenWholeStageCodegenSuite] // 24 failures: all test WholeStageCodegen which Gluten bypasses
enableSuite[GlutenBroadcastExchangeSuite]
enableSuite[GlutenLocalBroadcastExchangeSuite]
enableSuite[GlutenCoalesceShufflePartitionsSuite]
Expand Down Expand Up @@ -825,10 +882,15 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenDataFrameTransposeSuite]
enableSuite[GlutenDefaultANSIValueSuite]
enableSuite[GlutenDeprecatedDatasetAggregatorSuite]
// TODO: 4.x enableSuite[GlutenExplainSuite] // 1 failure
enableSuite[GlutenExplainSuite]
.exclude("SPARK-33853: explain codegen - check presence of subquery")
.exclude("explain formatted - check presence of subquery in case of DPP")
.exclude("Support ExplainMode in Dataset.explain")
.exclude("Explain formatted output for scan operator for datasource V2")
enableSuite[GlutenICUCollationsMapSuite]
enableSuite[GlutenInlineTableParsingImprovementsSuite]
// TODO: 4.x enableSuite[GlutenJoinHintSuite] // 1 failure
enableSuite[GlutenJoinHintSuite]
.exclude("join strategy hint - shuffle-replicate-nl")
enableSuite[GlutenLogQuerySuite]
// Overridden
.exclude("Query Spark logs with exception using SQL")
Expand All @@ -839,7 +901,7 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenRuntimeConfigSuite]
enableSuite[GlutenSSBQuerySuite]
enableSuite[GlutenSessionStateSuite]
// TODO: 4.x enableSuite[GlutenSetCommandSuite] // 1 failure
// TODO: 4.x enableSuite[GlutenSetCommandSuite] // hive-site.xml hadoop conf not loaded
enableSuite[GlutenSparkSessionBuilderSuite]
enableSuite[GlutenSparkSessionJobTaggingAndCancellationSuite]
enableSuite[GlutenTPCDSCollationQueryTestSuite]
Expand Down Expand Up @@ -1093,7 +1155,7 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenFallbackSuite]
enableSuite[GlutenHiveSQLQuerySuite]
enableSuite[GlutenCollapseProjectExecTransformerSuite]
// TODO: 4.x enableSuite[GlutenSparkSessionExtensionSuite] // 1 failure
// TODO: 4.x enableSuite[GlutenSparkSessionExtensionSuite] // GlutenPlugin interferes with custom session extensions
enableSuite[GlutenGroupBasedDeleteFromTableSuite]
enableSuite[GlutenDeltaBasedDeleteFromTableSuite]
enableSuite[GlutenDataFrameToSchemaSuite]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,23 @@ package org.apache.spark.sql

class GlutenSparkSessionJobTaggingAndCancellationSuite
  extends SparkSessionJobTaggingAndCancellationSuite
  with GlutenTestsCommonTrait {

  // JVM-wide system properties required so the SparkSessions built by the
  // parent suite pick up the Gluten plugin, off-heap memory, and columnar
  // shuffle. Set before the parent's setup runs; cleared afterwards so other
  // suites in the same JVM are unaffected.
  private val glutenSystemProps = Seq(
    "spark.plugins" -> "org.apache.gluten.GlutenPlugin",
    "spark.memory.offHeap.enabled" -> "true",
    "spark.memory.offHeap.size" -> "1024MB",
    "spark.shuffle.manager" -> "org.apache.spark.shuffle.sort.ColumnarShuffleManager")

  override def beforeAll(): Unit = {
    glutenSystemProps.foreach { case (key, value) => System.setProperty(key, value) }
    super.beforeAll()
  }

  override def afterAll(): Unit = {
    super.afterAll()
    glutenSystemProps.foreach { case (key, _) => System.clearProperty(key) }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,27 @@
*/
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.GlutenTestsTrait
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.UTC_OPT
import org.apache.spark.sql.types.{DoubleType, StructField, StructType}

class GlutenCsvExpressionsSuite extends CsvExpressionsSuite with GlutenTestsTrait {

  // Replacement for the excluded "unsupported mode" test. Gluten's
  // DataFrame-based checkEvaluation surfaces the AnalysisException directly,
  // rather than wrapped in TestFailedException as the codegen/interpreted
  // evaluation path does, so the interception differs from upstream.
  testGluten("unsupported mode - gluten") {
    val malformedCsv = "---"
    val rowSchema = StructType(StructField("a", DoubleType) :: Nil)
    val thrown = intercept[AnalysisException] {
      checkEvaluation(
        CsvToStructs(rowSchema, Map("mode" -> "DROPMALFORMED"), Literal(malformedCsv), UTC_OPT),
        InternalRow(null))
    }
    checkError(
      exception = thrown,
      condition = "PARSE_MODE_UNSUPPORTED",
      parameters = Map("funcName" -> "`from_csv`", "mode" -> "DROPMALFORMED")
    )
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,27 @@
*/
package org.apache.spark.sql.execution

import org.apache.spark.sql.GlutenTestsTrait
import org.apache.spark.sql.GlutenTestsCommonTrait

class GlutenExternalAppendOnlyUnsafeRowArraySuite
  extends ExternalAppendOnlyUnsafeRowArraySuite
  with GlutenTestsCommonTrait {

  // JVM-wide system properties required so the SparkSessions built by the
  // parent suite pick up the Gluten plugin, off-heap memory, and columnar
  // shuffle. Set before the parent's setup runs; cleared afterwards so other
  // suites in the same JVM are unaffected.
  private val glutenSystemProps = Seq(
    "spark.plugins" -> "org.apache.gluten.GlutenPlugin",
    "spark.memory.offHeap.enabled" -> "true",
    "spark.memory.offHeap.size" -> "1024MB",
    "spark.shuffle.manager" -> "org.apache.spark.shuffle.sort.ColumnarShuffleManager")

  override def beforeAll(): Unit = {
    glutenSystemProps.foreach { case (key, value) => System.setProperty(key, value) }
    super.beforeAll()
  }

  override def afterAll(): Unit = {
    super.afterAll()
    glutenSystemProps.foreach { case (key, _) => System.clearProperty(key) }
  }
}
Loading
Loading