From 3029fe8031e051f01800b286de2ad876738d6815 Mon Sep 17 00:00:00 2001 From: Varun Srinivas Date: Mon, 18 May 2026 13:29:33 -0700 Subject: [PATCH 1/2] [GLUTEN-11917][VL] Respect allowPrecisionLoss from expression context in Spark 4.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Spark 4.1, SPARK-53968 introduced NumericEvalContext which captures allowPrecisionLoss at analysis time and embeds it in arithmetic expressions (Add, Subtract, Multiply, Divide) via an evalContext field. Previously, Gluten read SQLConf.get.decimalOperationsAllowPrecisionLoss at plan-conversion time, which can diverge from the expression's captured value — for example, when querying a persistent view whose expression was analyzed under a different session config. This commit adds a shim method decimalAllowPrecisionLoss(expr: BinaryArithmetic) to SparkShims. The Spark41Shims override reads evalContext.allowDecimalPrecisionLoss directly from the expression. All other Spark versions fall back to SQLConf.get, preserving existing behavior. DecimalArithmeticUtil.getResultType and VeloxSparkPlanExecApi.getDecimalArithmeticExprName are updated to use the shim, ensuring the correct result type and Velox function name are selected. Fixes: #11917 --- .../gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala | 4 ++-- .../org/apache/gluten/backendsapi/SparkPlanExecApi.scala | 2 +- .../apache/gluten/expression/ExpressionConverter.scala | 6 ++++-- .../org/apache/gluten/utils/DecimalArithmeticUtil.scala | 3 +-- .../scala/org/apache/gluten/sql/shims/SparkShims.scala | 5 ++++- .../apache/gluten/sql/shims/spark41/Spark41Shims.scala | 8 ++++++++ 6 files changed, 20 insertions(+), 8 deletions(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala index b7a1e172b2c..5f1003689dd 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala @@ -162,8 +162,8 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi with Logging { } } - override def getDecimalArithmeticExprName(exprName: String): String = - if (!SQLConf.get.decimalOperationsAllowPrecisionLoss) { exprName + "_deny_precision_loss" } + override def getDecimalArithmeticExprName(exprName: String, allowPrecisionLoss: Boolean): String = + if (!allowPrecisionLoss) { exprName + "_deny_precision_loss" } else { exprName } /** Transform map_entries to Substrait. */ diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala index ce0a79f0bc2..db65bd10470 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala @@ -264,7 +264,7 @@ trait SparkPlanExecApi { GenericExpressionTransformer(substraitExprName, Seq(left, right), original) } - def getDecimalArithmeticExprName(exprName: String): String = exprName + def getDecimalArithmeticExprName(exprName: String, allowPrecisionLoss: Boolean): String = exprName /** Transform map_entries to Substrait. */ def genMapEntriesTransformer( diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala index c50bae6e77b..7969d305025 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala @@ -647,7 +647,8 @@ object ExpressionConverter extends SQLConfHelper with Logging { DecimalArithmeticUtil.isDecimalArithmetic(b) => val arithmeticExprName = BackendsApiManager.getSparkPlanExecApiInstance.getDecimalArithmeticExprName( - getAndCheckSubstraitName(b, expressionsMap)) + getAndCheckSubstraitName(b, expressionsMap), + SparkShimLoader.getSparkShims.decimalAllowPrecisionLoss(b)) val left = replaceWithExpressionTransformer0(b.left, attributeSeq, expressionsMap) val right = @@ -664,7 +665,8 @@ object ExpressionConverter extends SQLConfHelper with Logging { ) case b: BinaryArithmetic if DecimalArithmeticUtil.isDecimalArithmetic(b) => val exprName = BackendsApiManager.getSparkPlanExecApiInstance.getDecimalArithmeticExprName( - substraitExprName) + substraitExprName, + SparkShimLoader.getSparkShims.decimalAllowPrecisionLoss(b)) if (!BackendsApiManager.getSettings.transformCheckOverflow) { GenericExpressionTransformer( exprName, diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/utils/DecimalArithmeticUtil.scala b/gluten-substrait/src/main/scala/org/apache/gluten/utils/DecimalArithmeticUtil.scala index df5ed47e838..893cbdd0f5f 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/utils/DecimalArithmeticUtil.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/utils/DecimalArithmeticUtil.scala @@ -20,7 +20,6 @@ import org.apache.gluten.exception.GlutenNotSupportException import org.apache.gluten.sql.shims.SparkShimLoader import org.apache.spark.sql.catalyst.expressions.{Add, BinaryArithmetic, Cast, Divide, Expression, Literal, Multiply, Pmod, PromotePrecision, Remainder, Subtract} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType, IntegerType, LongType, ShortType} import org.apache.spark.sql.utils.DecimalTypeUtil @@ -33,7 +32,7 @@ object DecimalArithmeticUtil { // Returns the result decimal type of a decimal arithmetic computing. def getResultType(expr: BinaryArithmetic, type1: DecimalType, type2: DecimalType): DecimalType = { - val allowPrecisionLoss = SQLConf.get.decimalOperationsAllowPrecisionLoss + val allowPrecisionLoss = SparkShimLoader.getSparkShims.decimalAllowPrecisionLoss(expr) var resultScale = 0 var resultPrecision = 0 expr match { diff --git a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala index 4d1fd804a9c..760c31f06e2 100644 --- a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala +++ b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala @@ -24,7 +24,7 @@ import org.apache.spark.broadcast.Broadcast import org.apache.spark.internal.io.FileCommitProtocol import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, InputFileBlockLength, InputFileBlockStart, InputFileName, RaiseError, UnBase64} +import org.apache.spark.sql.catalyst.expressions.{Attribute, BinaryArithmetic, Expression, InputFileBlockLength, InputFileBlockStart, InputFileName, RaiseError, UnBase64} import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -250,6 +250,9 @@ trait SparkShims { def widerDecimalType(d1: DecimalType, d2: DecimalType): DecimalType + def decimalAllowPrecisionLoss(expr: BinaryArithmetic): Boolean = + SQLConf.get.decimalOperationsAllowPrecisionLoss + def getRewriteCreateTableAsSelect(session: SparkSession): SparkStrategy = _ => Seq.empty /** Shim method for get the "errorMessage" value for Spark 4.0 and above */ diff --git a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala index 3eabf6b595a..d492632c0f1 100644 --- a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala +++ b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala @@ -610,6 +610,14 @@ class Spark41Shims extends SparkShims { DecimalPrecisionTypeCoercion.widerDecimalType(d1, d2) } + override def decimalAllowPrecisionLoss(expr: BinaryArithmetic): Boolean = expr match { + case a: Add => a.evalContext.allowDecimalPrecisionLoss + case s: Subtract => s.evalContext.allowDecimalPrecisionLoss + case m: Multiply => m.evalContext.allowDecimalPrecisionLoss + case d: Divide => d.evalContext.allowDecimalPrecisionLoss + case _ => SQLConf.get.decimalOperationsAllowPrecisionLoss + } + override def getErrorMessage(raiseError: RaiseError): Option[Expression] = { raiseError.errorParms match { case CreateMap(children, _) From 429cb76461d6a3cae1b97a89284372a6b4ffd2b4 Mon Sep 17 00:00:00 2001 From: Varun Srinivas Date: Mon, 18 May 2026 13:57:38 -0700 Subject: [PATCH 2/2] [GLUTEN-11917][VL] Add test and clarifying comments for allowPrecisionLoss shim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add regression test covering the view-analyzed-under-different-session-config scenario that motivated the fix: arithmetic is analyzed with allowPrecisionLoss=false, cached in a temp view, then queried with allowPrecisionLoss=true. Gluten must read the captured evalContext, not SQLConf.get. - Add comment on SparkShims default explaining why SQLConf.get is correct for pre-4.1 Spark versions (no evalContext field exists before SPARK-53968). - Add comment on Spark41Shims wildcard arm explaining that Remainder/Pmod lack evalContext and are gated out of Velox execution by GlutenNotSupportException in DecimalArithmeticUtil.getResultType, making the SQLConf fallback safe. - Add comment on SparkPlanExecApi default explaining why non-Velox backends (e.g. ClickHouse) correctly ignore allowPrecisionLoss — they do not use the _deny_precision_loss naming convention. --- .../MathFunctionsValidateSuite.scala | 26 +++++++++++++++++++ .../gluten/backendsapi/SparkPlanExecApi.scala | 3 +++ .../apache/gluten/sql/shims/SparkShims.scala | 3 +++ .../sql/shims/spark41/Spark41Shims.scala | 3 +++ 4 files changed, 35 insertions(+) diff --git a/backends-velox/src/test/scala/org/apache/gluten/functions/MathFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/functions/MathFunctionsValidateSuite.scala index 2b2922629c7..9fa0d336a45 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/functions/MathFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/functions/MathFunctionsValidateSuite.scala @@ -413,4 +413,30 @@ abstract class MathFunctionsValidateSuite extends FunctionsValidateSuite { } } } + + test("decimal arithmetic respects allowPrecisionLoss captured at view analysis time") { + // Regression test for GLUTEN-11917: in Spark 4.1, arithmetic expressions embed + // allowPrecisionLoss in their evalContext at analysis time. Gluten must read from + // the expression rather than SQLConf.get, which can differ when querying a view + // analyzed under a different session config. + withTempView("t", "v") { + sql(""" + |SELECT + |CAST('1234567890123456789012345.12345678901' AS DECIMAL(38,11)) AS a, + |CAST('1234567890123456789012345.02345678901' AS DECIMAL(38,11)) AS b""".stripMargin) + .createOrReplaceTempView("t") + + // Analyze arithmetic with allowPrecisionLoss=false and cache it in the view's plan. + withSQLConf("spark.sql.decimalOperations.allowPrecisionLoss" -> "false") { + sql("CREATE OR REPLACE TEMP VIEW v AS SELECT a - b, a + b, a * b, a / b FROM t") + } + + // Query under the opposite setting -- Gluten must use the captured context, not SQLConf. + withSQLConf("spark.sql.decimalOperations.allowPrecisionLoss" -> "true") { + runQueryAndCompare("SELECT * FROM v") { + checkGlutenPlan[ProjectExecTransformer] + } + } + } + } } diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala index db65bd10470..84e2d865541 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala @@ -264,6 +264,9 @@ trait SparkPlanExecApi { GenericExpressionTransformer(substraitExprName, Seq(left, right), original) } + // Default: ignore allowPrecisionLoss and return exprName unchanged. Non-Velox backends + // (e.g. ClickHouse) do not use the _deny_precision_loss naming convention; they handle + // decimal precision through their own mechanisms. VeloxSparkPlanExecApi overrides this. def getDecimalArithmeticExprName(exprName: String, allowPrecisionLoss: Boolean): String = exprName /** Transform map_entries to Substrait. */ diff --git a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala index 760c31f06e2..b230c9e927b 100644 --- a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala +++ b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala @@ -250,6 +250,9 @@ trait SparkShims { def widerDecimalType(d1: DecimalType, d2: DecimalType): DecimalType + // Spark 4.1+ (SPARK-53968) embeds allowDecimalPrecisionLoss in each arithmetic expression's + // evalContext at analysis time. Spark41Shims overrides this to read from the expression. + // All earlier versions have no evalContext field, so reading SQLConf.get here is correct. def decimalAllowPrecisionLoss(expr: BinaryArithmetic): Boolean = SQLConf.get.decimalOperationsAllowPrecisionLoss diff --git a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala index d492632c0f1..44665a9db06 100644 --- a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala +++ b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala @@ -615,6 +615,9 @@ class Spark41Shims extends SparkShims { case s: Subtract => s.evalContext.allowDecimalPrecisionLoss case m: Multiply => m.evalContext.allowDecimalPrecisionLoss case d: Divide => d.evalContext.allowDecimalPrecisionLoss + // Remainder and Pmod do not carry evalContext in Spark 4.1. They also throw + // GlutenNotSupportException in DecimalArithmeticUtil.getResultType, so they never + // reach Velox execution; SQLConf.get is a safe fallback for the name-lookup path. case _ => SQLConf.get.decimalOperationsAllowPrecisionLoss }