This is an automated email from the ASF dual-hosted git repository.
zhouyuan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 0cc376a359 [GLUTEN-11917][VL] Respect allowPrecisionLoss from
expression context in Spark 4.1 (#12110)
0cc376a359 is described below
commit 0cc376a359b5dcc4cb43540a4206f0fb4d7ff8e3
Author: Varun Srinivas <[email protected]>
AuthorDate: Thu May 28 09:33:39 2026 -0700
[GLUTEN-11917][VL] Respect allowPrecisionLoss from expression context in
Spark 4.1 (#12110)
* [GLUTEN-11917][VL] Respect allowPrecisionLoss from expression context in
Spark 4.1
In Spark 4.1, SPARK-53968 introduced NumericEvalContext which captures
allowPrecisionLoss at analysis time and embeds it in arithmetic expressions
(Add, Subtract, Multiply, Divide) via an evalContext field. Previously,
Gluten read SQLConf.get.decimalOperationsAllowPrecisionLoss at
plan-conversion
time, which can diverge from the expression's captured value — for example,
when querying a persistent view whose expression was analyzed under a
different
session config.
This commit adds a shim method decimalAllowPrecisionLoss(expr:
BinaryArithmetic)
to SparkShims. The Spark41Shims override reads
evalContext.allowDecimalPrecisionLoss
directly from the expression. All other Spark versions fall back to
SQLConf.get,
preserving existing behavior. DecimalArithmeticUtil.getResultType and
VeloxSparkPlanExecApi.getDecimalArithmeticExprName are updated to use the
shim,
ensuring the correct result type and Velox function name are selected.
Fixes: #11917
* [GLUTEN-11917][VL] Add test and clarifying comments for
allowPrecisionLoss shim
- Add regression test covering the
view-analyzed-under-different-session-config
scenario that motivated the fix: arithmetic is analyzed with
allowPrecisionLoss=false,
cached in a temp view, then queried with allowPrecisionLoss=true. Gluten
must read
the captured evalContext, not SQLConf.get.
- Add comment on SparkShims default explaining why SQLConf.get is correct
for
pre-4.1 Spark versions (no evalContext field exists before SPARK-53968).
- Add comment on Spark41Shims wildcard arm explaining that Remainder/Pmod
lack
evalContext and are gated out of Velox execution by
GlutenNotSupportException
in DecimalArithmeticUtil.getResultType, making the SQLConf fallback safe.
- Add comment on SparkPlanExecApi default explaining why non-Velox backends
(e.g. ClickHouse) correctly ignore allowPrecisionLoss — they do not use
the
_deny_precision_loss naming convention.
* [GLUTEN-11917][VL] Update GlutenSimpleSQLViewSuite comment for Spark 4.1
The original comment noted 2 failures. One was the allowPrecisionLoss
evalContext issue fixed in this PR (GLUTEN-11917). The suite still has
1 remaining failure unrelated to GLUTEN-11917, so it stays disabled.
---
.../backendsapi/velox/VeloxSparkPlanExecApi.scala | 4 ++--
.../functions/MathFunctionsValidateSuite.scala | 26 ++++++++++++++++++++++
.../gluten/backendsapi/SparkPlanExecApi.scala | 5 ++++-
.../gluten/expression/ExpressionConverter.scala | 6 +++--
.../gluten/utils/DecimalArithmeticUtil.scala | 3 +--
.../gluten/utils/velox/VeloxTestSettings.scala | 2 +-
.../org/apache/gluten/sql/shims/SparkShims.scala | 8 ++++++-
.../gluten/sql/shims/spark41/Spark41Shims.scala | 11 +++++++++
8 files changed, 56 insertions(+), 9 deletions(-)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index b7a1e172b2..5f1003689d 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -162,8 +162,8 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi with
Logging {
}
}
- override def getDecimalArithmeticExprName(exprName: String): String =
- if (!SQLConf.get.decimalOperationsAllowPrecisionLoss) { exprName +
"_deny_precision_loss" }
+ override def getDecimalArithmeticExprName(exprName: String,
allowPrecisionLoss: Boolean): String =
+ if (!allowPrecisionLoss) { exprName + "_deny_precision_loss" }
else { exprName }
/** Transform map_entries to Substrait. */
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/functions/MathFunctionsValidateSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/functions/MathFunctionsValidateSuite.scala
index 2b2922629c..9fa0d336a4 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/functions/MathFunctionsValidateSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/functions/MathFunctionsValidateSuite.scala
@@ -413,4 +413,30 @@ abstract class MathFunctionsValidateSuite extends
FunctionsValidateSuite {
}
}
}
+
+ test("decimal arithmetic respects allowPrecisionLoss captured at view
analysis time") {
+ // Regression test for GLUTEN-11917: in Spark 4.1, arithmetic expressions
embed
+ // allowPrecisionLoss in their evalContext at analysis time. Gluten must
read from
+ // the expression rather than SQLConf.get, which can differ when querying
a view
+ // analyzed under a different session config.
+ withTempView("t", "v") {
+ sql("""
+ |SELECT
+ |CAST('1234567890123456789012345.12345678901' AS DECIMAL(38,11))
AS a,
+ |CAST('1234567890123456789012345.02345678901' AS DECIMAL(38,11))
AS b""".stripMargin)
+ .createOrReplaceTempView("t")
+
+ // Analyze arithmetic with allowPrecisionLoss=false and cache it in the
view's plan.
+ withSQLConf("spark.sql.decimalOperations.allowPrecisionLoss" -> "false")
{
+ sql("CREATE OR REPLACE TEMP VIEW v AS SELECT a - b, a + b, a * b, a /
b FROM t")
+ }
+
+ // Query under the opposite setting -- Gluten must use the captured
context, not SQLConf.
+ withSQLConf("spark.sql.decimalOperations.allowPrecisionLoss" -> "true") {
+ runQueryAndCompare("SELECT * FROM v") {
+ checkGlutenPlan[ProjectExecTransformer]
+ }
+ }
+ }
+ }
}
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index ce0a79f0bc..84e2d86554 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -264,7 +264,10 @@ trait SparkPlanExecApi {
GenericExpressionTransformer(substraitExprName, Seq(left, right), original)
}
- def getDecimalArithmeticExprName(exprName: String): String = exprName
+ // Default: ignore allowPrecisionLoss and return exprName unchanged.
Non-Velox backends
+ // (e.g. ClickHouse) do not use the _deny_precision_loss naming convention;
they handle
+ // decimal precision through their own mechanisms. VeloxSparkPlanExecApi
overrides this.
+ def getDecimalArithmeticExprName(exprName: String, allowPrecisionLoss:
Boolean): String = exprName
/** Transform map_entries to Substrait. */
def genMapEntriesTransformer(
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index c50bae6e77..7969d30502 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -647,7 +647,8 @@ object ExpressionConverter extends SQLConfHelper with
Logging {
DecimalArithmeticUtil.isDecimalArithmetic(b) =>
val arithmeticExprName =
BackendsApiManager.getSparkPlanExecApiInstance.getDecimalArithmeticExprName(
- getAndCheckSubstraitName(b, expressionsMap))
+ getAndCheckSubstraitName(b, expressionsMap),
+ SparkShimLoader.getSparkShims.decimalAllowPrecisionLoss(b))
val left =
replaceWithExpressionTransformer0(b.left, attributeSeq,
expressionsMap)
val right =
@@ -664,7 +665,8 @@ object ExpressionConverter extends SQLConfHelper with
Logging {
)
case b: BinaryArithmetic if DecimalArithmeticUtil.isDecimalArithmetic(b)
=>
val exprName =
BackendsApiManager.getSparkPlanExecApiInstance.getDecimalArithmeticExprName(
- substraitExprName)
+ substraitExprName,
+ SparkShimLoader.getSparkShims.decimalAllowPrecisionLoss(b))
if (!BackendsApiManager.getSettings.transformCheckOverflow) {
GenericExpressionTransformer(
exprName,
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/utils/DecimalArithmeticUtil.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/utils/DecimalArithmeticUtil.scala
index df5ed47e83..893cbdd0f5 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/utils/DecimalArithmeticUtil.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/utils/DecimalArithmeticUtil.scala
@@ -20,7 +20,6 @@ import org.apache.gluten.exception.GlutenNotSupportException
import org.apache.gluten.sql.shims.SparkShimLoader
import org.apache.spark.sql.catalyst.expressions.{Add, BinaryArithmetic, Cast,
Divide, Expression, Literal, Multiply, Pmod, PromotePrecision, Remainder,
Subtract}
-import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType,
IntegerType, LongType, ShortType}
import org.apache.spark.sql.utils.DecimalTypeUtil
@@ -33,7 +32,7 @@ object DecimalArithmeticUtil {
// Returns the result decimal type of a decimal arithmetic computing.
def getResultType(expr: BinaryArithmetic, type1: DecimalType, type2:
DecimalType): DecimalType = {
- val allowPrecisionLoss = SQLConf.get.decimalOperationsAllowPrecisionLoss
+ val allowPrecisionLoss =
SparkShimLoader.getSparkShims.decimalAllowPrecisionLoss(expr)
var resultScale = 0
var resultPrecision = 0
expr match {
diff --git
a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index d43199709c..a3712e62ae 100644
---
a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -702,7 +702,7 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenSQLFunctionSuite]
enableSuite[GlutenSQLJsonProtocolSuite]
enableSuite[GlutenShufflePartitionsUtilSuite]
- // TODO: 4.x enableSuite[GlutenSimpleSQLViewSuite] // 2 failures
+ // TODO: 4.x enableSuite[GlutenSimpleSQLViewSuite] // 1 failure remains
after GLUTEN-11917 fix
enableSuite[GlutenSparkPlanSuite]
.exclude("SPARK-37779: ColumnarToRowExec should be canonicalizable after
being (de)serialized")
enableSuite[GlutenSparkPlannerSuite]
diff --git
a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala
b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala
index 1f1cfdf282..c7b8f89c87 100644
--- a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala
@@ -24,7 +24,7 @@ import org.apache.spark.broadcast.Broadcast
import org.apache.spark.internal.io.FileCommitProtocol
import org.apache.spark.sql.{AnalysisException, SparkSession}
import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression,
InputFileBlockLength, InputFileBlockStart, InputFileName, RaiseError, UnBase64}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, BinaryArithmetic,
Expression, InputFileBlockLength, InputFileBlockStart, InputFileName,
RaiseError, UnBase64}
import org.apache.spark.sql.catalyst.plans.JoinType
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
@@ -250,6 +250,12 @@ trait SparkShims {
def widerDecimalType(d1: DecimalType, d2: DecimalType): DecimalType
+ // Spark 4.1+ (SPARK-53968) embeds allowDecimalPrecisionLoss in each
arithmetic expression's
+ // evalContext at analysis time. Spark41Shims overrides this to read from
the expression.
+ // All earlier versions have no evalContext field, so reading SQLConf.get
here is correct.
+ def decimalAllowPrecisionLoss(expr: BinaryArithmetic): Boolean =
+ SQLConf.get.decimalOperationsAllowPrecisionLoss
+
def getRewriteCreateTableAsSelect(session: SparkSession): SparkStrategy = _
=> Seq.empty
/** Shim method for get the "errorMessage" value for Spark 4.0 and above */
diff --git
a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala
b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala
index 238c8ded81..bcd433e7da 100644
---
a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala
+++
b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala
@@ -610,6 +610,17 @@ class Spark41Shims extends SparkShims {
DecimalPrecisionTypeCoercion.widerDecimalType(d1, d2)
}
+ override def decimalAllowPrecisionLoss(expr: BinaryArithmetic): Boolean =
expr match {
+ case a: Add => a.evalContext.allowDecimalPrecisionLoss
+ case s: Subtract => s.evalContext.allowDecimalPrecisionLoss
+ case m: Multiply => m.evalContext.allowDecimalPrecisionLoss
+ case d: Divide => d.evalContext.allowDecimalPrecisionLoss
+ // Remainder and Pmod do not carry evalContext in Spark 4.1. They also
throw
+ // GlutenNotSupportException in DecimalArithmeticUtil.getResultType, so
they never
+ // reach Velox execution; SQLConf.get is a safe fallback for the
name-lookup path.
+ case _ => SQLConf.get.decimalOperationsAllowPrecisionLoss
+ }
+
override def getErrorMessage(raiseError: RaiseError): Option[Expression] = {
raiseError.errorParms match {
case CreateMap(children, _)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]