This is an automated email from the ASF dual-hosted git repository.

zhouyuan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 0cc376a359 [GLUTEN-11917][VL] Respect allowPrecisionLoss from 
expression context in Spark 4.1 (#12110)
0cc376a359 is described below

commit 0cc376a359b5dcc4cb43540a4206f0fb4d7ff8e3
Author: Varun Srinivas <[email protected]>
AuthorDate: Thu May 28 09:33:39 2026 -0700

    [GLUTEN-11917][VL] Respect allowPrecisionLoss from expression context in 
Spark 4.1 (#12110)
    
    * [GLUTEN-11917][VL] Respect allowPrecisionLoss from expression context in 
Spark 4.1
    
    In Spark 4.1, SPARK-53968 introduced NumericEvalContext which captures
    allowPrecisionLoss at analysis time and embeds it in arithmetic expressions
    (Add, Subtract, Multiply, Divide) via an evalContext field. Previously,
    Gluten read SQLConf.get.decimalOperationsAllowPrecisionLoss at 
plan-conversion
    time, which can diverge from the expression's captured value — for example,
    when querying a persistent view whose expression was analyzed under a 
different
    session config.
    
    This commit adds a shim method decimalAllowPrecisionLoss(expr: 
BinaryArithmetic)
    to SparkShims. The Spark41Shims override reads 
evalContext.allowDecimalPrecisionLoss
    directly from the expression. All other Spark versions fall back to 
SQLConf.get,
    preserving existing behavior. DecimalArithmeticUtil.getResultType and
    VeloxSparkPlanExecApi.getDecimalArithmeticExprName are updated to use the 
shim,
    ensuring the correct result type and Velox function name are selected.
    
    Fixes: #11917
    
    * [GLUTEN-11917][VL] Add test and clarifying comments for 
allowPrecisionLoss shim
    
    - Add regression test covering the 
view-analyzed-under-different-session-config
      scenario that motivated the fix: arithmetic is analyzed with 
allowPrecisionLoss=false,
      cached in a temp view, then queried with allowPrecisionLoss=true. Gluten 
must read
      the captured evalContext, not SQLConf.get.
    - Add comment on SparkShims default explaining why SQLConf.get is correct 
for
      pre-4.1 Spark versions (no evalContext field exists before SPARK-53968).
    - Add comment on Spark41Shims wildcard arm explaining that Remainder/Pmod 
lack
      evalContext and are gated out of Velox execution by 
GlutenNotSupportException
      in DecimalArithmeticUtil.getResultType, making the SQLConf fallback safe.
    - Add comment on SparkPlanExecApi default explaining why non-Velox backends
      (e.g. ClickHouse) correctly ignore allowPrecisionLoss — they do not use 
the
      _deny_precision_loss naming convention.
    
    * [GLUTEN-11917][VL] Update GlutenSimpleSQLViewSuite comment for Spark 4.1
    
    The original comment noted 2 failures. One was the allowPrecisionLoss
    evalContext issue fixed in this PR (GLUTEN-11917). The suite still has
    1 remaining failure unrelated to GLUTEN-11917, so it stays disabled.
---
 .../backendsapi/velox/VeloxSparkPlanExecApi.scala  |  4 ++--
 .../functions/MathFunctionsValidateSuite.scala     | 26 ++++++++++++++++++++++
 .../gluten/backendsapi/SparkPlanExecApi.scala      |  5 ++++-
 .../gluten/expression/ExpressionConverter.scala    |  6 +++--
 .../gluten/utils/DecimalArithmeticUtil.scala       |  3 +--
 .../gluten/utils/velox/VeloxTestSettings.scala     |  2 +-
 .../org/apache/gluten/sql/shims/SparkShims.scala   |  8 ++++++-
 .../gluten/sql/shims/spark41/Spark41Shims.scala    | 11 +++++++++
 8 files changed, 56 insertions(+), 9 deletions(-)

diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index b7a1e172b2..5f1003689d 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -162,8 +162,8 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi with 
Logging {
     }
   }
 
-  override def getDecimalArithmeticExprName(exprName: String): String =
-    if (!SQLConf.get.decimalOperationsAllowPrecisionLoss) { exprName + 
"_deny_precision_loss" }
+  override def getDecimalArithmeticExprName(exprName: String, 
allowPrecisionLoss: Boolean): String =
+    if (!allowPrecisionLoss) { exprName + "_deny_precision_loss" }
     else { exprName }
 
   /** Transform map_entries to Substrait. */
diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/functions/MathFunctionsValidateSuite.scala
 
b/backends-velox/src/test/scala/org/apache/gluten/functions/MathFunctionsValidateSuite.scala
index 2b2922629c..9fa0d336a4 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/functions/MathFunctionsValidateSuite.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/functions/MathFunctionsValidateSuite.scala
@@ -413,4 +413,30 @@ abstract class MathFunctionsValidateSuite extends 
FunctionsValidateSuite {
       }
     }
   }
+
+  test("decimal arithmetic respects allowPrecisionLoss captured at view 
analysis time") {
+    // Regression test for GLUTEN-11917: in Spark 4.1, arithmetic expressions 
embed
+    // allowPrecisionLoss in their evalContext at analysis time. Gluten must 
read from
+    // the expression rather than SQLConf.get, which can differ when querying 
a view
+    // analyzed under a different session config.
+    withTempView("t", "v") {
+      sql("""
+            |SELECT
+            |CAST('1234567890123456789012345.12345678901' AS DECIMAL(38,11)) 
AS a,
+            |CAST('1234567890123456789012345.02345678901' AS DECIMAL(38,11)) 
AS b""".stripMargin)
+        .createOrReplaceTempView("t")
+
+      // Analyze arithmetic with allowPrecisionLoss=false and cache it in the 
view's plan.
+      withSQLConf("spark.sql.decimalOperations.allowPrecisionLoss" -> "false") 
{
+        sql("CREATE OR REPLACE TEMP VIEW v AS SELECT a - b, a + b, a * b, a / 
b FROM t")
+      }
+
+      // Query under the opposite setting -- Gluten must use the captured 
context, not SQLConf.
+      withSQLConf("spark.sql.decimalOperations.allowPrecisionLoss" -> "true") {
+        runQueryAndCompare("SELECT * FROM v") {
+          checkGlutenPlan[ProjectExecTransformer]
+        }
+      }
+    }
+  }
 }
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index ce0a79f0bc..84e2d86554 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -264,7 +264,10 @@ trait SparkPlanExecApi {
     GenericExpressionTransformer(substraitExprName, Seq(left, right), original)
   }
 
-  def getDecimalArithmeticExprName(exprName: String): String = exprName
+  // Default: ignore allowPrecisionLoss and return exprName unchanged. 
Non-Velox backends
+  // (e.g. ClickHouse) do not use the _deny_precision_loss naming convention; 
they handle
+  // decimal precision through their own mechanisms. VeloxSparkPlanExecApi 
overrides this.
+  def getDecimalArithmeticExprName(exprName: String, allowPrecisionLoss: 
Boolean): String = exprName
 
   /** Transform map_entries to Substrait. */
   def genMapEntriesTransformer(
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index c50bae6e77..7969d30502 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -647,7 +647,8 @@ object ExpressionConverter extends SQLConfHelper with 
Logging {
             DecimalArithmeticUtil.isDecimalArithmetic(b) =>
         val arithmeticExprName =
           
BackendsApiManager.getSparkPlanExecApiInstance.getDecimalArithmeticExprName(
-            getAndCheckSubstraitName(b, expressionsMap))
+            getAndCheckSubstraitName(b, expressionsMap),
+            SparkShimLoader.getSparkShims.decimalAllowPrecisionLoss(b))
         val left =
           replaceWithExpressionTransformer0(b.left, attributeSeq, 
expressionsMap)
         val right =
@@ -664,7 +665,8 @@ object ExpressionConverter extends SQLConfHelper with 
Logging {
         )
       case b: BinaryArithmetic if DecimalArithmeticUtil.isDecimalArithmetic(b) 
=>
         val exprName = 
BackendsApiManager.getSparkPlanExecApiInstance.getDecimalArithmeticExprName(
-          substraitExprName)
+          substraitExprName,
+          SparkShimLoader.getSparkShims.decimalAllowPrecisionLoss(b))
         if (!BackendsApiManager.getSettings.transformCheckOverflow) {
           GenericExpressionTransformer(
             exprName,
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/utils/DecimalArithmeticUtil.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/utils/DecimalArithmeticUtil.scala
index df5ed47e83..893cbdd0f5 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/utils/DecimalArithmeticUtil.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/utils/DecimalArithmeticUtil.scala
@@ -20,7 +20,6 @@ import org.apache.gluten.exception.GlutenNotSupportException
 import org.apache.gluten.sql.shims.SparkShimLoader
 
 import org.apache.spark.sql.catalyst.expressions.{Add, BinaryArithmetic, Cast, 
Divide, Expression, Literal, Multiply, Pmod, PromotePrecision, Remainder, 
Subtract}
-import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType, 
IntegerType, LongType, ShortType}
 import org.apache.spark.sql.utils.DecimalTypeUtil
 
@@ -33,7 +32,7 @@ object DecimalArithmeticUtil {
   // Returns the result decimal type of a decimal arithmetic computing.
   def getResultType(expr: BinaryArithmetic, type1: DecimalType, type2: 
DecimalType): DecimalType = {
 
-    val allowPrecisionLoss = SQLConf.get.decimalOperationsAllowPrecisionLoss
+    val allowPrecisionLoss = 
SparkShimLoader.getSparkShims.decimalAllowPrecisionLoss(expr)
     var resultScale = 0
     var resultPrecision = 0
     expr match {
diff --git 
a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index d43199709c..a3712e62ae 100644
--- 
a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -702,7 +702,7 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenSQLFunctionSuite]
   enableSuite[GlutenSQLJsonProtocolSuite]
   enableSuite[GlutenShufflePartitionsUtilSuite]
-  // TODO: 4.x enableSuite[GlutenSimpleSQLViewSuite]  // 2 failures
+  // TODO: 4.x enableSuite[GlutenSimpleSQLViewSuite]  // 1 failure remains 
after GLUTEN-11917 fix
   enableSuite[GlutenSparkPlanSuite]
     .exclude("SPARK-37779: ColumnarToRowExec should be canonicalizable after 
being (de)serialized")
   enableSuite[GlutenSparkPlannerSuite]
diff --git 
a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala 
b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala
index 1f1cfdf282..c7b8f89c87 100644
--- a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala
@@ -24,7 +24,7 @@ import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.internal.io.FileCommitProtocol
 import org.apache.spark.sql.{AnalysisException, SparkSession}
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, 
InputFileBlockLength, InputFileBlockStart, InputFileName, RaiseError, UnBase64}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, BinaryArithmetic, 
Expression, InputFileBlockLength, InputFileBlockStart, InputFileName, 
RaiseError, UnBase64}
 import org.apache.spark.sql.catalyst.plans.JoinType
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
@@ -250,6 +250,12 @@ trait SparkShims {
 
   def widerDecimalType(d1: DecimalType, d2: DecimalType): DecimalType
 
+  // Spark 4.1+ (SPARK-53968) embeds allowDecimalPrecisionLoss in each 
arithmetic expression's
+  // evalContext at analysis time. Spark41Shims overrides this to read from 
the expression.
+  // All earlier versions have no evalContext field, so reading SQLConf.get 
here is correct.
+  def decimalAllowPrecisionLoss(expr: BinaryArithmetic): Boolean =
+    SQLConf.get.decimalOperationsAllowPrecisionLoss
+
   def getRewriteCreateTableAsSelect(session: SparkSession): SparkStrategy = _ 
=> Seq.empty
 
   /** Shim method for get the "errorMessage" value for Spark 4.0 and above */
diff --git 
a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala
 
b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala
index 238c8ded81..bcd433e7da 100644
--- 
a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala
+++ 
b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala
@@ -610,6 +610,17 @@ class Spark41Shims extends SparkShims {
     DecimalPrecisionTypeCoercion.widerDecimalType(d1, d2)
   }
 
+  override def decimalAllowPrecisionLoss(expr: BinaryArithmetic): Boolean = 
expr match {
+    case a: Add => a.evalContext.allowDecimalPrecisionLoss
+    case s: Subtract => s.evalContext.allowDecimalPrecisionLoss
+    case m: Multiply => m.evalContext.allowDecimalPrecisionLoss
+    case d: Divide => d.evalContext.allowDecimalPrecisionLoss
+    // Remainder and Pmod do not carry evalContext in Spark 4.1. They also 
throw
+    // GlutenNotSupportException in DecimalArithmeticUtil.getResultType, so 
they never
+    // reach Velox execution; SQLConf.get is a safe fallback for the 
name-lookup path.
+    case _ => SQLConf.get.decimalOperationsAllowPrecisionLoss
+  }
+
   override def getErrorMessage(raiseError: RaiseError): Option[Expression] = {
     raiseError.errorParms match {
       case CreateMap(children, _)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to