This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new fd998c8a678 [SPARK-39093][SQL] Avoid codegen compilation error when 
dividing year-month intervals or day-time intervals by an integral
fd998c8a678 is described below

commit fd998c8a6783c0c8aceed8dcde4017cd479e42c8
Author: Bruce Robbins <bersprock...@gmail.com>
AuthorDate: Wed May 4 17:22:11 2022 +0800

    [SPARK-39093][SQL] Avoid codegen compilation error when dividing year-month 
intervals or day-time intervals by an integral
    
    ### What changes were proposed in this pull request?
    
    In `DivideYMInterval#doGenCode` and `DivideDTInterval#doGenCode`, rely on 
the operand variable names provided by `nullSafeCodeGen` rather than calling 
`genCode` on the operands twice.
    
    ### Why are the changes needed?
    
    `DivideYMInterval#doGenCode` and `DivideDTInterval#doGenCode` call 
`genCode` on the operands twice (once directly, and once indirectly via 
`nullSafeCodeGen`). However, if you call `genCode` on an operand twice, you 
might not get back the same variable name for both calls (e.g., when the 
operand is not a `BoundReference` or if whole-stage codegen is turned off). 
When that happens, `nullSafeCodeGen` generates initialization code for one set 
of variables, but the divide expression genera [...]
    ```
    spark-sql> create or replace temp view v1 as
             > select * FROM VALUES
             > (interval '10' months, interval '10' day, 2)
             > as v1(period, duration, num);
    Time taken: 2.81 seconds
    spark-sql> cache table v1;
    Time taken: 2.184 seconds
    spark-sql> select period/(num + 3) from v1;
    22/05/03 08:56:37 ERROR CodeGenerator: failed to compile: 
org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 40, 
Column 44: Expression "project_value_2" is not an rvalue
    ...
    22/05/03 08:56:37 WARN UnsafeProjection: Expr codegen error and falling 
back to interpreter mode
    ...
    0-2
    Time taken: 0.149 seconds, Fetched 1 row(s)
    spark-sql> select duration/(num + 3) from v1;
    22/05/03 08:57:29 ERROR CodeGenerator: failed to compile: 
org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 40, 
Column 54: Expression "project_value_2" is not an rvalue
    ...
    22/05/03 08:57:29 WARN UnsafeProjection: Expr codegen error and falling 
back to interpreter mode
    ...
    2 00:00:00.000000000
    Time taken: 0.089 seconds, Fetched 1 row(s)
    ```
    The error is not fatal (unless you have `spark.sql.codegen.fallback` set to 
`false`), but it muddies the log and can slow the query (since the expression 
is interpreted).
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    New unit tests (unit tests run with `spark.sql.codegen.fallback` set to 
`false`, so the new tests fail without the fix).
    
    Closes #36442 from bersprockets/interval_div_issue.
    
    Authored-by: Bruce Robbins <bersprock...@gmail.com>
    Signed-off-by: Gengliang Wang <gengli...@apache.org>
    (cherry picked from commit ca87bead23ca32a05c6a404a91cea47178f63e70)
    Signed-off-by: Gengliang Wang <gengli...@apache.org>
---
 .../catalyst/expressions/intervalExpressions.scala | 34 ++++++++++------------
 .../apache/spark/sql/ColumnExpressionSuite.scala   | 12 ++++++++
 2 files changed, 28 insertions(+), 18 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
index dbaff16d47f..3b06f811546 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala
@@ -670,21 +670,20 @@ case class DivideYMInterval(
           case _ => classOf[IntMath].getName
         }
         val javaType = CodeGenerator.javaType(dataType)
-        val months = left.genCode(ctx)
-        val num = right.genCode(ctx)
-        val checkIntegralDivideOverflow =
-          s"""
-             |if (${months.value} == ${Int.MinValue} && ${num.value} == -1)
-             |  throw 
QueryExecutionErrors.overflowInIntegralDivideError($errorContext);
-             |""".stripMargin
-        nullSafeCodeGen(ctx, ev, (m, n) =>
+        nullSafeCodeGen(ctx, ev, (m, n) => {
+          val checkIntegralDivideOverflow =
+            s"""
+               |if ($m == ${Int.MinValue} && $n == -1)
+               |  throw 
QueryExecutionErrors.overflowInIntegralDivideError($errorContext);
+               |""".stripMargin
           // Similarly to non-codegen code. The result of `divide(Int, Long, 
...)` must fit
           // to `Int`. Casting to `Int` is safe here.
           s"""
              |${divideByZeroCheckCodegen(right.dataType, n, errorContext)}
              |$checkIntegralDivideOverflow
              |${ev.value} = ($javaType)$math.divide($m, $n, 
java.math.RoundingMode.HALF_UP);
-          """.stripMargin)
+          """.stripMargin
+        })
       case _: DecimalType =>
         nullSafeCodeGen(ctx, ev, (m, n) =>
           s"""
@@ -744,19 +743,18 @@ case class DivideDTInterval(
     right.dataType match {
       case _: IntegralType =>
         val math = classOf[LongMath].getName
-        val micros = left.genCode(ctx)
-        val num = right.genCode(ctx)
-        val checkIntegralDivideOverflow =
-          s"""
-             |if (${micros.value} == ${Long.MinValue}L && ${num.value} == -1L)
-             |  throw 
QueryExecutionErrors.overflowInIntegralDivideError($errorContext);
-             |""".stripMargin
-        nullSafeCodeGen(ctx, ev, (m, n) =>
+        nullSafeCodeGen(ctx, ev, (m, n) => {
+          val checkIntegralDivideOverflow =
+            s"""
+               |if ($m == ${Long.MinValue}L && $n == -1L)
+               |  throw 
QueryExecutionErrors.overflowInIntegralDivideError($errorContext);
+               |""".stripMargin
           s"""
              |${divideByZeroCheckCodegen(right.dataType, n, errorContext)}
              |$checkIntegralDivideOverflow
              |${ev.value} = $math.divide($m, $n, 
java.math.RoundingMode.HALF_UP);
-          """.stripMargin)
+          """.stripMargin
+        })
       case _: DecimalType =>
         nullSafeCodeGen(ctx, ev, (m, n) =>
           s"""
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 4256e5bc164..f970b68c39a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -2988,4 +2988,16 @@ class ColumnExpressionSuite extends QueryTest with 
SharedSparkSession {
     checkAnswer(uncDf.filter($"src".ilike("ѐёђѻώề")), Seq("ЀЁЂѺΏỀ").toDF())
     // scalastyle:on
   }
+
+  test("SPARK-39093: divide period by integral expression") {
+    val df = Seq(((Period.ofDays(10)), 2)).toDF("pd", "num")
+    checkAnswer(df.select($"pd" / ($"num" + 3)),
+      Seq((Period.ofDays(2))).toDF)
+  }
+
+  test("SPARK-39093: divide duration by integral expression") {
+    val df = Seq(((Duration.ofDays(10)), 2)).toDF("dd", "num")
+    checkAnswer(df.select($"dd" / ($"num" + 3)),
+      Seq((Duration.ofDays(2))).toDF)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to