This is an automated email from the ASF dual-hosted git repository. yao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new f18d945af7b6 [SPARK-47063][SQL] CAST long to timestamp has different behavior for codegen vs interpreted f18d945af7b6 is described below commit f18d945af7b69fbc89b38b9ca3ca79263b0881ed Author: Pablo Langa <soy...@gmail.com> AuthorDate: Wed Feb 28 11:44:04 2024 +0800 [SPARK-47063][SQL] CAST long to timestamp has different behavior for codegen vs interpreted ### What changes were proposed in this pull request? When an overflow occurs casting long to timestamp there are different behaviors between codegen and interpreted ``` scala> Seq(Long.MaxValue, Long.MinValue).toDF("v").repartition(1).selectExpr("*", "CAST(v AS timestamp) as ts").selectExpr("*", "unix_micros(ts)").show(false) +--------------------+-------------------+---------------+ |v |ts |unix_micros(ts)| +--------------------+-------------------+---------------+ |9223372036854775807 |1969-12-31 20:59:59|-1000000 | |-9223372036854775808|1969-12-31 21:00:00|0 | +--------------------+-------------------+---------------+ scala> spark.conf.set("spark.sql.codegen.wholeStage", false) scala> spark.conf.set("spark.sql.codegen.factoryMode", "NO_CODEGEN") scala> Seq(Long.MaxValue, Long.MinValue).toDF("v").repartition(1).selectExpr("*", "CAST(v AS timestamp) as ts").selectExpr("*", "unix_micros(ts)").show(false) +--------------------+-----------------------------+--------------------+ |v |ts |unix_micros(ts) | +--------------------+-----------------------------+--------------------+ |9223372036854775807 |+294247-01-10 01:00:54.775807|9223372036854775807 | |-9223372036854775808|-290308-12-21 15:16:20.224192|-9223372036854775808| +--------------------+-----------------------------+--------------------+ ``` To align the behavior this PR changes the codegen function to be the same as interpreted 
(https://github.com/apache/spark/blob/f0090c95ad4eca18040104848117a7da648ffa3c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala#L687) ### Why are the changes needed? This is necessary to be consistent in all cases ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? With unit test and manually ### Was this patch authored or co-authored using generative AI tooling? No Closes #45294 from planga82/bugfix/spark47063_cast_codegen. Authored-by: Pablo Langa <soy...@gmail.com> Signed-off-by: Kent Yao <y...@apache.org> --- .../main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 3 ++- .../apache/spark/sql/catalyst/expressions/CastWithAnsiOffSuite.scala | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 66907dc6c353..3f14f1458433 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -1624,7 +1624,8 @@ case class Cast( val block = inline"new java.math.BigDecimal($MICROS_PER_SECOND)" code"($d.toBigDecimal().bigDecimal().multiply($block)).longValue()" } - private[this] def longToTimeStampCode(l: ExprValue): Block = code"$l * (long)$MICROS_PER_SECOND" + private[this] def longToTimeStampCode(l: ExprValue): Block = + code"java.util.concurrent.TimeUnit.SECONDS.toMicros($l)" private[this] def timestampToLongCode(ts: ExprValue): Block = code"java.lang.Math.floorDiv($ts, $MICROS_PER_SECOND)" private[this] def timestampToDoubleCode(ts: ExprValue): Block = diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOffSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOffSuite.scala index e260b6fdbdb5..141eaf56fffb 
100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOffSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOffSuite.scala @@ -507,6 +507,8 @@ class CastWithAnsiOffSuite extends CastSuiteBase { checkEvaluation(cast(1.0 / 0.0, TimestampType), null) checkEvaluation(cast(Float.NaN, TimestampType), null) checkEvaluation(cast(1.0f / 0.0f, TimestampType), null) + checkEvaluation(cast(Literal(Long.MaxValue), TimestampType), Long.MaxValue) + checkEvaluation(cast(Literal(Long.MinValue), TimestampType), Long.MinValue) } test("cast a timestamp before the epoch 1970-01-01 00:00:00Z") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org