This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push: new a7554c34b59 [SPARK-39476][SQL] Disable Unwrap cast optimize when casting from Long to Float/ Double or from Integer to Float a7554c34b59 is described below commit a7554c34b59d1cacf53b2e239acd746a886bdde6 Author: wangguangxin.cn <wangguangxin...@bytedance.com> AuthorDate: Thu Jun 16 09:27:24 2022 +0800 [SPARK-39476][SQL] Disable Unwrap cast optimize when casting from Long to Float/ Double or from Integer to Float ### What changes were proposed in this pull request? Cast from Integer to Float or from Long to Double/Float may loss precision if the length of Integer/Long beyonds the **significant digits** of a Double(which is 15 or 16 digits) or Float(which is 7 or 8 digits). For example, ```select *, cast(a as int) from (select cast(33554435 as foat) a )``` gives `33554436` instead of `33554435`. When it comes the optimization rule `UnwrapCastInBinaryComparison`, it may result in incorrect (confused) result . We can reproduce it with following script. ``` spark.range(10).map(i => 64707595868612313L).createOrReplaceTempView("tbl") val df = sql("select * from tbl where cast(value as double) = cast('64707595868612313' as double)") df.explain(true) df.show() ``` With we disable this optimization rule , it returns 10 records. But if we enable this optimization rule, it returns empty, since the sql is optimized to ``` select * from tbl where value = 64707595868612312L ``` ### Why are the changes needed? Fix the behavior that may confuse users (or maybe a bug?) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add a new UT Closes #36873 from WangGuangxin/SPARK-24994-followup. Authored-by: wangguangxin.cn <wangguangxin...@bytedance.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 9612db3fc9c38204b2bf9f724dedb9ec5f636556) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../optimizer/UnwrapCastInBinaryComparison.scala | 14 ++++++++-- .../sql/UnwrapCastInComparisonEndToEndSuite.scala | 31 ++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala index d9ec2c0d4b4..94e27379b74 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala @@ -358,8 +358,18 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { toType.sameType(literalType) && !fromExp.foldable && toType.isInstanceOf[NumericType] && - ((fromExp.dataType.isInstanceOf[NumericType] && Cast.canUpCast(fromExp.dataType, toType)) || - fromExp.dataType.isInstanceOf[BooleanType]) + canUnwrapCast(fromExp.dataType, toType) + } + + private def canUnwrapCast(from: DataType, to: DataType): Boolean = (from, to) match { + case (BooleanType, _) => true + // SPARK-39476: It's not safe to unwrap cast from Integer to Float or from Long to Float/Double, + // since the length of Integer/Long may exceed the significant digits of Float/Double. + case (IntegerType, FloatType) => false + case (LongType, FloatType) => false + case (LongType, DoubleType) => false + case _ if from.isInstanceOf[NumericType] => Cast.canUpCast(from, to) + case _ => false } private[optimizer] def getRange(dt: DataType): Option[(Any, Any)] = dt match { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala index 2c361299b17..1d7af84ef60 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala @@ -209,5 +209,36 @@ class UnwrapCastInComparisonEndToEndSuite extends QueryTest with SharedSparkSess } } + test("SPARK-39476: Should not unwrap cast from Long to Double/Float") { + withTable(t) { + Seq((6470759586864300301L)) + .toDF("c1").write.saveAsTable(t) + val df = spark.table(t) + + checkAnswer( + df.where("cast(c1 as double) == cast(6470759586864300301L as double)") + .select("c1"), + Row(6470759586864300301L)) + + checkAnswer( + df.where("cast(c1 as float) == cast(6470759586864300301L as float)") + .select("c1"), + Row(6470759586864300301L)) + } + } + + test("SPARK-39476: Should not unwrap cast from Integer to Float") { + withTable(t) { + Seq((33554435)) + .toDF("c1").write.saveAsTable(t) + val df = spark.table(t) + + checkAnswer( + df.where("cast(c1 as float) == cast(33554435 as float)") + .select("c1"), + Row(33554435)) + } + } + private def decimal(v: BigDecimal): Decimal = Decimal(v, 5, 2) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org