This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push: new 380177d0f10 [SPARK-39476][SQL] Disable Unwrap cast optimize when casting from Long to Float/ Double or from Integer to Float 380177d0f10 is described below commit 380177d0f10c2c52c8bdfa9d7f00f30f68fad15a Author: wangguangxin.cn <wangguangxin...@bytedance.com> AuthorDate: Thu Jun 16 09:27:24 2022 +0800 [SPARK-39476][SQL] Disable Unwrap cast optimize when casting from Long to Float/ Double or from Integer to Float Cast from Integer to Float or from Long to Double/Float may lose precision if the length of the Integer/Long exceeds the **significant digits** of a Double (which is 15 or 16 digits) or a Float (which is 7 or 8 digits). For example, ```select *, cast(a as int) from (select cast(33554435 as float) a )``` gives `33554436` instead of `33554435`. When it comes to the optimization rule `UnwrapCastInBinaryComparison`, it may result in an incorrect (confusing) result. We can reproduce it with the following script. ``` spark.range(10).map(i => 64707595868612313L).createOrReplaceTempView("tbl") val df = sql("select * from tbl where cast(value as double) = cast('64707595868612313' as double)") df.explain(true) df.show() ``` When we disable this optimization rule, it returns 10 records. But if we enable this optimization rule, it returns empty, since the SQL is optimized to ``` select * from tbl where value = 64707595868612312L ``` Fix the behavior that may confuse users (or maybe a bug?) No Add a new UT Closes #36873 from WangGuangxin/SPARK-24994-followup. 
Authored-by: wangguangxin.cn <wangguangxin...@bytedance.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 9612db3fc9c38204b2bf9f724dedb9ec5f636556) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../optimizer/UnwrapCastInBinaryComparison.scala | 12 ++++++++- .../sql/UnwrapCastInComparisonEndToEndSuite.scala | 31 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala index 6d7b6e56e4e..d33c7f2eeb4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala @@ -359,7 +359,17 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { !fromExp.foldable && fromExp.dataType.isInstanceOf[NumericType] && toType.isInstanceOf[NumericType] && - Cast.canUpCast(fromExp.dataType, toType) + canUnwrapCast(fromExp.dataType, toType) + } + + private def canUnwrapCast(from: DataType, to: DataType): Boolean = (from, to) match { + // SPARK-39476: It's not safe to unwrap cast from Integer to Float or from Long to Float/Double, + // since the length of Integer/Long may exceed the significant digits of Float/Double. 
+ case (IntegerType, FloatType) => false + case (LongType, FloatType) => false + case (LongType, DoubleType) => false + case _ if from.isInstanceOf[NumericType] => Cast.canUpCast(from, to) + case _ => false } private[optimizer] def getRange(dt: DataType): Option[(Any, Any)] = dt match { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala index e6f0426428b..c27097562e5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala @@ -190,5 +190,36 @@ class UnwrapCastInComparisonEndToEndSuite extends QueryTest with SharedSparkSess } } + test("SPARK-39476: Should not unwrap cast from Long to Double/Float") { + withTable(t) { + Seq((6470759586864300301L)) + .toDF("c1").write.saveAsTable(t) + val df = spark.table(t) + + checkAnswer( + df.where("cast(c1 as double) == cast(6470759586864300301L as double)") + .select("c1"), + Row(6470759586864300301L)) + + checkAnswer( + df.where("cast(c1 as float) == cast(6470759586864300301L as float)") + .select("c1"), + Row(6470759586864300301L)) + } + } + + test("SPARK-39476: Should not unwrap cast from Integer to Float") { + withTable(t) { + Seq((33554435)) + .toDF("c1").write.saveAsTable(t) + val df = spark.table(t) + + checkAnswer( + df.where("cast(c1 as float) == cast(33554435 as float)") + .select("c1"), + Row(33554435)) + } + } + private def decimal(v: BigDecimal): Decimal = Decimal(v, 5, 2) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org