This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push: new 682442a [SPARK-36755][SQL] ArraysOverlap should handle duplicated Double.NaN and Float.NaN 682442a is described below commit 682442a5aa46d50b6b4b74537fccf04fdd33fe0f Author: Angerszhuuuu <angers....@gmail.com> AuthorDate: Wed Sep 15 22:31:46 2021 +0800 [SPARK-36755][SQL] ArraysOverlap should handle duplicated Double.NaN and Float.NaN ### What changes were proposed in this pull request? For query ``` select arrays_overlap(array(cast('nan' as double), 1d), array(cast('nan' as double))) ``` This returns [false], but it should return [true]. This issue occurs because `scala.collection.mutable.HashSet` can't handle `Double.NaN` and `Float.NaN`. ### Why are the changes needed? Fix bug ### Does this PR introduce _any_ user-facing change? Yes, `arrays_overlap` now treats `NaN` values as equal. ### How was this patch tested? Added UT Closes #34006 from AngersZhuuuu/SPARK-36755. Authored-by: Angerszhuuuu <angers....@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit b665782f0d3729928be4ca897ec2eb990b714879) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../sql/catalyst/expressions/collectionOperations.scala | 4 ++-- .../catalyst/expressions/CollectionExpressionsSuite.scala | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 7b231fe..9f922d1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -1262,12 +1262,12 @@ case class ArraysOverlap(left: Expression, right: Expression) (arr2, arr1) } if (smaller.numElements() > 0) { - val smallestSet = new mutable.HashSet[Any] + val smallestSet = 
new java.util.HashSet[Any]() smaller.foreach(elementType, (_, v) => if (v == null) { hasNull = true } else { - smallestSet += v + smallestSet.add(v) }) bigger.foreach(elementType, (_, v1) => if (v1 == null) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 25e40c4..69a24d9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -1965,4 +1965,17 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper Literal.create(Seq(Float.NaN, null, 1f), ArrayType(FloatType))), Seq(Float.NaN, null, 1f)) } + + test("SPARK-36755: ArraysOverlap hould handle duplicated Double.NaN and Float.Nan") { + checkEvaluation(ArraysOverlap( + Literal.apply(Array(Double.NaN, 1d)), Literal.apply(Array(Double.NaN))), true) + checkEvaluation(ArraysOverlap( + Literal.create(Seq(Double.NaN, null), ArrayType(DoubleType)), + Literal.create(Seq(Double.NaN, null, 1d), ArrayType(DoubleType))), true) + checkEvaluation(ArraysOverlap( + Literal.apply(Array(Float.NaN)), Literal.apply(Array(Float.NaN, 1f))), true) + checkEvaluation(ArraysOverlap( + Literal.create(Seq(Float.NaN, null), ArrayType(FloatType)), + Literal.create(Seq(Float.NaN, null, 1f), ArrayType(FloatType))), true) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org