This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 16986b2  [SPARK-26382][CORE] prefix comparator should handle -0.0
16986b2 is described below

commit 16986b29e22553797e0e78df445eac94a44285c7
Author: Wenchen Fan <wenc...@databricks.com>
AuthorDate: Tue Dec 18 10:09:56 2018 -0800

    [SPARK-26382][CORE] prefix comparator should handle -0.0
    
    ## What changes were proposed in this pull request?
    
    This is kind of a followup of https://github.com/apache/spark/pull/23239
    
    The `UnsafeProject` will normalize special float/double values(NaN and 
-0.0), so the sorter doesn't have to handle it.
    
    However, for consistency and future-proof, this PR proposes to normalize 
`-0.0` in the prefix comparator, so that it's same with the normal ordering. 
Note that prefix comparator handles NaN as well.
    
    This is not a bug fix, but a safe guard.
    
    ## How was this patch tested?
    
    existing tests
    
    Closes #23334 from cloud-fan/sort.
    
    Authored-by: Wenchen Fan <wenc...@databricks.com>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
    (cherry picked from commit befca983d2da4f7828aa7a7cd7345d17c4f291dd)
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../util/collection/unsafe/sort/PrefixComparators.java  |  2 ++
 .../collection/unsafe/sort/PrefixComparatorsSuite.scala | 17 +++++++++++++++--
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git 
a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
 
b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
index 0910db2..bef1bda 100644
--- 
a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
+++ 
b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
@@ -69,6 +69,8 @@ public class PrefixComparators {
      * details see http://stereopsis.com/radix.html.
      */
     public static long computePrefix(double value) {
+      // normalize -0.0 to 0.0, as they should be equal
+      value = value == -0.0 ? 0.0 : value;
       // Java's doubleToLongBits already canonicalizes all NaN values to the 
smallest possible
       // positive NaN, so there's nothing special we need to do for NaNs.
       long bits = Double.doubleToLongBits(value);
diff --git 
a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala
 
b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala
index 73546ef..38cb37c 100644
--- 
a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala
+++ 
b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala
@@ -125,6 +125,7 @@ class PrefixComparatorsSuite extends SparkFunSuite with 
PropertyChecks {
     val nan2Prefix = 
PrefixComparators.DoublePrefixComparator.computePrefix(nan2)
     assert(nan1Prefix === nan2Prefix)
     val doubleMaxPrefix = 
PrefixComparators.DoublePrefixComparator.computePrefix(Double.MaxValue)
+    // NaN is greater than the max double value.
     assert(PrefixComparators.DOUBLE.compare(nan1Prefix, doubleMaxPrefix) === 1)
   }
 
@@ -134,22 +135,34 @@ class PrefixComparatorsSuite extends SparkFunSuite with 
PropertyChecks {
     assert(java.lang.Double.doubleToRawLongBits(negativeNan) < 0)
     val prefix = 
PrefixComparators.DoublePrefixComparator.computePrefix(negativeNan)
     val doubleMaxPrefix = 
PrefixComparators.DoublePrefixComparator.computePrefix(Double.MaxValue)
+    // -NaN is greater than the max double value.
     assert(PrefixComparators.DOUBLE.compare(prefix, doubleMaxPrefix) === 1)
   }
 
   test("double prefix comparator handles other special values properly") {
-    val nullValue = 0L
+    // See `SortPrefix.nullValue` for how we deal with nulls for float/double 
type
+    val smallestNullPrefix = 0L
+    val largestNullPrefix = -1L
     val nan = 
PrefixComparators.DoublePrefixComparator.computePrefix(Double.NaN)
     val posInf = 
PrefixComparators.DoublePrefixComparator.computePrefix(Double.PositiveInfinity)
     val negInf = 
PrefixComparators.DoublePrefixComparator.computePrefix(Double.NegativeInfinity)
     val minValue = 
PrefixComparators.DoublePrefixComparator.computePrefix(Double.MinValue)
     val maxValue = 
PrefixComparators.DoublePrefixComparator.computePrefix(Double.MaxValue)
     val zero = PrefixComparators.DoublePrefixComparator.computePrefix(0.0)
+    val minusZero = 
PrefixComparators.DoublePrefixComparator.computePrefix(-0.0)
+
+    // null is greater than everything including NaN, when we need to treat it 
as the largest value.
+    assert(PrefixComparators.DOUBLE.compare(largestNullPrefix, nan) === 1)
+    // NaN is greater than the positive infinity.
     assert(PrefixComparators.DOUBLE.compare(nan, posInf) === 1)
     assert(PrefixComparators.DOUBLE.compare(posInf, maxValue) === 1)
     assert(PrefixComparators.DOUBLE.compare(maxValue, zero) === 1)
     assert(PrefixComparators.DOUBLE.compare(zero, minValue) === 1)
     assert(PrefixComparators.DOUBLE.compare(minValue, negInf) === 1)
-    assert(PrefixComparators.DOUBLE.compare(negInf, nullValue) === 1)
+    // null is smaller than everything including negative infinity, when we 
need to treat it as
+    // the smallest value.
+    assert(PrefixComparators.DOUBLE.compare(negInf, smallestNullPrefix) === 1)
+    // 0.0 should be equal to -0.0.
+    assert(PrefixComparators.DOUBLE.compare(zero, minusZero) === 0)
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to