Repository: spark
Updated Branches:
  refs/heads/branch-1.4 dfd905df5 -> 51d98b0e9


[SPARK-7844] [MLLIB] Fix broken tests in KernelDensity

The densities in KernelDensity are scaled down by
(number of parallel processes x number of evaluation points). They should be
scaled down by the number of samples only. This went unnoticed because the
tests in KernelDensitySuite were themselves broken and did not check the
results properly.

Author: MechCoder <manojkumarsivaraj...@gmail.com>

Closes #6383 from MechCoder/spark-7844 and squashes the following commits:

ab81302 [MechCoder] Math->math
9b8ed50 [MechCoder] Make one pass to update count
a92fe50 [MechCoder] [SPARK-7844] Fix broken tests in KernelDensity

(cherry picked from commit 61664732b25b35f94be35a42cde651cbfd0e02b7)
Signed-off-by: Xiangrui Meng <m...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51d98b0e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51d98b0e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51d98b0e

Branch: refs/heads/branch-1.4
Commit: 51d98b0e97c97a7eca2d4ff2fc14b9cfe9af9e2f
Parents: dfd905d
Author: MechCoder <manojkumarsivaraj...@gmail.com>
Authored: Tue May 26 13:21:00 2015 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Tue May 26 13:22:42 2015 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/mllib/stat/KernelDensity.scala |  2 +-
 .../org/apache/spark/mllib/stat/KernelDensitySuite.scala  | 10 ++++++----
 2 files changed, 7 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/51d98b0e/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala
index a6bfe26..58a50f9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala
@@ -93,7 +93,7 @@ class KernelDensity extends Serializable {
           x._1(i) += normPdf(y, bandwidth, logStandardDeviationPlusHalfLog2Pi, 
points(i))
           i += 1
         }
-        (x._1, n)
+        (x._1, x._2 + 1)
       },
       (x, y) => {
         blas.daxpy(n, 1.0, y._1, 1, x._1, 1)

http://git-wip-us.apache.org/repos/asf/spark/blob/51d98b0e/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala 
b/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala
index 14bb1ce..a309c94 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala
@@ -29,8 +29,8 @@ class KernelDensitySuite extends FunSuite with 
MLlibTestSparkContext {
     val densities = new 
KernelDensity().setSample(rdd).setBandwidth(3.0).estimate(evaluationPoints)
     val normal = new NormalDistribution(5.0, 3.0)
     val acceptableErr = 1e-6
-    assert(densities(0) - normal.density(5.0) < acceptableErr)
-    assert(densities(0) - normal.density(6.0) < acceptableErr)
+    assert(math.abs(densities(0) - normal.density(5.0)) < acceptableErr)
+    assert(math.abs(densities(1) - normal.density(6.0)) < acceptableErr)
   }
 
   test("kernel density multiple samples") {
@@ -40,7 +40,9 @@ class KernelDensitySuite extends FunSuite with 
MLlibTestSparkContext {
     val normal1 = new NormalDistribution(5.0, 3.0)
     val normal2 = new NormalDistribution(10.0, 3.0)
     val acceptableErr = 1e-6
-    assert(densities(0) - (normal1.density(5.0) + normal2.density(5.0)) / 2 < 
acceptableErr)
-    assert(densities(0) - (normal1.density(6.0) + normal2.density(6.0)) / 2 < 
acceptableErr)
+    assert(math.abs(
+      densities(0) - (normal1.density(5.0) + normal2.density(5.0)) / 2) < 
acceptableErr)
+    assert(math.abs(
+      densities(1) - (normal1.density(6.0) + normal2.density(6.0)) / 2) < 
acceptableErr)
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to