Repository: spark Updated Branches: refs/heads/branch-1.4 dfd905df5 -> 51d98b0e9
[SPARK-7844] [MLLIB] Fix broken tests in KernelDensity The densities in KernelDensity are scaled down by (number of parallel processes X number of evaluation points). They should be scaled down by just the number of samples. The tests in KernelDensitySuite did not catch this because their assertions were not written properly: they compared signed differences instead of absolute differences and always checked densities(0). Author: MechCoder <manojkumarsivaraj...@gmail.com> Closes #6383 from MechCoder/spark-7844 and squashes the following commits: ab81302 [MechCoder] Math->math 9b8ed50 [MechCoder] Make one pass to update count a92fe50 [MechCoder] [SPARK-7844] Fix broken tests in KernelDensity (cherry picked from commit 61664732b25b35f94be35a42cde651cbfd0e02b7) Signed-off-by: Xiangrui Meng <m...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51d98b0e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51d98b0e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51d98b0e Branch: refs/heads/branch-1.4 Commit: 51d98b0e97c97a7eca2d4ff2fc14b9cfe9af9e2f Parents: dfd905d Author: MechCoder <manojkumarsivaraj...@gmail.com> Authored: Tue May 26 13:21:00 2015 -0700 Committer: Xiangrui Meng <m...@databricks.com> Committed: Tue May 26 13:22:42 2015 -0700 ---------------------------------------------------------------------- .../scala/org/apache/spark/mllib/stat/KernelDensity.scala | 2 +- .../org/apache/spark/mllib/stat/KernelDensitySuite.scala | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/51d98b0e/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala index a6bfe26..58a50f9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala +++ 
b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala @@ -93,7 +93,7 @@ class KernelDensity extends Serializable { x._1(i) += normPdf(y, bandwidth, logStandardDeviationPlusHalfLog2Pi, points(i)) i += 1 } - (x._1, n) + (x._1, x._2 + 1) }, (x, y) => { blas.daxpy(n, 1.0, y._1, 1, x._1, 1) http://git-wip-us.apache.org/repos/asf/spark/blob/51d98b0e/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala index 14bb1ce..a309c94 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala @@ -29,8 +29,8 @@ class KernelDensitySuite extends FunSuite with MLlibTestSparkContext { val densities = new KernelDensity().setSample(rdd).setBandwidth(3.0).estimate(evaluationPoints) val normal = new NormalDistribution(5.0, 3.0) val acceptableErr = 1e-6 - assert(densities(0) - normal.density(5.0) < acceptableErr) - assert(densities(0) - normal.density(6.0) < acceptableErr) + assert(math.abs(densities(0) - normal.density(5.0)) < acceptableErr) + assert(math.abs(densities(1) - normal.density(6.0)) < acceptableErr) } test("kernel density multiple samples") { @@ -40,7 +40,9 @@ class KernelDensitySuite extends FunSuite with MLlibTestSparkContext { val normal1 = new NormalDistribution(5.0, 3.0) val normal2 = new NormalDistribution(10.0, 3.0) val acceptableErr = 1e-6 - assert(densities(0) - (normal1.density(5.0) + normal2.density(5.0)) / 2 < acceptableErr) - assert(densities(0) - (normal1.density(6.0) + normal2.density(6.0)) / 2 < acceptableErr) + assert(math.abs( + densities(0) - (normal1.density(5.0) + normal2.density(5.0)) / 2) < acceptableErr) + assert(math.abs( + densities(1) - (normal1.density(6.0) + 
normal2.density(6.0)) / 2) < acceptableErr) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org