Repository: spark Updated Branches: refs/heads/branch-1.1 685bdd2b7 -> 4b1c77cbf
[branch-1.1][SPARK-4355] OnlineSummarizer doesn't merge mean correctly andrewor14 This backports the bug fix in #3220. It would be good if we can get it in 1.1.1. But this is minor. Author: Xiangrui Meng <m...@databricks.com> Closes #3251 from mengxr/SPARK-4355-1.1 and squashes the following commits: 33886b6 [Xiangrui Meng] Merge remote-tracking branch 'apache/branch-1.1' into SPARK-4355-1.1 91fe1a3 [Xiangrui Meng] fix OnlineSummarizer.merge when other.mean is zero Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4b1c77cb Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4b1c77cb Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4b1c77cb Branch: refs/heads/branch-1.1 Commit: 4b1c77cbf59ccc752bc0d0291df3550cbfbe730c Parents: 685bdd2 Author: Xiangrui Meng <m...@databricks.com> Authored: Thu Nov 13 15:36:03 2014 -0800 Committer: Xiangrui Meng <m...@databricks.com> Committed: Thu Nov 13 15:36:03 2014 -0800 ---------------------------------------------------------------------- .../stat/MultivariateOnlineSummarizer.scala | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/4b1c77cb/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala index 7d845c4..f23eb5b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala @@ -104,21 +104,19 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S val deltaMean: BDV[Double] = 
currMean - other.currMean var i = 0 while (i < n) { - // merge mean together - if (other.currMean(i) != 0.0) { + if (nnz(i) + other.nnz(i) != 0.0) { + // merge mean together currMean(i) = (currMean(i) * nnz(i) + other.currMean(i) * other.nnz(i)) / (nnz(i) + other.nnz(i)) - } - // merge m2n together - if (nnz(i) + other.nnz(i) != 0.0) { + // merge m2n together currM2n(i) += other.currM2n(i) + deltaMean(i) * deltaMean(i) * nnz(i) * other.nnz(i) / (nnz(i) + other.nnz(i)) - } - if (currMax(i) < other.currMax(i)) { - currMax(i) = other.currMax(i) - } - if (currMin(i) > other.currMin(i)) { - currMin(i) = other.currMin(i) + if (currMax(i) < other.currMax(i)) { + currMax(i) = other.currMax(i) + } + if (currMin(i) > other.currMin(i)) { + currMin(i) = other.currMin(i) + } } i += 1 } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org