Repository: spark Updated Branches: refs/heads/master 5410747a8 -> 5994cfe81
[SPARK-10875] [MLLIB] Computed covariance matrix should be symmetric Compute upper triangular values of the covariance matrix, then copy to lower triangular values. Author: Nick Pritchard <nicholas.pritch...@falkonry.com> Closes #8940 from pnpritchard/SPARK-10875. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5994cfe8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5994cfe8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5994cfe8 Branch: refs/heads/master Commit: 5994cfe81271a39294aa29fd47aa94c99aa56743 Parents: 5410747 Author: Nick Pritchard <nicholas.pritch...@falkonry.com> Authored: Thu Oct 8 22:22:20 2015 -0700 Committer: Xiangrui Meng <m...@databricks.com> Committed: Thu Oct 8 22:22:20 2015 -0700 ---------------------------------------------------------------------- .../mllib/linalg/distributed/RowMatrix.scala | 6 ++++-- .../mllib/linalg/distributed/RowMatrixSuite.scala | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/5994cfe8/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 7c7d900..b8a7adc 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -357,9 +357,11 @@ class RowMatrix @Since("1.0.0") ( var alpha = 0.0 while (i < n) { alpha = m / m1 * mean(i) - j = 0 + j = i while (j < n) { - G(i, j) = G(i, j) / m1 - alpha * mean(j) + val Gij = G(i, j) / m1 - alpha * mean(j) + G(i, j) = Gij + G(j, i) = Gij j += 1 } i += 1 http://git-wip-us.apache.org/repos/asf/spark/blob/5994cfe8/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala index 283ffec..4abb98f 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala @@ -24,6 +24,7 @@ import breeze.linalg.{DenseVector => BDV, DenseMatrix => BDM, norm => brzNorm, s import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.{Matrices, Vectors, Vector} +import org.apache.spark.mllib.random.RandomRDDs import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext} class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { @@ -255,6 +256,23 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { assert(closeToZero(abs(expected.r) - abs(rOnly.R.toBreeze.asInstanceOf[BDM[Double]]))) } } + + test("compute covariance") { + for (mat <- Seq(denseMat, sparseMat)) { + val result = mat.computeCovariance() + val expected = breeze.linalg.cov(mat.toBreeze()) + assert(closeToZero(abs(expected) - abs(result.toBreeze.asInstanceOf[BDM[Double]]))) + } + } + + test("covariance matrix is symmetric (SPARK-10875)") { + val rdd = RandomRDDs.normalVectorRDD(sc, 100, 10, 0, 0) + val matrix = new RowMatrix(rdd) + val cov = matrix.computeCovariance() + for (i <- 0 until cov.numRows; j <- 0 until i) { + assert(cov(i, j) === cov(j, i)) + } + } } class RowMatrixClusterSuite extends SparkFunSuite with LocalClusterSparkContext { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org