Repository: spark
Updated Branches:
  refs/heads/master 3cac6614a -> 8b292b19c


[SPARK-10654][MLLIB] Add columnSimilarities to IndexedRowMatrix

Add columnSimilarities to IndexedRowMatrix by delegating to functionality 
already in RowMatrix.

With a test.

Author: Reza Zadeh <r...@databricks.com>

Closes #8792 from rezazadeh/colsims.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8b292b19
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8b292b19
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8b292b19

Branch: refs/heads/master
Commit: 8b292b19c9b3aaaa51b919a12132e099e5be832d
Parents: 3cac661
Author: Reza Zadeh <r...@databricks.com>
Authored: Mon Oct 26 22:00:24 2015 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Mon Oct 26 22:00:24 2015 -0700

----------------------------------------------------------------------
 .../mllib/linalg/distributed/IndexedRowMatrix.scala    | 13 +++++++++++++
 .../linalg/distributed/IndexedRowMatrixSuite.scala     | 12 ++++++++++++
 2 files changed, 25 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/8b292b19/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
index e6af0c0..9762991 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
@@ -68,6 +68,19 @@ class IndexedRowMatrix @Since("1.0.0") (
     nRows
   }
 
+
+  /**
+   * Compute all cosine similarities between columns of this matrix using the brute-force
+   * approach of computing normalized dot products.
+   *
+   * @return An n x n sparse upper-triangular matrix of cosine similarities between
+   *         columns of this matrix.
+   */
+  @Since("1.6.0")
+  def columnSimilarities(): CoordinateMatrix = {
+    toRowMatrix().columnSimilarities()
+  }
+
   /**
    * Drops row indices and converts this matrix to a
    * [[org.apache.spark.mllib.linalg.distributed.RowMatrix]].

http://git-wip-us.apache.org/repos/asf/spark/blob/8b292b19/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
index 0ecb7a2..6de6cf2 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala
@@ -153,6 +153,18 @@ class IndexedRowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext {
     }
   }
 
+  test("similar columns") {
+    val A = new IndexedRowMatrix(indexedRows)
+    val gram = A.computeGramianMatrix().toBreeze.toDenseMatrix
+
+    val G = A.columnSimilarities().toBreeze()
+
+    for (i <- 0 until n; j <- i + 1 until n) {
+      val trueResult = gram(i, j) / scala.math.sqrt(gram(i, i) * gram(j, j))
+      assert(math.abs(G(i, j) - trueResult) < 1e-6)
+    }
+  }
+
   def closeToZero(G: BDM[Double]): Boolean = {
     G.valuesIterator.map(math.abs).sum < 1e-6
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to