Repository: spark Updated Branches: refs/heads/branch-2.0 5b003c9bc -> 579268426
[SPARK-15922][MLLIB] `toIndexedRowMatrix` should consider the case `cols < offset+colsPerBlock`

## What changes were proposed in this pull request?

SPARK-15922 reports the following scenario throwing an exception due to the mismatched vector sizes. This PR handles the exceptional case, `cols < (offset + colsPerBlock)`.

**Before**
```scala
scala> import org.apache.spark.mllib.linalg.distributed._
scala> import org.apache.spark.mllib.linalg._
scala> val rows = IndexedRow(0L, new DenseVector(Array(1,2,3))) :: IndexedRow(1L, new DenseVector(Array(1,2,3))):: IndexedRow(2L, new DenseVector(Array(1,2,3))):: Nil
scala> val rdd = sc.parallelize(rows)
scala> val matrix = new IndexedRowMatrix(rdd, 3, 3)
scala> val bmat = matrix.toBlockMatrix
scala> val imat = bmat.toIndexedRowMatrix
scala> imat.rows.collect
... // java.lang.IllegalArgumentException: requirement failed: Vectors must be the same length!
```

**After**
```scala
...
scala> imat.rows.collect
res0: Array[org.apache.spark.mllib.linalg.distributed.IndexedRow] = Array(IndexedRow(0,[1.0,2.0,3.0]), IndexedRow(1,[1.0,2.0,3.0]), IndexedRow(2,[1.0,2.0,3.0]))
```

## How was this patch tested?

Pass the Jenkins tests (including the above case).

Author: Dongjoon Hyun <dongj...@apache.org>

Closes #13643 from dongjoon-hyun/SPARK-15922.
(cherry picked from commit 36110a8306608186696c536028d2776e022d305a) Signed-off-by: Sean Owen <so...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/57926842 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/57926842 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/57926842 Branch: refs/heads/branch-2.0 Commit: 5792684268b273562e694855eb671c21c4044280 Parents: 5b003c9 Author: Dongjoon Hyun <dongj...@apache.org> Authored: Thu Jun 16 23:02:46 2016 +0200 Committer: Sean Owen <so...@cloudera.com> Committed: Thu Jun 16 23:03:00 2016 +0200 ---------------------------------------------------------------------- .../org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala | 2 +- .../spark/mllib/linalg/distributed/BlockMatrixSuite.scala | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/57926842/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala index 7a24617..639295c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala @@ -288,7 +288,7 @@ class BlockMatrix @Since("1.3.0") ( vectors.foreach { case (blockColIdx: Int, vec: BV[Double]) => val offset = colsPerBlock * blockColIdx - wholeVector(offset until offset + colsPerBlock) := vec + wholeVector(offset until Math.min(cols, offset + colsPerBlock)) := vec } new IndexedRow(rowIdx, Vectors.fromBreeze(wholeVector)) } 
http://git-wip-us.apache.org/repos/asf/spark/blob/57926842/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala index e5a2cbb..61266f3 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala @@ -135,6 +135,11 @@ class BlockMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { assert(rowMat.numCols() === n) assert(rowMat.toBreeze() === gridBasedMat.toBreeze()) + // SPARK-15922: BlockMatrix to IndexedRowMatrix throws an error" + val bmat = rowMat.toBlockMatrix + val imat = bmat.toIndexedRowMatrix + imat.rows.collect + val rows = 1 val cols = 10 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org