Repository: spark
Updated Branches:
  refs/heads/master f9bf15d9b -> 36110a830


[SPARK-15922][MLLIB] `toIndexedRowMatrix` should consider the case `cols < offset+colsPerBlock`

## What changes were proposed in this pull request?

SPARK-15922 reports that the following scenario throws an exception because of 
mismatched vector sizes. This PR handles the case where the last column block 
is narrower than `colsPerBlock`, i.e. `cols < (offset + colsPerBlock)`.

**Before**
```scala
scala> import org.apache.spark.mllib.linalg.distributed._
scala> import org.apache.spark.mllib.linalg._
scala> val rows = IndexedRow(0L, new DenseVector(Array(1,2,3))) ::
     |   IndexedRow(1L, new DenseVector(Array(1,2,3))) ::
     |   IndexedRow(2L, new DenseVector(Array(1,2,3))) :: Nil
scala> val rdd = sc.parallelize(rows)
scala> val matrix = new IndexedRowMatrix(rdd, 3, 3)
scala> val bmat = matrix.toBlockMatrix
scala> val imat = bmat.toIndexedRowMatrix
scala> imat.rows.collect
... // java.lang.IllegalArgumentException: requirement failed: Vectors must be the same length!
```

**After**
```scala
...
scala> imat.rows.collect
res0: Array[org.apache.spark.mllib.linalg.distributed.IndexedRow] = 
Array(IndexedRow(0,[1.0,2.0,3.0]), IndexedRow(1,[1.0,2.0,3.0]), 
IndexedRow(2,[1.0,2.0,3.0]))
```

## How was this patch tested?

Passes the Jenkins tests, including a new regression check for the above case 
added to `BlockMatrixSuite`.

Author: Dongjoon Hyun <dongj...@apache.org>

Closes #13643 from dongjoon-hyun/SPARK-15922.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/36110a83
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/36110a83
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/36110a83

Branch: refs/heads/master
Commit: 36110a8306608186696c536028d2776e022d305a
Parents: f9bf15d
Author: Dongjoon Hyun <dongj...@apache.org>
Authored: Thu Jun 16 23:02:46 2016 +0200
Committer: Sean Owen <so...@cloudera.com>
Committed: Thu Jun 16 23:02:46 2016 +0200

----------------------------------------------------------------------
 .../org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala | 2 +-
 .../spark/mllib/linalg/distributed/BlockMatrixSuite.scala       | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/36110a83/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
 
b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index 7a24617..639295c 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -288,7 +288,7 @@ class BlockMatrix @Since("1.3.0") (
 
       vectors.foreach { case (blockColIdx: Int, vec: BV[Double]) =>
         val offset = colsPerBlock * blockColIdx
-        wholeVector(offset until offset + colsPerBlock) := vec
+        wholeVector(offset until Math.min(cols, offset + colsPerBlock)) := vec
       }
       new IndexedRow(rowIdx, Vectors.fromBreeze(wholeVector))
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/36110a83/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
index e5a2cbb..61266f3 100644
--- 
a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
@@ -135,6 +135,11 @@ class BlockMatrixSuite extends SparkFunSuite with 
MLlibTestSparkContext {
     assert(rowMat.numCols() === n)
     assert(rowMat.toBreeze() === gridBasedMat.toBreeze())
 
+    // SPARK-15922: BlockMatrix to IndexedRowMatrix throws an error
+    val bmat = rowMat.toBlockMatrix
+    val imat = bmat.toIndexedRowMatrix
+    imat.rows.collect
+
     val rows = 1
     val cols = 10
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to