spark git commit: [SPARK-16372][MLLIB] Retag RDD to tallSkinnyQR of RowMatrix
Repository: spark Updated Branches: refs/heads/master 986b25140 -> 4c6f00d09 [SPARK-16372][MLLIB] Retag RDD to tallSkinnyQR of RowMatrix ## What changes were proposed in this pull request? The following Java code fails because of type erasure: ```Java JavaRDD<Vector> rows = jsc.parallelize(...); RowMatrix mat = new RowMatrix(rows.rdd()); QRDecomposition<RowMatrix, Matrix> result = mat.tallSkinnyQR(true); ``` We should use retag to restore the type to prevent the following exception: ```Java java.lang.ClassCastException: [Ljava.lang.Object; cannot be cast to [Lorg.apache.spark.mllib.linalg.Vector; ``` ## How was this patch tested? Java unit test Author: Xusen Yin Closes #14051 from yinxusen/SPARK-16372. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4c6f00d0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4c6f00d0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4c6f00d0 Branch: refs/heads/master Commit: 4c6f00d09c016dfc1d2de6e694dff219c9027fa0 Parents: 986b251 Author: Xusen Yin Authored: Thu Jul 7 11:28:04 2016 +0100 Committer: Sean Owen Committed: Thu Jul 7 11:28:04 2016 +0100 -- .../spark/mllib/api/python/PythonMLLibAPI.scala | 2 +- .../mllib/linalg/distributed/RowMatrix.scala| 2 +- .../linalg/distributed/JavaRowMatrixSuite.java | 44 3 files changed, 46 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4c6f00d0/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index f4819f7..a80cca7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -1127,7 +1127,7 @@ private[python] class PythonMLLibAPI extends Serializable { * Wrapper around RowMatrix constructor. 
*/ def createRowMatrix(rows: JavaRDD[Vector], numRows: Long, numCols: Int): RowMatrix = { -new RowMatrix(rows.rdd.retag(classOf[Vector]), numRows, numCols) +new RowMatrix(rows.rdd, numRows, numCols) } /** http://git-wip-us.apache.org/repos/asf/spark/blob/4c6f00d0/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index cd5209d..1c94479 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -537,7 +537,7 @@ class RowMatrix @Since("1.0.0") ( def tallSkinnyQR(computeQ: Boolean = false): QRDecomposition[RowMatrix, Matrix] = { val col = numCols().toInt // split rows horizontally into smaller matrices, and compute QR for each of them -val blockQRs = rows.glom().map { partRows => +val blockQRs = rows.retag(classOf[Vector]).glom().map { partRows => val bdm = BDM.zeros[Double](partRows.length, col) var i = 0 partRows.foreach { row => http://git-wip-us.apache.org/repos/asf/spark/blob/4c6f00d0/mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java -- diff --git a/mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java b/mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java new file mode 100644 index 000..c01af40 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + *
spark git commit: [SPARK-16372][MLLIB] Retag RDD to tallSkinnyQR of RowMatrix
Repository: spark Updated Branches: refs/heads/branch-1.6 2588776ad -> 45dda9221 [SPARK-16372][MLLIB] Retag RDD to tallSkinnyQR of RowMatrix ## What changes were proposed in this pull request? The following Java code fails because of type erasure: ```Java JavaRDD<Vector> rows = jsc.parallelize(...); RowMatrix mat = new RowMatrix(rows.rdd()); QRDecomposition<RowMatrix, Matrix> result = mat.tallSkinnyQR(true); ``` We should use retag to restore the type to prevent the following exception: ```Java java.lang.ClassCastException: [Ljava.lang.Object; cannot be cast to [Lorg.apache.spark.mllib.linalg.Vector; ``` ## How was this patch tested? Java unit test Author: Xusen Yin Closes #14051 from yinxusen/SPARK-16372. (cherry picked from commit 4c6f00d09c016dfc1d2de6e694dff219c9027fa0) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/45dda922 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/45dda922 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/45dda922 Branch: refs/heads/branch-1.6 Commit: 45dda92214191310a56333a2085e2343eba170cd Parents: 2588776 Author: Xusen Yin Authored: Thu Jul 7 11:28:04 2016 +0100 Committer: Sean Owen Committed: Thu Jul 7 11:28:29 2016 +0100 -- .../spark/mllib/api/python/PythonMLLibAPI.scala | 2 +- .../mllib/linalg/distributed/RowMatrix.scala| 2 +- .../linalg/distributed/JavaRowMatrixSuite.java | 44 3 files changed, 46 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/45dda922/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index 1714983..a059e38 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -1110,7 +1110,7 @@ 
private[python] class PythonMLLibAPI extends Serializable { * Wrapper around RowMatrix constructor. */ def createRowMatrix(rows: JavaRDD[Vector], numRows: Long, numCols: Int): RowMatrix = { -new RowMatrix(rows.rdd.retag(classOf[Vector]), numRows, numCols) +new RowMatrix(rows.rdd, numRows, numCols) } /** http://git-wip-us.apache.org/repos/asf/spark/blob/45dda922/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 52c0f19..b941d1f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -526,7 +526,7 @@ class RowMatrix @Since("1.0.0") ( def tallSkinnyQR(computeQ: Boolean = false): QRDecomposition[RowMatrix, Matrix] = { val col = numCols().toInt // split rows horizontally into smaller matrices, and compute QR for each of them -val blockQRs = rows.glom().map { partRows => +val blockQRs = rows.retag(classOf[Vector]).glom().map { partRows => val bdm = BDM.zeros[Double](partRows.length, col) var i = 0 partRows.foreach { row => http://git-wip-us.apache.org/repos/asf/spark/blob/45dda922/mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java -- diff --git a/mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java b/mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java new file mode 100644 index 000..c01af40 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR