This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new f36d0c5  [SPARK-26228][MLLIB] OOM issue encountered when computing Gramian matrix
f36d0c5 is described below

commit f36d0c56c256164f229b900778f593a0d8e4c7fc
Author: Sean Owen <sean.o...@databricks.com>
AuthorDate: Tue Jan 22 19:22:06 2019 -0600

    [SPARK-26228][MLLIB] OOM issue encountered when computing Gramian matrix
    
    Avoid memory problems in closure cleaning when handling large Gramians (>= 16K rows/cols) by using null as the zeroValue of treeAggregate
    
    Existing tests.
    Note that it's hard to test the case that triggers this issue, as it would require a large amount of memory and run for a while. I confirmed locally that a 16K x 16K Gramian failed even with tons of driver memory before this change, and didn't fail upfront after it.
    
    Closes #23600 from srowen/SPARK-26228.
    
    Authored-by: Sean Owen <sean.o...@databricks.com>
    Signed-off-by: Sean Owen <sean.o...@databricks.com>
    (cherry picked from commit 6dcad38ba3393188084f378b7ff6dfc12b685b13)
    Signed-off-by: Sean Owen <sean.o...@databricks.com>
---
 .../spark/mllib/linalg/distributed/RowMatrix.scala   | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index 78a8810..5109efb 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -119,11 +119,25 @@ class RowMatrix @Since("1.0.0") (
     val nt = if (n % 2 == 0) ((n / 2) * (n + 1)) else (n * ((n + 1) / 2))
 
     // Compute the upper triangular part of the gram matrix.
-    val GU = rows.treeAggregate(new BDV[Double](nt))(
-      seqOp = (U, v) => {
+    val GU = rows.treeAggregate(null.asInstanceOf[BDV[Double]])(
+      seqOp = (maybeU, v) => {
+        val U =
+          if (maybeU == null) {
+            new BDV[Double](nt)
+          } else {
+            maybeU
+          }
         BLAS.spr(1.0, v, U.data)
         U
-      }, combOp = (U1, U2) => U1 += U2)
+      }, combOp = (U1, U2) =>
+        if (U1 == null) {
+          U2
+        } else if (U2 == null) {
+          U1
+        } else {
+          U1 += U2
+        }
+    )
 
     RowMatrix.triuToFull(n, GU.data)
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to