I'm trying to compute the Frobenius norm error in approximating an IndexedRowMatrix A with the product L*R where L and R are Breeze DenseMatrices.
I've written the following function, which computes the squared error over each partition of rows and then sums the partial results to get the total squared error (ignore the mean argument; it is not used). It works on a smaller dataset that I've been using to test my code, but fails on the full-sized A with an error about running out of Java heap space. Here A is an 8M-by-100K matrix partitioned into 5015 parts, L is 8M-by-16, and R is 16-by-100K. Together, L and R take up far less memory than I have on each executor (64 GB), so I don't understand the cause of the error.

def calcCenteredFrobNormErr(mat: IndexedRowMatrix, lhsTall: BDM[Double], rhsFat: BDM[Double], mean: BDV[Double] ) : Double = {
  val lhsFactor = mat.rows.context.broadcast(lhsTall)
  val rhsFactor = mat.rows.context.broadcast(rhsFat)

  def partitionDiffFrobNorm2(rowiter : Iterator[IndexedRow], lhsFactor: Broadcast[BDM[Double]], rhsFactor: Broadcast[BDM[Double]]) : Iterator[Double] = {
    val lhsTall = lhsFactor.value
    val rhsFat = rhsFactor.value
    val rowlist = rowiter.toList
    val numrows = rowlist.length
    val matSubMat = BDM.zeros[Double](numrows, mat.numCols.toInt)
    val lhsSubMat = BDM.zeros[Double](numrows, lhsTall.cols)
    var currowindex = 0
    rowlist.foreach( (currow: IndexedRow) => {
      currow.vector.foreachActive { case (j, v) => matSubMat(currowindex, j) = v }
      lhsSubMat(currowindex, ::) := lhsTall(currow.index.toInt, ::)
      currowindex += 1
    } )
    val diffmat = matSubMat - lhsSubMat * rhsFat
    List(sum(diffmat :* diffmat)).iterator
  }

  report("Beginning to compute Frobenius norm", true)
  val res = mat.rows.mapPartitions(rowiter => partitionDiffFrobNorm2(rowiter, lhsFactor, rhsFactor)).reduce(_ + _)
  report("Finished computing Frobenius norm", true)
  math.sqrt(res)
}

--
View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/java-heap-error-tp23856.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.
--------------------------------------------------------------------- To unsubscribe, e-mail: user-unsubscr...@spark.apache.org For additional commands, e-mail: user-h...@spark.apache.org