Github user dbtsai commented on a diff in the pull request: https://github.com/apache/spark/pull/15628#discussion_r87669463 --- Diff: mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala --- @@ -153,6 +153,86 @@ sealed trait Matrix extends Serializable { */ @Since("2.0.0") def numActives: Int + + /** + * Converts this matrix to a sparse matrix. + * + * @param columnMajor Whether the values of the resulting sparse matrix should be in column major + * or row major order. If `false`, resulting matrix will be row major. + */ + private[ml] def toSparseMatrix(columnMajor: Boolean): SparseMatrix + + /** + * Converts this matrix to a sparse matrix in column major order. + */ + @Since("2.1.0") + def toSparse: SparseMatrix = toSparseMatrix(columnMajor = true) + + /** + * Converts this matrix to a dense matrix. + * + * @param columnMajor Whether the values of the resulting dense matrix should be in column major + * or row major order. If `false`, resulting matrix will be row major. + */ + private [ml] def toDenseMatrix(columnMajor: Boolean): DenseMatrix + + /** + * Converts this matrix to a dense matrix in column major order. + */ + @Since("2.1.0") + def toDense: DenseMatrix = toDenseMatrix(columnMajor = true) + + /** + * Returns a matrix in either dense or sparse format, whichever uses less storage. + * + * @param columnMajor Whether the values of the resulting matrix should be in column major + * or row major order. If `false`, resulting matrix will be row major. + */ + @Since("2.1.0") + def compressed(columnMajor: Boolean): Matrix = { + if (getDenseSizeInBytes < getSparseSizeInBytes(columnMajor)) { + toDenseMatrix(columnMajor) + } else { + toSparseMatrix(columnMajor) + } + } + + /** + * Returns a matrix in dense column major, dense row major, sparse row major, or sparse column + * major format, whichever uses less storage. When dense representation is optimal, it maintains + * the current layout order. 
+ */ + @Since("2.1.0") + def compressed: Matrix = { + val cscSize = getSparseSizeInBytes(columnMajor = true) + val csrSize = getSparseSizeInBytes(columnMajor = false) + val minSparseSize = cscSize.min(csrSize) + if (getDenseSizeInBytes < minSparseSize) { + // size is the same either way, so maintain current layout --- End diff -- ``` scala if (getDenseSizeInBytes < math.min(cscSize, csrSize)) ... ... if (cscSize < csrSize) ``` could be easier to read. Also, can you elaborate on the comment, for example: ``` // sizes for dense matrix in row major or column major are the same, so maintain current layout ```
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and would like it enabled, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org