Github user dbtsai commented on a diff in the pull request: https://github.com/apache/spark/pull/15628#discussion_r107844935 --- Diff: mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala --- @@ -160,22 +160,395 @@ class MatricesSuite extends SparkMLFunSuite { assert(sparseMat.values(2) === 10.0) } - test("toSparse, toDense") { - val m = 3 - val n = 2 - val values = Array(1.0, 2.0, 4.0, 5.0) - val allValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0) - val colPtrs = Array(0, 2, 4) - val rowIndices = Array(0, 1, 1, 2) + test("dense to dense") { + /* + dm1 = 4.0 2.0 -8.0 + -1.0 7.0 4.0 + + dm2 = 5.0 -9.0 4.0 + 1.0 -3.0 -8.0 + */ + val dm1 = new DenseMatrix(2, 3, Array(4.0, -1.0, 2.0, 7.0, -8.0, 4.0)) + val dm2 = new DenseMatrix(2, 3, Array(5.0, -9.0, 4.0, 1.0, -3.0, -8.0), isTransposed = true) + + val dm3 = dm1.toDense + assert(dm3 === dm1) + assert(!dm3.isTransposed) + assert(dm3.values.equals(dm1.values)) + + val dm4 = dm1.toDenseRowMajor + assert(dm4 === dm1) + assert(dm4.isTransposed) + assert(dm4.values === Array(4.0, 2.0, -8.0, -1.0, 7.0, 4.0)) + + val dm5 = dm2.toDenseColMajor + assert(dm5 === dm2) + assert(!dm5.isTransposed) + assert(dm5.values === Array(5.0, 1.0, -9.0, -3.0, 4.0, -8.0)) + + val dm6 = dm2.toDenseRowMajor + assert(dm6 === dm2) + assert(dm6.isTransposed) + assert(dm6.values.equals(dm2.values)) + + val dm7 = dm1.toDenseRowMajor + assert(dm7 === dm1) + assert(dm7.isTransposed) + assert(dm7.values === Array(4.0, 2.0, -8.0, -1.0, 7.0, 4.0)) + + val dm8 = dm1.toDenseColMajor + assert(dm8 === dm1) + assert(!dm8.isTransposed) + assert(dm8.values.equals(dm1.values)) + + val dm9 = dm2.toDense + assert(dm9 === dm2) + assert(dm9.isTransposed) + assert(dm9.values.equals(dm2.values)) + } - val spMat1 = new SparseMatrix(m, n, colPtrs, rowIndices, values) - val deMat1 = new DenseMatrix(m, n, allValues) + test("dense to sparse") { + /* + dm1 = 0.0 4.0 5.0 + 0.0 2.0 0.0 + + dm2 = 0.0 4.0 5.0 + 0.0 2.0 0.0 - val spMat2 = deMat1.toSparse - val deMat2 = spMat1.toDense + dm3 = 0.0 0.0 0.0 + 0.0 0.0 0.0 + */ + val dm1 = new DenseMatrix(2, 3, Array(0.0, 0.0, 4.0, 2.0, 5.0, 0.0)) + val dm2 = new DenseMatrix(2, 3, Array(0.0, 4.0, 5.0, 0.0, 2.0, 0.0), isTransposed = true) + val dm3 = new DenseMatrix(2, 3, Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0)) + + val sm1 = dm1.toSparseColMajor + assert(sm1 === dm1) + assert(!sm1.isTransposed) + assert(sm1.values === Array(4.0, 2.0, 5.0)) + + val sm2 = dm1.toSparseRowMajor + assert(sm2 === dm1) + assert(sm2.isTransposed) + assert(sm2.values === Array(4.0, 5.0, 2.0)) + + val sm3 = dm2.toSparseColMajor + assert(sm3 === dm2) + assert(!sm3.isTransposed) + assert(sm3.values === Array(4.0, 2.0, 5.0)) + + val sm4 = dm2.toSparseRowMajor + assert(sm4 === dm2) + assert(sm4.isTransposed) + assert(sm4.values === Array(4.0, 5.0, 2.0)) + + val sm5 = dm3.toSparseColMajor + assert(sm5 === dm3) + assert(sm5.values === Array.empty[Double]) + assert(!sm5.isTransposed) + + val sm6 = dm3.toSparseRowMajor + assert(sm6 === dm3) + assert(sm6.values === Array.empty[Double]) + assert(sm6.isTransposed) + + val sm7 = dm1.toSparse + assert(sm7 === dm1) + assert(sm7.values === Array(4.0, 2.0, 5.0)) + assert(!sm7.isTransposed) + + val sm8 = dm1.toSparseColMajor + assert(sm8 === dm1) + assert(sm8.values === Array(4.0, 2.0, 5.0)) + assert(!sm8.isTransposed) + + val sm9 = dm2.toSparseRowMajor + assert(sm9 === dm2) + assert(sm9.values === Array(4.0, 5.0, 2.0)) + assert(sm9.isTransposed) + + val sm10 = dm2.toSparse + assert(sm10 === dm2) + assert(sm10.values === Array(4.0, 5.0, 2.0)) + assert(sm10.isTransposed) + } + + test("sparse to sparse") { + /* + sm1 = sm2 = sm3 = sm4 = 0.0 4.0 5.0 + 0.0 2.0 0.0 + smZeros = 0.0 0.0 0.0 + 0.0 0.0 0.0 + */ + val sm1 = new SparseMatrix(2, 3, Array(0, 0, 2, 3), Array(0, 1, 0), Array(4.0, 2.0, 5.0)) + val sm2 = new SparseMatrix(2, 3, Array(0, 2, 3), Array(1, 2, 1), Array(4.0, 5.0, 2.0), + isTransposed = true) + val sm3 = new SparseMatrix(2, 3, Array(0, 0, 2, 4), Array(0, 1, 0, 1), + Array(4.0, 2.0, 5.0, 0.0)) + val sm4 = new SparseMatrix(2, 3, Array(0, 2, 4), Array(1, 2, 1, 2), + Array(4.0, 5.0, 2.0, 0.0), isTransposed = true) + val smZeros = new SparseMatrix(2, 3, Array(0, 2, 4, 6), Array(0, 1, 0, 1, 0, 1), + Array(0.0, 0.0, 0.0, 0.0, 0.0, 0.0)) + + val sm5 = sm1.toSparseRowMajor + assert(sm5 === sm1) + assert(sm5.isTransposed) + assert(sm5.values === Array(4.0, 5.0, 2.0)) + + val sm6 = sm1.toSparseColMajor + assert(sm6 === sm1) + assert(!sm6.isTransposed) + assert(sm6.values.equals(sm1.values)) + + val sm7 = sm2.toSparseColMajor + assert(sm7 === sm2) + assert(!sm7.isTransposed) + assert(sm7.values === Array(4.0, 2.0, 5.0)) + + val sm8 = sm2.toSparseRowMajor + assert(sm8 === sm2) + assert(sm8.isTransposed) + assert(sm8.values.equals(sm2.values)) + + val sm9 = sm3.toSparse + assert(sm9 === sm3) + assert(sm9.values === Array(4.0, 2.0, 5.0)) + assert(!sm9.isTransposed) + + val sm10 = sm3.toSparseRowMajor + assert(sm10 === sm3) + assert(sm10.values === Array(4.0, 5.0, 2.0)) + assert(sm10.isTransposed) + + val sm11 = sm4.toSparseRowMajor + assert(sm11 === sm4) + assert(sm11.values === Array(4.0, 5.0, 2.0)) + assert(sm11.isTransposed) + + val sm12 = sm4.toSparse + assert(sm12 === sm4) + assert(sm12.values === Array(4.0, 5.0, 2.0)) + assert(sm12.isTransposed) + + val sm13 = smZeros.toSparse + assert(sm13 === smZeros) + assert(sm13.values === Array.empty[Double]) + assert(!sm13.isTransposed) + + val sm14 = sm4.toSparseColMajor + assert(sm14 === sm4) + assert(sm14.values === Array(4.0, 2.0, 5.0)) + assert(!sm14.isTransposed) + + val sm15 = smZeros.toSparseColMajor + assert(sm15 === smZeros) + assert(sm15.values === Array.empty[Double]) + assert(!sm15.isTransposed) + + val sm16 = sm3.toSparseRowMajor + assert(sm16 === sm4) + assert(sm16.values === Array(4.0, 5.0, 2.0)) + assert(sm16.isTransposed) + + val sm17 = smZeros.toSparseRowMajor + assert(sm17 === smZeros) + assert(sm17.values === Array.empty[Double]) + assert(sm17.isTransposed) + } + + test("sparse to dense") { + /* + sm1 = sm2 = 0.0 4.0 5.0 + 0.0 2.0 0.0 + + sm3 = 0.0 0.0 0.0 + 0.0 0.0 0.0 + */ + val sm1 = new SparseMatrix(2, 3, Array(0, 0, 2, 3), Array(0, 1, 0), Array(4.0, 2.0, 5.0)) + val sm2 = new SparseMatrix(2, 3, Array(0, 2, 3), Array(1, 2, 1), Array(4.0, 5.0, 2.0), + isTransposed = true) + val sm3 = new SparseMatrix(2, 3, Array(0, 0, 0, 0), Array.empty[Int], Array.empty[Double]) + + val dm1 = sm1.toDense + assert(dm1 === sm1) + assert(!dm1.isTransposed) + assert(dm1.values === Array(0.0, 0.0, 4.0, 2.0, 5.0, 0.0)) + + val dm2 = sm1.toDenseRowMajor + assert(dm2 === sm1) + assert(dm2.isTransposed) + assert(dm2.values === Array(0.0, 4.0, 5.0, 0.0, 2.0, 0.0)) + + val dm3 = sm2.toDense + assert(dm3 === sm2) + assert(dm3.isTransposed) + assert(dm3.values === Array(0.0, 4.0, 5.0, 0.0, 2.0, 0.0)) + + val dm4 = sm2.toDenseRowMajor + assert(dm4 === sm2) + assert(dm4.isTransposed) + assert(dm4.values === Array(0.0, 4.0, 5.0, 0.0, 2.0, 0.0)) + + val dm5 = sm3.toDense + assert(dm5 === sm3) + assert(!dm5.isTransposed) + assert(dm5.values === Array.fill(6)(0.0)) + + val dm6 = sm2.toDenseColMajor + assert(dm6 === sm2) + assert(!dm6.isTransposed) + assert(dm6.values === Array(0.0, 0.0, 4.0, 2.0, 5.0, 0.0)) + + val dm7 = sm2.toDenseRowMajor + assert(dm7 === sm2) + assert(dm7.isTransposed) + assert(dm7.values === Array(0.0, 4.0, 5.0, 0.0, 2.0, 0.0)) + } + + test("compressed dense") { + /* + dm1 = 1.0 0.0 0.0 0.0 + 1.0 0.0 0.0 0.0 + 0.0 0.0 0.0 0.0 + + dm2 = 1.0 1.0 0.0 0.0 + 0.0 0.0 0.0 0.0 + 0.0 0.0 0.0 0.0 + */ + // this should compress to a sparse matrix + val dm1 = new DenseMatrix(3, 4, Array.fill(2)(1.0) ++ Array.fill(10)(0.0)) + + // optimal compression layout is row major since numRows < numCols + val cm1 = dm1.compressed.asInstanceOf[SparseMatrix] + assert(cm1 === dm1) + assert(cm1.isTransposed) + assert(cm1.getSizeInBytes <= dm1.getSizeInBytes) + + // force compressed column major + val cm2 = dm1.compressedColMajor.asInstanceOf[SparseMatrix] + assert(cm2 === dm1) + assert(!cm2.isTransposed) + assert(cm2.getSizeInBytes <= dm1.getSizeInBytes) + + // optimal compression layout for transpose is column major + val dm2 = dm1.transpose + val cm3 = dm2.compressed.asInstanceOf[SparseMatrix] + assert(cm3 === dm2) + assert(!cm3.isTransposed) + assert(cm3.getSizeInBytes <= dm2.getSizeInBytes) + + /* + dm3 = 1.0 1.0 1.0 0.0 + 1.0 1.0 0.0 0.0 + 1.0 1.0 0.0 0.0 + + dm4 = 1.0 1.0 1.0 1.0 + 1.0 1.0 1.0 0.0 + 0.0 0.0 0.0 0.0 + */ + // this should compress to a dense matrix + val dm3 = new DenseMatrix(3, 4, Array.fill(7)(1.0) ++ Array.fill(5)(0.0)) + val dm4 = new DenseMatrix(3, 4, Array.fill(7)(1.0) ++ Array.fill(5)(0.0), isTransposed = true) + + val cm4 = dm3.compressed.asInstanceOf[DenseMatrix] + assert(cm4 === dm3) + assert(!cm4.isTransposed) + assert(cm4.values.equals(dm3.values)) + assert(cm4.getSizeInBytes <= dm3.getSizeInBytes) + + // force compressed row major + val cm5 = dm3.compressedRowMajor.asInstanceOf[DenseMatrix] + assert(cm5 === dm3) + assert(cm5.isTransposed) + assert(cm5.getSizeInBytes <= dm3.getSizeInBytes) + + val cm6 = dm4.compressed.asInstanceOf[DenseMatrix] + assert(cm6 === dm4) + assert(cm6.isTransposed) + assert(cm6.values.equals(dm4.values)) + assert(cm6.getSizeInBytes <= dm4.getSizeInBytes) + } - assert(spMat1.asBreeze === spMat2.asBreeze) - assert(deMat1.asBreeze === deMat2.asBreeze) + test("compressed sparse") { + /* + sm1 = 0.0 -1.0 + 0.0 0.0 + -4.0 0.0 + 0.0 0.0 + + sm2 = 0.0 0.0 -4.0 0.0 + -1.0 0.0 0.0 0.0 + */ + // these should compress to sparse matrices + val sm1 = new SparseMatrix(4, 2, Array(0, 1, 2), Array(2, 0), Array(-4.0, -1.0)) + val sm2 = sm1.transpose + + val cm1 = sm1.compressed.asInstanceOf[SparseMatrix] + // optimal is column major + assert(cm1 === sm1) + assert(!cm1.isTransposed) + assert(cm1.values.equals(sm1.values)) + assert(cm1.getSizeInBytes <= sm1.getSizeInBytes) + + val cm2 = sm1.compressedRowMajor.asInstanceOf[SparseMatrix] + assert(cm2 === sm1) + assert(cm2.isTransposed) + // forced to be row major, so we have increased the size + assert(cm2.getSizeInBytes > sm1.getSizeInBytes) + assert(cm2.getSizeInBytes <= sm1.toDense.getSizeInBytes) --- End diff -- If =, dense should be preferred since it will be faster. As a result, we should only check <.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org