This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 3081ecf4c1 [SYSTEMDS-3793] Fix transpose performance on ultra-sparse 
matrices
3081ecf4c1 is described below

commit 3081ecf4c13f06be4beda9d016b23eadd75c5603
Author: Matthias Boehm <[email protected]>
AuthorDate: Tue Nov 19 18:07:04 2024 +0100

    [SYSTEMDS-3793] Fix transpose performance on ultra-sparse matrices
    
    The multi-threaded implementation of ultra-sparse matrices has a couple
    of shortcomings (e.g., count column nnz, block allocation, too late
    fallback to single-threaded). On a large 85M x 85M graph with 90M
    non-zeros the transpose did not finish in hours. In this patch we now
    introduces a more sophisticated sparse row iterator (row and column
    lower/bounds) in order to facilitate a simple and fast transpose
    ultra sparse operation. However, this implementation was still much
    slower than falling back to single-threaded operations and thus use
    single-threaded transpose for all ultra-sparse matrices instead of
    if nnz < max(rows,cols). Now this operations completes in <9s.
---
 .../org/apache/sysds/runtime/data/SparseBlock.java | 50 ++++++++++++++++------
 .../sysds/runtime/matrix/data/LibMatrixReorg.java  | 26 ++++++++---
 .../sysds/runtime/matrix/data/MatrixBlock.java     | 13 ++++++
 3 files changed, 71 insertions(+), 18 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java 
b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
index 91bba40d0c..864569358f 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
@@ -561,6 +561,22 @@ public abstract class SparseBlock implements Serializable, 
Block
                //default generic iterator, override if necessary
                return new SparseBlockIterator(rl, Math.min(ru,numRows()));
        }
+       
+       /**
+        * Get a non-zero iterator over the subblock [rl/cl, ru/cu). Note that
+        * the returned IJV object is reused across next calls and should 
+        * be directly consumed or deep copied. 
+        * 
+        * @param rl   inclusive lower row index starting at 0
+        * @param ru   exclusive upper row index starting at 0
+        * @param cl   inclusive lower column index starting at 0
+        * @param cu   exclusive upper column index starting at 0
+        * @return IJV iterator
+        */
+       public Iterator<IJV> getIterator(int rl, int ru, int cl, int cu) {
+               //default generic iterator, override if necessary
+               return new SparseBlockIterator(rl, Math.min(ru,numRows()), cl, 
cu);
+       }
 
        /**
         * Get an iterator over the indices of non-empty rows within the entire 
sparse block.
@@ -694,19 +710,29 @@ public abstract class SparseBlock implements 
Serializable, Block
                private int _curColIx = -1; //current col index pos
                private int[] _curIndexes = null; //current col indexes
                private double[] _curValues = null; //current col values
-               private boolean _noNext = false; //end indicator                
+               private boolean _noNext = false; //end indicator
                private IJV retijv = new IJV(); //reuse output tuple
+               private int _cl = 0;
+               private int _cu = Integer.MAX_VALUE;
 
                protected SparseBlockIterator(int ru) {
                        _rlen = ru;
                        _curRow = 0;
-                       findNextNonZeroRow();
+                       findNextNonZeroRow(0);
                }
                
                protected SparseBlockIterator(int rl, int ru) {
                        _rlen = ru;
                        _curRow = rl;
-                       findNextNonZeroRow();
+                       findNextNonZeroRow(0);
+               }
+               
+               protected SparseBlockIterator(int rl, int ru, int cl, int cu) {
+                       _rlen = ru;
+                       _curRow = rl;
+                       _cl = cl;
+                       _cu = cu;
+                       findNextNonZeroRow(cl);
                }
                
                @Override
@@ -717,14 +743,12 @@ public abstract class SparseBlock implements 
Serializable, Block
                @Override
                public IJV next( ) {
                        retijv.set(_curRow, _curIndexes[_curColIx], 
_curValues[_curColIx]);
-                       
-                       //NOTE: no preincrement on curcolix to avoid OpenJDK8 
escape analysis bug, encountered 
-                       //with tests 
SparsityRecompileTest/SparsityFunctionRecompileTest on parfor local result merge
-                       if( _curColIx < pos(_curRow)+size(_curRow)-1 ) 
+                       if( _curColIx < pos(_curRow)+size(_curRow)-1 && 
_curIndexes[_curColIx+1] < _cu ) { 
                                _curColIx++;
+                       }
                        else {
                                _curRow++;
-                               findNextNonZeroRow();   
+                               findNextNonZeroRow(_cl);
                        }
 
                        return retijv;
@@ -733,19 +757,21 @@ public abstract class SparseBlock implements 
Serializable, Block
                @Override
                public void remove() {
                        throw new RuntimeException("SparseBlockIterator is 
unsupported!");
-               }               
+               }
                
                /**
                 * Moves cursor to next non-zero value or indicates that no 
more 
                 * values are available.
                 */
-               private void findNextNonZeroRow() {
-                       while( _curRow<_rlen && isEmpty(_curRow))
+               private void findNextNonZeroRow(int cl) {
+                       while( _curRow<_rlen && (isEmpty(_curRow) 
+                               || (cl>0 && posFIndexGTE(_curRow, cl) < 0)) )
                                _curRow++;
                        if(_curRow >= _rlen)
                                _noNext = true;
                        else {
-                               _curColIx = pos(_curRow);
+                               _curColIx = (cl==0) ? 
+                                       pos(_curRow) : posFIndexGTE(_curRow, 
cl);
                                _curIndexes = indexes(_curRow); 
                                _curValues = values(_curRow);
                        }
diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
index 9c751401cd..132154907c 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
@@ -234,8 +234,8 @@ public class LibMatrixReorg {
                        || (SHALLOW_COPY_REORG && !in.sparse && !out.sparse && 
(in.rlen == 1 || in.clen == 1)) //
                        || (in.sparse && !out.sparse && in.rlen == 1) //
                        || (!in.sparse && out.sparse && in.rlen == 1) //
-                       || (in.sparse && out.sparse && in.nonZeros < 
Math.max(in.rlen, in.clen)) // ultra-sparse
-               ) {
+                       || (in.sparse && out.sparse && in.isUltraSparse(false)))
+               {
                        return transpose(in, out);
                }
                // set meta data and allocate output arrays (if required)
@@ -250,7 +250,9 @@ public class LibMatrixReorg {
                // Timing time = new Timing(true);
 
                // CSR is only allowed in the transposed output if the number 
of non zeros is counted in the columns
-               allowCSR = allowCSR && (in.clen <= 4096 || out.nonZeros < 
10000000);
+               // and the temporary count arrays are not larger than the 
entire input
+               allowCSR = allowCSR && (in.clen <= 4096 || out.nonZeros < 
10000000) 
+                               && (k*4*in.clen < in.getInMemorySize());
                
                int[] cnt = null;
                final ExecutorService pool = CommonThreadPool.get(k);
@@ -276,12 +278,13 @@ public class LibMatrixReorg {
                                out.allocateSparseRowsBlock(false);
                        else
                                out.allocateDenseBlock(false);
-       
 
                        // compute actual transpose and check for errors
                        ArrayList<TransposeTask> tasks = new ArrayList<>();
-                       boolean allowReturnBlock = out.sparse && in.sparse && 
in.rlen >= in.clen && cnt == null;
-                       boolean row = (in.sparse || in.rlen >= in.clen) && 
(!out.sparse || allowReturnBlock);
+                       boolean allowReturnBlock = out.sparse && in.sparse 
+                               && in.rlen >= in.clen && cnt == null && 
!in.isUltraSparse(false);
+                       boolean row = (in.sparse || in.rlen >= in.clen)
+                               && (!out.sparse || allowReturnBlock) && 
!in.isUltraSparse(false);
                        int len = row ? in.rlen : in.clen;
                        int blklen = (int) (Math.ceil((double) len / k));
                        blklen += (!out.sparse && (blklen % 8) != 0) ? 8 - 
blklen % 8 : 0;
@@ -1192,6 +1195,15 @@ public class LibMatrixReorg {
                out.setNonZeros(in.getNonZeros());
        }
        
+       private static void transposeUltraSparse(MatrixBlock in, MatrixBlock 
out, int rl, int ru, int cl, int cu) {
+               Iterator<IJV> iter = in.getSparseBlockIterator(rl, ru, cl, cu);
+               SparseBlock b = out.getSparseBlock();
+               while( iter.hasNext() ) {
+                       IJV cell = iter.next();
+                       b.append(cell.getJ(), cell.getI(), cell.getV());
+               }
+       }
+       
        private static void transposeSparseToSparse(MatrixBlock in, MatrixBlock 
out, int rl, int ru, int cl, int cu,
                int[] cnt) {
                // NOTE: called only in sequential or column-wise parallel 
execution
@@ -3861,6 +3873,8 @@ public class LibMatrixReorg {
                                transposeDenseToDense( _in, _out, rl, ru, cl, 
cu );
                        else if( _in.sparse && _out.sparse && _out.sparseBlock 
instanceof SparseBlockCSR)
                                transposeSparseToSparseCSR(_in, _out, rl, ru, 
cl, cu, _cnt);
+                       else if( _in.sparse && _out.sparse && 
_in.isUltraSparse(false) )
+                               transposeUltraSparse(_in, _out, rl, ru, cl, cu);
                        else if( _in.sparse && _out.sparse ){
                                if(allowReturnBlock)
                                        return 
transposeSparseToSparseBlock(_in, rl, ru);
diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index 66d616a738..5bb7e2712c 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -686,6 +686,19 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock<MatrixBlock>,
                //get iterator over sparse block
                return sparseBlock.getIterator(rl, ru);
        }
+       
+       public Iterator<IJV> getSparseBlockIterator(int rl, int ru, int cl, int 
cu) {
+               //check for valid format, should have been checked from outside
+               if( !sparse )
+                       throw new RuntimeException("getSparseBlockInterator 
should not be called for dense format");
+               
+               //check for existing sparse block: return empty list
+               if( sparseBlock==null )
+                       return Collections.emptyListIterator();
+               
+               //get iterator over sparse block
+               return sparseBlock.getIterator(rl, ru, cl, cu);
+       }
 
        @Override
        public double get(int r, int c) {

Reply via email to