This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new d57d042  [SYSTEMDS-2856] Fix regression multi-threaded binary/ternary 
operations
d57d042 is described below

commit d57d0421d069c1284b4266d47ca1cd2382c5088a
Author: Matthias Boehm <[email protected]>
AuthorDate: Sat Feb 13 21:05:40 2021 +0100

    [SYSTEMDS-2856] Fix regression multi-threaded binary/ternary operations
    
    This patch fixes performance regressions of special cases of binary
    operations (early abort on empty inputs). The rework for multi-threaded
    specifically use a fallback to single-threaded whenever one input is
    empty. However, the allocation was moved outside the specific kernels,
    leading to allocation before the early abort, which largely destroyed
    the benfit of early abort.
    
    Furthermore, we now also avoid redundant nnz maintenance per ternary
    task as these are maintained anyway during computation (and now passed
    back to the multi-threaded tasks).
---
 .../runtime/matrix/data/LibMatrixBincell.java      | 28 ++++++++++++++++++----
 .../runtime/matrix/data/LibMatrixTercell.java      | 17 ++++++-------
 2 files changed, 33 insertions(+), 12 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
index 29363b0..44a25f4 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
@@ -164,11 +164,15 @@ public class LibMatrixBincell
         */
        public static void bincellOp(MatrixBlock m1, MatrixBlock m2, 
MatrixBlock ret, BinaryOperator op) {
                BinaryAccessType atype = getBinaryAccessType(m1, m2);
-               
-               //preallocate for consistency
-               if( atype == BinaryAccessType.MATRIX_MATRIX )
+
+               // preallocate for consistency (but be careful 
+               // not to allocate if empty inputs might allow early abort)
+               if( atype == BinaryAccessType.MATRIX_MATRIX
+                       && !(m1.isEmpty() || m2.isEmpty()) )
+               {
                        ret.allocateBlock(); //chosen outside
-               
+               }
+
                //execute binary cell operations
                long nnz = 0;
                if(op.sparseSafe || isSparseSafeDivide(op, m2))
@@ -755,6 +759,10 @@ public class LibMatrixBincell
        private static long safeBinaryMMSparseSparse(MatrixBlock m1, 
MatrixBlock m2, MatrixBlock ret,
                BinaryOperator op, int rl, int ru)
        {
+               //guard for postponed allocation in single-threaded exec
+               if( ret.sparse && !ret.isAllocated() )
+                       ret.allocateSparseRowsBlock();
+               
                //both sparse blocks existing
                long lnnz = 0;
                if(m1.sparseBlock!=null && m2.sparseBlock!=null)
@@ -828,6 +836,10 @@ public class LibMatrixBincell
        private static long safeBinaryMMSparseDenseDense(MatrixBlock m1, 
MatrixBlock m2, MatrixBlock ret,
                BinaryOperator op, int rl, int ru)
        {
+               //guard for postponed allocation in single-threaded exec
+               if( !ret.isAllocated() )
+                       ret.allocateDenseBlock();
+               
                //specific case in order to prevent binary search on sparse 
inputs (see quickget and quickset)
                final int n = ret.clen;
                DenseBlock dc = ret.getDenseBlock();
@@ -912,6 +924,10 @@ public class LibMatrixBincell
        private static long safeBinaryMMDenseDenseDense(MatrixBlock m1, 
MatrixBlock m2, MatrixBlock ret,
                BinaryOperator op, int rl, int ru)
        {
+               //guard for postponed allocation in single-threaded exec
+               if( !ret.isAllocated() )
+                       ret.allocateDenseBlock();
+               
                DenseBlock da = m1.getDenseBlock();
                DenseBlock db = m2.getDenseBlock();
                DenseBlock dc = ret.getDenseBlock();
@@ -943,6 +959,10 @@ public class LibMatrixBincell
                //prepare second input and allocate output
                MatrixBlock b = m1.sparse ? m2 : m1;
                
+               //guard for postponed allocation in single-threaded exec
+               if( !ret.isAllocated() )
+                       ret.allocateBlock();
+               
                long lnnz = 0;
                for( int i=rl; i<Math.min(ru, a.numRows()); i++ ) {
                        if( a.isEmpty(i) ) continue;
diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixTercell.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixTercell.java
index cb48a1f..46f31ef 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixTercell.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixTercell.java
@@ -78,11 +78,13 @@ public class LibMatrixTercell
                        }
                }
                else {
-                       unsafeTernary(m1, m2, m3, ret, op, s1, s2, s3, d1, d2, 
d3, 0, ret.rlen);
+                       long nnz = unsafeTernary(m1, m2, m3, ret, op,
+                               s1, s2, s3, d1, d2, d3, 0, ret.rlen);
+                       ret.setNonZeros(nnz);
                }
        }
        
-       private static void unsafeTernary(MatrixBlock m1, MatrixBlock m2, 
MatrixBlock m3, MatrixBlock ret,
+       private static long unsafeTernary(MatrixBlock m1, MatrixBlock m2, 
MatrixBlock m3, MatrixBlock ret,
                TernaryOperator op, boolean s1, boolean s2, boolean s3, double 
d1, double d2, double d3, int rl, int ru)
        {
                //basic ternary operations (all combinations sparse/dense)
@@ -99,7 +101,7 @@ public class LibMatrixTercell
                        }
                
                //set global output nnz once
-               ret.nonZeros = lnnz;
+               return lnnz;
        }
        
        private static class TercellTask implements Callable<Long> {
@@ -122,11 +124,10 @@ public class LibMatrixTercell
                
                @Override
                public Long call() {
-                       //execute binary operation on row partition
-                       unsafeTernary(_m1, _m2, _m3, _ret, _op, _s1, _s2, _s3, 
_d1, _d2, _d3, _rl, _ru);
-                       
-                       //maintain block nnz (upper bounds inclusive)
-                       return _ret.recomputeNonZeros(_rl, _ru-1);
+                       // execute binary operation on row partition
+                       // (including nnz maintenance)
+                       return unsafeTernary(_m1, _m2, _m3, _ret, _op,
+                               _s1, _s2, _s3, _d1, _d2, _d3, _rl, _ru);
                }
        }
 }

Reply via email to