This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 64d5022  [MINOR] Performance replace operations (w/ pattern, 
replacement)
64d5022 is described below

commit 64d5022df1c4a75931a072ef7ce73dc322a393f0
Author: Matthias Boehm <[email protected]>
AuthorDate: Sat Aug 15 15:09:35 2020 +0200

    [MINOR] Performance replace operations (w/ pattern, replacement)
    
    As more and more pre-processing techniques use replace operations for
    robustness against NaN and other special values, this patch makes a
    performance improvement for special cases. So far we always allocated
    the output and copied values with on-the-fly replacement of the pattern
    and awareness for NaNs. Now, we first probe (with early abort) whether
    the matrix contains the pattern, and only if this is the case allocate
    and copy the values; otherwise we simply return the input.
---
 .../sysds/runtime/matrix/data/MatrixBlock.java     | 72 +++++++++++++++-------
 1 file changed, 49 insertions(+), 23 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index fe2a7bd..a879de3 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -636,6 +636,39 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                return denseBlock.get(r, c);
        }
        
+       /**
+        * Checks whether this block contains at least one cell equal to the
+        * given pattern. A NaN pattern matches NaN cells, which a plain
+        * {@code ==} comparison alone would never report.
+        *
+        * @param pattern value to search for (may be 0 or NaN)
+        * @return true if any cell matches the pattern, false otherwise
+        */
+       public boolean containsValue(double pattern) {
+               //an empty block can only match the zero pattern
+               if(isEmptyBlock(true))
+                       return pattern==0;
+
+               //make a pass over the data to determine if it includes the
+               //pattern, with early abort as soon as the pattern is found
+               boolean NaNpattern = Double.isNaN(pattern);
+               if( isInSparseFormat() ) {
+                       //fewer non-zeros than cells: at least one cell is zero
+                       if( nonZeros < getLength() && pattern == 0 )
+                               return true;
+                       SparseBlock sb = getSparseBlock();
+                       for(int i=0; i<rlen; i++) {
+                               if( sb.isEmpty(i) ) continue;
+                               int apos = sb.pos(i);
+                               int alen = sb.size(i);
+                               double[] avals = sb.values(i);
+                               for( int j=apos; j<apos+alen; j++ ) {
+                                       double val = avals[j];
+                                       if( val==pattern || (NaNpattern && Double.isNaN(val)) )
+                                               return true;
+                               }
+                       }
+               }
+               else {
+                       DenseBlock db = getDenseBlock();
+                       for(int i=0; i<rlen; i++) {
+                               //values(i) is the backing array holding row i (it may hold
+                               //other rows too), so scan from the row's offset pos(i),
+                               //not from index 0, analogous to the sparse branch above
+                               double[] vals = db.values(i);
+                               int pos = db.pos(i);
+                               for( int j=pos; j<pos+clen; j++ ) {
+                                       double val = vals[j];
+                                       if( val==pattern || (NaNpattern && Double.isNaN(val)) )
+                                               return true;
+                               }
+                       }
+               }
+               return false;
+       }
+       
        /**
         * Append value is only used when values are appended at the end of 
each row for the sparse representation
         * This can only be called, when the caller knows the access pattern of 
the block
@@ -5004,10 +5037,13 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                MatrixBlock ret = checkType(result);
                examSparsity(); //ensure its in the right format
                ret.reset(rlen, clen, sparse);
+               //probe early abort conditions
                if( nonZeros == 0 && pattern != 0  )
-                       return ret; //early abort
-               boolean NaNpattern = Double.isNaN(pattern);
+                       return ret;
+               if( !containsValue(pattern) )
+                       return this; //avoid allocation + copy
                
+               boolean NaNpattern = Double.isNaN(pattern);
                if( sparse ) //SPARSE
                {
                        if( pattern != 0d ) //SPARSE <- SPARSE (sparse-safe)
@@ -5017,15 +5053,13 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                                SparseBlock c = ret.sparseBlock;
                                
                                for( int i=0; i<rlen; i++ ) {
-                                       if( !a.isEmpty(i) )
-                                       {
+                                       if( !a.isEmpty(i) ) {
                                                c.allocate(i);
                                                int apos = a.pos(i);
                                                int alen = a.size(i);
                                                int[] aix = a.indexes(i);
                                                double[] avals = a.values(i);
-                                               for( int j=apos; j<apos+alen; 
j++ )
-                                               {
+                                               for( int j=apos; j<apos+alen; 
j++ ) {
                                                        double val = avals[j];
                                                        if( val== pattern || 
(NaNpattern && Double.isNaN(val)) )
                                                                c.append(i, 
aix[j], replacement);
@@ -5038,19 +5072,17 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                        else //DENSE <- SPARSE
                        {
                                ret.sparse = false;
-                               ret.allocateDenseBlock();       
+                               ret.allocateDenseBlock();
                                SparseBlock a = sparseBlock;
                                double[] c = ret.getDenseBlockValues();
                                
                                //initialize with replacement (since all 0 
values, see SPARSITY_TURN_POINT)
-                               Arrays.fill(c, replacement); 
+                               Arrays.fill(c, replacement);
                                
                                //overwrite with existing values (via scatter)
                                if( a != null  ) //check for empty matrix
-                                       for( int i=0, cix=0; i<rlen; i++, 
cix+=clen )
-                                       {
-                                               if( !a.isEmpty(i) )
-                                               {
+                                       for( int i=0, cix=0; i<rlen; i++, 
cix+=clen ) {
+                                               if( !a.isEmpty(i) ) {
                                                        int apos = a.pos(i);
                                                        int alen = a.size(i);
                                                        int[] aix = 
a.indexes(i);
@@ -5060,22 +5092,16 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                                                                        c[ 
cix+aix[j] ] = avals[ j ];
                                                }
                                        }
-                       }                       
+                       }
                }
-               else //DENSE <- DENSE
-               {
+               else { //DENSE <- DENSE
                        int mn = ret.rlen * ret.clen;
                        ret.allocateDenseBlock();
                        double[] a = getDenseBlockValues();
                        double[] c = ret.getDenseBlockValues();
-                       
-                       for( int i=0; i<mn; i++ ) 
-                       {
-                               double val = a[i];
-                               if( val== pattern || (NaNpattern && 
Double.isNaN(val)) )
-                                       c[i] = replacement;
-                               else
-                                       c[i] = a[i];
+                       for( int i=0; i<mn; i++ ) {
+                               c[i] = ( a[i]== pattern || (NaNpattern && 
Double.isNaN(a[i])) ) ?
+                                       replacement : a[i];
                        }
                }
                

Reply via email to