This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git

commit e7ae5cf73f5d783fd4d3c69cd3c716a1295dcd65
Author: Matthias Boehm <[email protected]>
AuthorDate: Fri Oct 13 15:16:53 2023 +0200

    [MINOR] Performance binary vector-scalar operations (specialized)
    
    This patch makes a minor improvement to the general kernel for
    matrix-scalar binary operations by specializing it for column vectors.
    In this case the kernel simplifies because, by definition, a column
    vector never requires handling of large-partitioned blocks.
    
    In a scenario with very frequent vector-scalar operations (a parfor
    with ~11M iterations, each iteration performing 11M vector-scalar
    operations), performance improved by almost 2x (see the sample below):
    
    Heavy hitter instructions:
      1  min          391.215   2309
      2  uak+         143.653   2310
    
    -->
    
    Heavy hitter instructions:
      1  min          198.776   2309
      2  uak+         137.910   2310
---
 .../runtime/matrix/data/LibMatrixBincell.java      | 25 ++++++++++++++++------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
index 9314d153fc..ca3ecf583d 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
@@ -1577,15 +1577,26 @@ public class LibMatrixBincell {
                
                //compute scalar operation, incl nnz maintenance
                long nnz = 0;
-               for(int i=rl; i<ru; i++) {
-                       double[] a = da.values(i);
-                       double[] c = dc.values(i);
-                       int apos = da.pos(i), cpos = dc.pos(i);
-                       for(int j=0; j<clen; j++) {
-                               c[cpos+j] = op.executeScalar( a[apos+j] );
-                               nnz += (c[cpos+j] != 0) ? 1 : 0;
+               if( clen == 1 ) { //COL VECTOR
+                       double[] a = da.valuesAt(0);
+                       double[] c = dc.valuesAt(0);
+                       for(int i=rl; i<ru; i++) { //VECTOR
+                               c[i] = op.executeScalar( a[i] );
+                               nnz += (c[i] != 0) ? 1 : 0;
+                       }
+               }
+               else { //MULTI-COL MATRIX
+                       for(int i=rl; i<ru; i++) {
+                               double[] a = da.values(i);
+                               double[] c = dc.values(i);
+                               int apos = da.pos(i), cpos = dc.pos(i);
+                               for(int j=0; j<clen; j++) {
+                                       c[cpos+j] = op.executeScalar( a[apos+j] );
+                                       nnz += (c[cpos+j] != 0) ? 1 : 0;
+                               }
                        }
                }
+               
                return ret.nonZeros = nnz;
        }
 

Reply via email to