This is an automated email from the ASF dual-hosted git repository. mboehm7 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
commit e7ae5cf73f5d783fd4d3c69cd3c716a1295dcd65 Author: Matthias Boehm <[email protected]> AuthorDate: Fri Oct 13 15:16:53 2023 +0200 [MINOR] Performance binary vector-scalar operations (specialized) This patch makes a minor improvement to the general kernel for matrix-scalar binary operations by specializing for column vectors. In this case, the kernel simplifies because, by definition, we never have to deal with large-partitioned blocks. In a scenario of very frequent vector-scalar operations (in a parfor with ~11M iterations, each iteration performing 11M vector-scalar operations), the performance improved by almost 2x (sample below): Heavy hitter instructions: 1 min 391.215 2309 2 uak+ 143.653 2310 --> Heavy hitter instructions: 1 min 198.776 2309 2 uak+ 137.910 2310 --- .../runtime/matrix/data/LibMatrixBincell.java | 25 ++++++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java index 9314d153fc..ca3ecf583d 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java @@ -1577,15 +1577,26 @@ public class LibMatrixBincell { //compute scalar operation, incl nnz maintenance long nnz = 0; - for(int i=rl; i<ru; i++) { - double[] a = da.values(i); - double[] c = dc.values(i); - int apos = da.pos(i), cpos = dc.pos(i); - for(int j=0; j<clen; j++) { - c[cpos+j] = op.executeScalar( a[apos+j] ); - nnz += (c[cpos+j] != 0) ? 1 : 0; + if( clen == 1 ) { //COL VECTOR + double[] a = da.valuesAt(0); + double[] c = dc.valuesAt(0); + for(int i=rl; i<ru; i++) { //VECTOR + c[i] = op.executeScalar( a[i] ); + nnz += (c[i] != 0) ? 
1 : 0; + } + } + else { //MULTI-COL MATRIX + for(int i=rl; i<ru; i++) { + double[] a = da.values(i); + double[] c = dc.values(i); + int apos = da.pos(i), cpos = dc.pos(i); + for(int j=0; j<clen; j++) { + c[cpos+j] = op.executeScalar( a[apos+j] ); + nnz += (c[cpos+j] != 0) ? 1 : 0; + } } } + return ret.nonZeros = nnz; }
