This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new f43b313fee [SYSTEMDS-3919] Use SIMD Vector API in unary dense operations
f43b313fee is described below

commit f43b313feef1c7f3c3ff24b58fd8e869dad06297
Author: Matthias Boehm <[email protected]>
AuthorDate: Thu Oct 2 14:32:08 2025 +0200

    [SYSTEMDS-3919] Use SIMD Vector API in unary dense operations
    
    This patch introduces SIMD vector operations in selected unary builtin
    operations. However, the code-path is still disabled because experiments
    on different hardware platforms showed inconclusive results.
---
 .../sysds/runtime/functionobjects/Builtin.java     | 40 +++++++++++++++++++++-
 .../runtime/matrix/data/LibMatrixBincell.java      | 15 +++++---
 2 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/functionobjects/Builtin.java b/src/main/java/org/apache/sysds/runtime/functionobjects/Builtin.java
index 8e9aef9466..39735be62e 100644
--- a/src/main/java/org/apache/sysds/runtime/functionobjects/Builtin.java
+++ b/src/main/java/org/apache/sysds/runtime/functionobjects/Builtin.java
@@ -26,6 +26,9 @@ import org.apache.sysds.api.DMLScript;
 import org.apache.sysds.runtime.DMLRuntimeException;
 import org.apache.sysds.runtime.DMLScriptException;
 
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.VectorSpecies;
+
 
 /**
  *  Class with pre-defined set of objects. This class can not be instantiated elsewhere.
@@ -46,7 +49,7 @@ import org.apache.sysds.runtime.DMLScriptException;
 public class Builtin extends ValueFunction 
 {
        private static final long serialVersionUID = 3836744687789840574L;
-       
+               
        public enum BuiltinCode { AUTODIFF, SIN, COS, TAN, SINH, COSH, TANH, ASIN, ACOS, ATAN, LOG, LOG_NZ, MIN,
                MAX, ABS, SIGN, SQRT, EXP, PLOGP, PRINT, PRINTF, NROW, NCOL, LENGTH, LINEAGE, ROUND, MAXINDEX, MININDEX,
                STOP, CEIL, FLOOR, CUMSUM, ROWCUMSUM, CUMPROD, CUMMIN, CUMMAX, CUMSUMPROD, INVERSE, SPROP, SIGMOID, EVAL, LIST,
@@ -54,6 +57,9 @@ public class Builtin extends ValueFunction
                DROP_INVALID_LENGTH, VALUE_SWAP, FRAME_ROW_REPLICATE,
                MAP, COUNT_DISTINCT, COUNT_DISTINCT_APPROX, UNIQUE}
 
+       private static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_PREFERRED;
+       private static final int vLen = SPECIES.length();
+
 
        public BuiltinCode bFunc;
        
@@ -197,6 +203,38 @@ public class Builtin extends ValueFunction
                                throw new DMLRuntimeException("Builtin.execute(): Unknown operation: " + bFunc);
                }
        }
+       
+       public long execute (double[] a, double[] c, int start, int end) {
+               long nnz = 0;
+               
+               //process rest or unsupported builtin codes
+               final int end2 = (bFunc==BuiltinCode.ABS || bFunc==BuiltinCode.SQRT)?
+                       start+((end-start)%vLen) : end;
+               for( int i = start; i < end2; i++) {
+                       c[i] = execute(a[i]);
+                       nnz += (c[i] != 0) ? 1 : 0;
+               }
+               
+               nnz += (end-end2);
+               if( bFunc == BuiltinCode.ABS) {
+                       for( int i = end2; i < end; i+=vLen ){
+                               DoubleVector aVec = DoubleVector.fromArray(SPECIES, a, i);
+                               DoubleVector cVec = aVec.abs();
+                               nnz -= cVec.eq(0).trueCount();
+                               cVec.intoArray(c, i);
+                       }
+               }
+               else if(bFunc == BuiltinCode.SQRT ) {
+                       for( int i = end2; i < end; i+=vLen ){
+                               DoubleVector aVec = DoubleVector.fromArray(SPECIES, a, i);
+                               DoubleVector cVec = aVec.sqrt();
+                               nnz -= cVec.eq(0).trueCount();
+                               cVec.intoArray(c, i);
+                       }
+               }
+               return nnz;
+       }
+
 
        @Override
        public double execute (long in) {
diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
index cbcd60465c..017c54f31a 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
@@ -3278,10 +3278,17 @@ public class LibMatrixBincell {
                                double[] avals = _a.values(_rl);
                                double[] cvals = _c.values(_rl);
                                int start = _a.pos(_rl), end = _a.pos(_ru);
-                               for( int i=start; i<end; i++ ) {
-                                       cvals[i] = _op.fn.execute(avals[i]);
-                                       nnz += (cvals[i] != 0) ? 1 : 0;
-                               }
+                               
+                               //TODO use of vector API inconclusive on different hardware
+                               //if( _op.fn instanceof Builtin ) {
+                               //      nnz += ((Builtin)_op.fn).execute(avals, cvals, start, end);
+                               //}
+                               //else {
+                                       for( int i=start; i<end; i++ ) {
+                                               cvals[i] = _op.fn.execute(avals[i]);
+                                               nnz += (cvals[i] != 0) ? 1 : 0;
+                                       }
+                               //}
                        }
                        //generic dense-dense, including large blocks
                        else {

Reply via email to