This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new f43b313fee [SYSTEMDS-3919] Use SIMD Vector API in unary dense
operations
f43b313fee is described below
commit f43b313feef1c7f3c3ff24b58fd8e869dad06297
Author: Matthias Boehm <[email protected]>
AuthorDate: Thu Oct 2 14:32:08 2025 +0200
[SYSTEMDS-3919] Use SIMD Vector API in unary dense operations
This patch introduces SIMD vector operations in selected unary builtin
operations. However, the code-path is still disabled because experiments
on different hardware platforms showed inconclusive results.
---
.../sysds/runtime/functionobjects/Builtin.java | 40 +++++++++++++++++++++-
.../runtime/matrix/data/LibMatrixBincell.java | 15 +++++---
2 files changed, 50 insertions(+), 5 deletions(-)
diff --git
a/src/main/java/org/apache/sysds/runtime/functionobjects/Builtin.java
b/src/main/java/org/apache/sysds/runtime/functionobjects/Builtin.java
index 8e9aef9466..39735be62e 100644
--- a/src/main/java/org/apache/sysds/runtime/functionobjects/Builtin.java
+++ b/src/main/java/org/apache/sysds/runtime/functionobjects/Builtin.java
@@ -26,6 +26,9 @@ import org.apache.sysds.api.DMLScript;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.DMLScriptException;
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.VectorSpecies;
+
/**
* Class with pre-defined set of objects. This class can not be instantiated
elsewhere.
@@ -46,7 +49,7 @@ import org.apache.sysds.runtime.DMLScriptException;
public class Builtin extends ValueFunction
{
private static final long serialVersionUID = 3836744687789840574L;
-
+
public enum BuiltinCode { AUTODIFF, SIN, COS, TAN, SINH, COSH, TANH,
ASIN, ACOS, ATAN, LOG, LOG_NZ, MIN,
MAX, ABS, SIGN, SQRT, EXP, PLOGP, PRINT, PRINTF, NROW, NCOL,
LENGTH, LINEAGE, ROUND, MAXINDEX, MININDEX,
STOP, CEIL, FLOOR, CUMSUM, ROWCUMSUM, CUMPROD, CUMMIN, CUMMAX,
CUMSUMPROD, INVERSE, SPROP, SIGMOID, EVAL, LIST,
@@ -54,6 +57,9 @@ public class Builtin extends ValueFunction
DROP_INVALID_LENGTH, VALUE_SWAP, FRAME_ROW_REPLICATE,
MAP, COUNT_DISTINCT, COUNT_DISTINCT_APPROX, UNIQUE}
+ private static final VectorSpecies<Double> SPECIES = //vector species used by the array-based execute
DoubleVector.SPECIES_PREFERRED;
+ private static final int vLen = SPECIES.length(); //number of double lanes per vector of SPECIES
+
public BuiltinCode bFunc;
@@ -197,6 +203,38 @@ public class Builtin extends ValueFunction
throw new
DMLRuntimeException("Builtin.execute(): Unknown operation: " + bFunc);
}
}
+ /**
+ * Applies this builtin function to the cells a[start..end) and writes
+ * each result to c at the same index. For ABS and SQRT, the leading
+ * (end-start)%vLen cells are computed via the scalar execute(double)
+ * and the remaining cells (a multiple of vLen) via DoubleVector
+ * operations; for all other builtin codes the scalar loop covers the
+ * entire range.
+ *
+ * @param a input values
+ * @param c output values, written at the same indexes as read from a
+ * @param start first index to process (inclusive)
+ * @param end last index to process (exclusive)
+ * @return number of results in c[start..end) that are not equal to 0
+ */
+ public long execute (double[] a, double[] c, int start, int end) {
+ long nnz = 0;
+
+ //process rest or unsupported builtin codes (scalar path); for ABS/SQRT only the leading remainder, so that end-end2 is a multiple of vLen
+ final int end2 = (bFunc==BuiltinCode.ABS ||
bFunc==BuiltinCode.SQRT)?
+ start+((end-start)%vLen) : end;
+ for( int i = start; i < end2; i++) {
+ c[i] = execute(a[i]);
+ nnz += (c[i] != 0) ? 1 : 0;
+ }
+ //count all remaining cells as non-zero up front; the vector loops below subtract the lanes that are zero
+ nnz += (end-end2);
+ if( bFunc == BuiltinCode.ABS) {
+ for( int i = end2; i < end; i+=vLen ){ //one full vector of vLen cells per iteration
+ DoubleVector aVec =
DoubleVector.fromArray(SPECIES, a, i);
+ DoubleVector cVec = aVec.abs();
+ nnz -= cVec.eq(0).trueCount(); //discount result lanes equal to 0
+ cVec.intoArray(c, i);
+ }
+ }
+ else if(bFunc == BuiltinCode.SQRT ) {
+ for( int i = end2; i < end; i+=vLen ){ //one full vector of vLen cells per iteration
+ DoubleVector aVec =
DoubleVector.fromArray(SPECIES, a, i);
+ DoubleVector cVec = aVec.sqrt();
+ nnz -= cVec.eq(0).trueCount(); //discount result lanes equal to 0
+ cVec.intoArray(c, i);
+ }
+ }
+ return nnz;
+ }
+
@Override
public double execute (long in) {
diff --git
a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
index cbcd60465c..017c54f31a 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixBincell.java
@@ -3278,10 +3278,17 @@ public class LibMatrixBincell {
double[] avals = _a.values(_rl);
double[] cvals = _c.values(_rl);
int start = _a.pos(_rl), end = _a.pos(_ru);
- for( int i=start; i<end; i++ ) {
- cvals[i] = _op.fn.execute(avals[i]);
- nnz += (cvals[i] != 0) ? 1 : 0;
- }
+
+ //TODO enable vectorized Builtin path once vector API results are
conclusive across hardware platforms
+ //if( _op.fn instanceof Builtin ) {
+ // nnz += ((Builtin)_op.fn).execute(avals,
cvals, start, end);
+ //}
+ //else {
+ for( int i=start; i<end; i++ ) {
+ cvals[i] =
_op.fn.execute(avals[i]);
+ nnz += (cvals[i] != 0) ? 1 : 0;
+ }
+ //}
}
//generic dense-dense, including large blocks
else {