This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 7b34a67f2c [SYSTEMDS-3896] Leverage SIMD Vector API for Counting NNZ
7b34a67f2c is described below
commit 7b34a67f2cd220bb4451385e872ce6a1b0940421
Author: Matthias Boehm <[email protected]>
AuthorDate: Wed Jul 16 10:44:57 2025 +0200
[SYSTEMDS-3896] Leverage SIMD Vector API for Counting NNZ
This patch leverages the new Vector API for the core primitive of
counting the number of non-zeros (which is still single-threaded
because it is usually done for chunks as part of multi-threaded tasks).
For single-threaded computeNnz on an 8GB dense matrix after JIT
compilation, this patch improved performance from 1100ms to 850ms.
---
.../apache/sysds/runtime/util/UtilFunctions.java | 29 +++++++++-------------
1 file changed, 12 insertions(+), 17 deletions(-)
diff --git a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
index f233b69a11..4f0a15d78f 100644
--- a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
+++ b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
@@ -58,9 +58,15 @@ import org.apache.sysds.runtime.matrix.data.Pair;
import org.apache.sysds.runtime.meta.TensorCharacteristics;
import org.apache.sysds.runtime.transform.encode.ColumnEncoderRecode;
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.VectorSpecies;
+
public class UtilFunctions {
protected static final Log LOG =
LogFactory.getLog(UtilFunctions.class.getName());
+ private static final VectorSpecies<Double> SPECIES =
DoubleVector.SPECIES_PREFERRED;
+ private static final int vLen = SPECIES.length();
+
private UtilFunctions(){
// empty private constructor
// making all calls static
@@ -876,25 +882,14 @@ public class UtilFunctions {
public static int computeNnz(final double[] a, final int ai, final int
len) {
int lnnz = 0;
final int end = ai + len;
- final int h = (end - ai) % 8;
+ final int rest = (end - ai) % vLen; // leftover elements that do not fill a whole vector
- for(int i = ai; i < ai + h; i++)
+ for(int i = ai; i < ai + rest; i++) // scalar prologue over the leftover elements
lnnz += (a[i] != 0.0) ? 1 : 0;
- for(int i = ai + h; i < end; i += 8)
- lnnz += computeNnzBy8(a, i);
- return lnnz;
- }
-
- private static int computeNnzBy8(final double[] a, final int i) {
- int lnnz = 0;
- lnnz += (a[i] != 0.0) ? 1 : 0;
- lnnz += (a[i+1] != 0.0) ? 1 : 0;
- lnnz += (a[i+2] != 0.0) ? 1 : 0;
- lnnz += (a[i+3] != 0.0) ? 1 : 0;
- lnnz += (a[i+4] != 0.0) ? 1 : 0;
- lnnz += (a[i+5] != 0.0) ? 1 : 0;
- lnnz += (a[i+6] != 0.0) ? 1 : 0;
- lnnz += (a[i+7] != 0.0) ? 1 : 0;
+ for(int i = ai + rest; i < end; i += vLen) { // stride must equal the lane count: a fixed 8 skips elements when vLen < 8 and overlaps vectors when vLen > 8
+ DoubleVector aVec = DoubleVector.fromArray(SPECIES, a,
i);
+ lnnz += vLen-aVec.eq(0).trueCount(); // non-zeros = lanes minus lanes equal to zero
+ }
return lnnz;
}