This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new a997b4194c [SYSTEMDS-3636] Alternative ultra-sparse tsmm kernel (still
disabled)
a997b4194c is described below
commit a997b4194c81d6026780da4d0463d62ac5c34c59
Author: Matthias Boehm <[email protected]>
AuthorDate: Fri Oct 27 21:15:21 2023 +0200
[SYSTEMDS-3636] Alternative ultra-sparse tsmm kernel (still disabled)
---
.../sysds/runtime/matrix/data/LibMatrixMult.java | 41 ++++++++++++++++++++++
1 file changed, 41 insertions(+)
diff --git
a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
index 0b8bd216f4..6fda33ad09 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
@@ -2405,6 +2405,47 @@ public class LibMatrixMult
}
}
}
+
+ //alternative matrixMultTransposeSelfUltraSparse2 w/ IKJ iteration
order and dense buffering
+ //(for moderately large graphs 4x improvement compared to above, but
for large graphs slower -> inconclusive)
+ @SuppressWarnings("unused") //presumably suppressed because nothing calls this kernel yet -- TODO confirm
+ private static void matrixMultTransposeSelfUltraSparse2( MatrixBlock
m1, MatrixBlock m1t, MatrixBlock ret, boolean leftTranspose, int rl, int ru ) {
+ if( leftTranspose ) //only the non-left-transposed case is handled; otherwise fail fast
+ throw new DMLRuntimeException("Left tsmm with sparse
output not supported");
+
+ // Operation X%*%t(X), sparse input and output: reads the sparse blocks of m1 and m1t and appends results to the sparse block of ret (m1t is presumably t(m1) -- verify against caller)
+ SparseBlock a = m1.sparseBlock; //left-hand input rows
+ SparseBlock b = m1t.sparseBlock; //right-hand input, indexed by the column indexes found in a
+ SparseBlock c = ret.sparseBlock; //output rows, only appended to
+ int m = m1.rlen; //row count of m1, used as length of the dense buffer
+ double[] tmp = new double[m]; //dense accumulator for one output row, reused across all rows
+
+ for(int i=rl; i<ru; i++) { //rows in X, restricted to the range [rl, ru)
+ if( a.isEmpty(i) ) continue; //empty input row: nothing is appended for row i
+ int apos = a.pos(i); //start offset of row i within aix/avals
+ int alen = a.size(i); //number of entries of row i
+ int[] aix = a.indexes(i);
+ double[] avals = a.values(i);
+ //aggregate arow %*% B into tmp (tmp is cleared first because it is shared across iterations of i)
+ Arrays.fill(tmp, 0);
+ for(int k=apos; k<apos+alen; k++) {
+ int aixk = aix[k]; //index of the k-th entry in row i, used below as row id into b
+ double aval = avals[k];
+ if( b.isEmpty(aixk) ) continue; //no matching row in b, no contribution
+ int bpos = b.pos(aixk);
+ int bpos2 = b.posFIndexGTE(aixk, i); //presumably position of the first index >= i in row aixk, i.e., only entries at or right of the diagonal are used -- TODO confirm
+ if( bpos2 < 0 ) continue; //negative result is treated as "no such entry"
+ int blen = b.size(aixk);
+ int[] bix = b.indexes(aixk);
+ double[] bvals = b.values(aixk);
+ vectMultiplyAdd(aval, bvals, tmp, bix, bpos2,
0, bpos+blen-bpos2); //presumably adds aval*bvals[p] into tmp[bix[p]] for the given start/length; NOTE(review): bpos2 is used as-is for the start while the length adds bpos -- confirm whether posFIndexGTE is absolute or relative to bpos when bpos != 0
+ }
+ //copy non-zeros in tmp into sparse output (scans all m buffer slots for every non-empty row i, in ascending index order)
+ for(int j=0; j<m; j++)
+ if( tmp[j] != 0 )
+ c.append(i, j, tmp[j]);
+ }
+ }
private static void matrixMultPermuteDense(MatrixBlock pm1, MatrixBlock
m2, MatrixBlock ret1, MatrixBlock ret2, int rl, int ru) {
double[] a = pm1.getDenseBlockValues();