This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 3081ecf4c1 [SYSTEMDS-3793] Fix transpose performance on ultra-sparse
matrices
3081ecf4c1 is described below
commit 3081ecf4c13f06be4beda9d016b23eadd75c5603
Author: Matthias Boehm <[email protected]>
AuthorDate: Tue Nov 19 18:07:04 2024 +0100
[SYSTEMDS-3793] Fix transpose performance on ultra-sparse matrices
The multi-threaded implementation of ultra-sparse matrices has a couple
of shortcomings (e.g., count column nnz, block allocation, too late
fallback to single-threaded). On a large 85M x 85M graph with 90M
non-zeros the transpose did not finish in hours. In this patch we now
introduces a more sophisticated sparse row iterator (row and column
lower/bounds) in order to facilitate a simple and fast transpose
ultra sparse operation. However, this implementation was still much
slower than falling back to single-threaded operations and thus use
single-threaded transpose for all ultra-sparse matrices instead of
if nnz < max(rows,cols). Now this operations completes in <9s.
---
.../org/apache/sysds/runtime/data/SparseBlock.java | 50 ++++++++++++++++------
.../sysds/runtime/matrix/data/LibMatrixReorg.java | 26 ++++++++---
.../sysds/runtime/matrix/data/MatrixBlock.java | 13 ++++++
3 files changed, 71 insertions(+), 18 deletions(-)
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
index 91bba40d0c..864569358f 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
@@ -561,6 +561,22 @@ public abstract class SparseBlock implements Serializable,
Block
//default generic iterator, override if necessary
return new SparseBlockIterator(rl, Math.min(ru,numRows()));
}
+
+ /**
+ * Get a non-zero iterator over the subblock [rl/cl, ru/cu). Note that
+ * the returned IJV object is reused across next calls and should
+ * be directly consumed or deep copied.
+ *
+ * @param rl inclusive lower row index starting at 0
+ * @param ru exclusive upper row index starting at 0
+ * @param cl inclusive lower column index starting at 0
+ * @param cu exclusive upper column index starting at 0
+ * @return IJV iterator
+ */
+ public Iterator<IJV> getIterator(int rl, int ru, int cl, int cu) {
+ //default generic iterator, override if necessary
+ return new SparseBlockIterator(rl, Math.min(ru,numRows()), cl,
cu);
+ }
/**
* Get an iterator over the indices of non-empty rows within the entire
sparse block.
@@ -694,19 +710,29 @@ public abstract class SparseBlock implements
Serializable, Block
private int _curColIx = -1; //current col index pos
private int[] _curIndexes = null; //current col indexes
private double[] _curValues = null; //current col values
- private boolean _noNext = false; //end indicator
+ private boolean _noNext = false; //end indicator
private IJV retijv = new IJV(); //reuse output tuple
+ private int _cl = 0;
+ private int _cu = Integer.MAX_VALUE;
protected SparseBlockIterator(int ru) {
_rlen = ru;
_curRow = 0;
- findNextNonZeroRow();
+ findNextNonZeroRow(0);
}
protected SparseBlockIterator(int rl, int ru) {
_rlen = ru;
_curRow = rl;
- findNextNonZeroRow();
+ findNextNonZeroRow(0);
+ }
+
+ protected SparseBlockIterator(int rl, int ru, int cl, int cu) {
+ _rlen = ru;
+ _curRow = rl;
+ _cl = cl;
+ _cu = cu;
+ findNextNonZeroRow(cl);
}
@Override
@@ -717,14 +743,12 @@ public abstract class SparseBlock implements
Serializable, Block
@Override
public IJV next( ) {
retijv.set(_curRow, _curIndexes[_curColIx],
_curValues[_curColIx]);
-
- //NOTE: no preincrement on curcolix to avoid OpenJDK8
escape analysis bug, encountered
- //with tests
SparsityRecompileTest/SparsityFunctionRecompileTest on parfor local result merge
- if( _curColIx < pos(_curRow)+size(_curRow)-1 )
+ if( _curColIx < pos(_curRow)+size(_curRow)-1 &&
_curIndexes[_curColIx+1] < _cu ) {
_curColIx++;
+ }
else {
_curRow++;
- findNextNonZeroRow();
+ findNextNonZeroRow(_cl);
}
return retijv;
@@ -733,19 +757,21 @@ public abstract class SparseBlock implements
Serializable, Block
@Override
public void remove() {
throw new RuntimeException("SparseBlockIterator is
unsupported!");
- }
+ }
/**
* Moves cursor to next non-zero value or indicates that no
more
* values are available.
*/
- private void findNextNonZeroRow() {
- while( _curRow<_rlen && isEmpty(_curRow))
+ private void findNextNonZeroRow(int cl) {
+ while( _curRow<_rlen && (isEmpty(_curRow)
+ || (cl>0 && posFIndexGTE(_curRow, cl) < 0)) )
_curRow++;
if(_curRow >= _rlen)
_noNext = true;
else {
- _curColIx = pos(_curRow);
+ _curColIx = (cl==0) ?
+ pos(_curRow) : posFIndexGTE(_curRow,
cl);
_curIndexes = indexes(_curRow);
_curValues = values(_curRow);
}
diff --git
a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
index 9c751401cd..132154907c 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
@@ -234,8 +234,8 @@ public class LibMatrixReorg {
|| (SHALLOW_COPY_REORG && !in.sparse && !out.sparse &&
(in.rlen == 1 || in.clen == 1)) //
|| (in.sparse && !out.sparse && in.rlen == 1) //
|| (!in.sparse && out.sparse && in.rlen == 1) //
- || (in.sparse && out.sparse && in.nonZeros <
Math.max(in.rlen, in.clen)) // ultra-sparse
- ) {
+ || (in.sparse && out.sparse && in.isUltraSparse(false)))
+ {
return transpose(in, out);
}
// set meta data and allocate output arrays (if required)
@@ -250,7 +250,9 @@ public class LibMatrixReorg {
// Timing time = new Timing(true);
// CSR is only allowed in the transposed output if the number
of non zeros is counted in the columns
- allowCSR = allowCSR && (in.clen <= 4096 || out.nonZeros <
10000000);
+ // and the temporary count arrays are not larger than the
entire input
+ allowCSR = allowCSR && (in.clen <= 4096 || out.nonZeros <
10000000)
+ && (k*4*in.clen < in.getInMemorySize());
int[] cnt = null;
final ExecutorService pool = CommonThreadPool.get(k);
@@ -276,12 +278,13 @@ public class LibMatrixReorg {
out.allocateSparseRowsBlock(false);
else
out.allocateDenseBlock(false);
-
// compute actual transpose and check for errors
ArrayList<TransposeTask> tasks = new ArrayList<>();
- boolean allowReturnBlock = out.sparse && in.sparse &&
in.rlen >= in.clen && cnt == null;
- boolean row = (in.sparse || in.rlen >= in.clen) &&
(!out.sparse || allowReturnBlock);
+ boolean allowReturnBlock = out.sparse && in.sparse
+ && in.rlen >= in.clen && cnt == null &&
!in.isUltraSparse(false);
+ boolean row = (in.sparse || in.rlen >= in.clen)
+ && (!out.sparse || allowReturnBlock) &&
!in.isUltraSparse(false);
int len = row ? in.rlen : in.clen;
int blklen = (int) (Math.ceil((double) len / k));
blklen += (!out.sparse && (blklen % 8) != 0) ? 8 -
blklen % 8 : 0;
@@ -1192,6 +1195,15 @@ public class LibMatrixReorg {
out.setNonZeros(in.getNonZeros());
}
+ private static void transposeUltraSparse(MatrixBlock in, MatrixBlock
out, int rl, int ru, int cl, int cu) {
+ Iterator<IJV> iter = in.getSparseBlockIterator(rl, ru, cl, cu);
+ SparseBlock b = out.getSparseBlock();
+ while( iter.hasNext() ) {
+ IJV cell = iter.next();
+ b.append(cell.getJ(), cell.getI(), cell.getV());
+ }
+ }
+
private static void transposeSparseToSparse(MatrixBlock in, MatrixBlock
out, int rl, int ru, int cl, int cu,
int[] cnt) {
// NOTE: called only in sequential or column-wise parallel
execution
@@ -3861,6 +3873,8 @@ public class LibMatrixReorg {
transposeDenseToDense( _in, _out, rl, ru, cl,
cu );
else if( _in.sparse && _out.sparse && _out.sparseBlock
instanceof SparseBlockCSR)
transposeSparseToSparseCSR(_in, _out, rl, ru,
cl, cu, _cnt);
+ else if( _in.sparse && _out.sparse &&
_in.isUltraSparse(false) )
+ transposeUltraSparse(_in, _out, rl, ru, cl, cu);
else if( _in.sparse && _out.sparse ){
if(allowReturnBlock)
return
transposeSparseToSparseBlock(_in, rl, ru);
diff --git
a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index 66d616a738..5bb7e2712c 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -686,6 +686,19 @@ public class MatrixBlock extends MatrixValue implements
CacheBlock<MatrixBlock>,
//get iterator over sparse block
return sparseBlock.getIterator(rl, ru);
}
+
+ public Iterator<IJV> getSparseBlockIterator(int rl, int ru, int cl, int
cu) {
+ //check for valid format, should have been checked from outside
+ if( !sparse )
+ throw new RuntimeException("getSparseBlockInterator
should not be called for dense format");
+
+ //check for existing sparse block: return empty list
+ if( sparseBlock==null )
+ return Collections.emptyListIterator();
+
+ //get iterator over sparse block
+ return sparseBlock.getIterator(rl, ru, cl, cu);
+ }
@Override
public double get(int r, int c) {