[systemds] 03/03: [SYSTEMDS-3388] TSMM sparse dense rows multi threaded bug

baunsgaard Thu, 09 Jun 2022 07:21:26 -0700

This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


commit 7aa2e29d79de79f3962517d5087ded0b4748592e
Author: baunsgaard <[email protected]>
AuthorDate: Wed Jun 8 23:35:42 2022 +0200

    [SYSTEMDS-3388] TSMM sparse dense rows multi threaded bug
    
    This commit address a bug in TSMM with sparse matrix input with
    filled dense rows where if the TSMM is executed in parallel, the
    dense rows would not be fully computed
    
    The bug was introduced in a previous fix of the same issue that fixed
    single threaded execution but did not verify multithreaded execution.
    
    Closes #1629
    Closes #1630
    Closes #1631
---
 .../sysds/runtime/matrix/data/LibMatrixMult.java   | 48 +++++++++++-----------
 .../sysds/test/component/matrix/TSMMTest.java      | 42 ++++++++++++++-----
 2 files changed, 57 insertions(+), 33 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
index 026b090c9b..99fb4b30ca 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
@@ -431,7 +431,15 @@ public class LibMatrixMult
                //System.out.println("TSMM 
("+m1.isInSparseFormat()+","+m1.getNumRows()+","+m1.getNumColumns()+","+m1.getNonZeros()+","+leftTranspose+")
 in "+time.stop());
        }
 
-       public static void matrixMultTransposeSelf( MatrixBlock m1, MatrixBlock 
ret, boolean leftTranspose, int k ) {
+       /**
+        * TSMM with optional transposed left side or not (Transposed self 
matrix multiplication)
+        * 
+        * @param m1            The matrix to do tsmm
+        * @param ret           The output matrix to allocate the result to
+        * @param leftTranspose If the left side should be considered transposed
+        * @param k             the number of threads to use
+        */
+       public static void matrixMultTransposeSelf(MatrixBlock m1, MatrixBlock 
ret, boolean leftTranspose, int k) {
                //check inputs / outputs
                if( m1.isEmptyBlock(false) ) {
                        ret.examSparsity(); //turn empty dense into sparse
@@ -451,21 +459,22 @@ public class LibMatrixMult
                ret.allocateDenseBlock();
        
                //core multi-threaded matrix mult computation
+               ExecutorService pool = CommonThreadPool.get(k);
                try {
-                       ExecutorService pool = CommonThreadPool.get(k);
                        ArrayList<MatrixMultTransposeTask> tasks = new 
ArrayList<>();
                        //load balance via #tasks=2k due to triangular shape 
-                       int blklen = (int)(Math.ceil((double)ret.rlen/(2*k)));
-                       for( int i=0; i<2*k & i*blklen<ret.rlen; i++ )
-                               tasks.add(new MatrixMultTransposeTask(m1, ret, 
leftTranspose, i*blklen, Math.min((i+1)*blklen, ret.rlen)));
-                       List<Future<Object>> rtasks = pool.invokeAll(tasks);
-                       pool.shutdown();
-                       for( Future<Object> rtask : rtasks )
-                               rtask.get(); //error handling
+                       int blklen = (int)(Math.ceil((double)ret.rlen / (2 * 
k)));
+                       for(int i = 0; i < ret.rlen; i += blklen)
+                               tasks.add(new MatrixMultTransposeTask(m1, ret, 
leftTranspose, i, Math.min(i+blklen, ret.rlen)));
+                       for( Future<Object> rtask :  pool.invokeAll(tasks) )
+                               rtask.get();
                }
                catch(Exception ex) {
                        throw new DMLRuntimeException(ex);
                }
+               finally{
+                       pool.shutdown();
+               }
                
                //post-processing
                long nnz = copyUpperToLowerTriangle(ret);
@@ -2069,32 +2078,25 @@ public class LibMatrixMult
                                final int arlen = a.numRows();
                                for( int r=0; r<arlen; r++ ) {
                                        if( a.isEmpty(r) ) continue;
-                                       int alen = a.size(r);
-                                       double[] avals = a.values(r);
+                                       final int alen = a.size(r);
+                                       final double[] avals = a.values(r);
+                                       final int apos = a.pos(r);
                                        if( alen == n ) { //dense row
-                                               final int apos = a.pos(r);
                                                for (int i = rl; i < ru; i++){
                                                        double[] cvals = 
c.values(i);
                                                        int cix = c.pos(i);
                                                        double val = avals[i + 
apos];
-                                                       for(int j = i ; j < ru; 
j++){
-                                                               double d = val 
* avals[j + apos];
-                                                               cvals[cix + j] 
+=d;
-                                                       }
+                                                       for(int j = i; j < 
m1.clen; j++)
+                                                               cvals[cix + j] 
+=val * avals[j + apos];
                                                }
                                        }
                                        else { //non-full sparse row
-                                               int apos = a.pos(r);
                                                int[] aix = a.indexes(r);
                                                int rlix = (rl==0) ? 0 : 
a.posFIndexGTE(r, rl);
                                                rlix = (rlix>=0) ? apos+rlix : 
apos+alen;
                                                int len = apos + alen;
-                                               for(int i = rlix; i < len && 
aix[i]<ru; i++) {
-                                                       double val = avals[i];
-                                                       if( val != 0 )
-                                                               
vectMultiplyAdd(val, avals, c.values(aix[i]),
-                                                                       aix, i, 
c.pos(aix[i]), len-i);
-                                               }
+                                               for(int i = rlix; i < len && 
aix[i] < ru; i++)
+                                                       
vectMultiplyAdd(avals[i], avals, c.values(aix[i]), aix, i, c.pos(aix[i]), len - 
i);
                                        }
                                }
                        }
diff --git a/src/test/java/org/apache/sysds/test/component/matrix/TSMMTest.java 
b/src/test/java/org/apache/sysds/test/component/matrix/TSMMTest.java
index d195ea952b..e99f4803dd 100644
--- a/src/test/java/org/apache/sysds/test/component/matrix/TSMMTest.java
+++ b/src/test/java/org/apache/sysds/test/component/matrix/TSMMTest.java
@@ -48,13 +48,12 @@ public class TSMMTest {
                MatrixBlock mb;
                final double[] spar = new double[] {0.3, 0.1, 0.01};
                final int[] cols = new int[] {10, 6, 4, 3, 2, 1};
-               final int[] threads = new int[] {1, 10};
+               final int[] threads = new int[] {1, 3, 10, 11, 16, 32, 44};
                final int[] rows = new int[] {10};
                for(int i = 0; i < 3; i++) { // seeds
                        for(int s = 0; s < spar.length; s++) {
                                for(int c = 0; c < cols.length; c++) {
                                        for(int r = 0; r < rows.length; r++) {
-
                                                mb = 
TestUtils.round(TestUtils.generateTestMatrixBlock(rows[r], cols[c], 1, 10, 
spar[s], i));
                                                for(int t = 0; t < 
threads.length; t++)
                                                        tests.add(new Object[] 
{mb, threads[t]});
@@ -63,39 +62,62 @@ public class TSMMTest {
 
                        }
                }
+               MatrixBlock mb1 = new MatrixBlock(100, 100, true); // dense 
cols sparse block
+               for(int j : new int[] {1, 4, 44, 87})
+                       for(int i = 0; i < 100; i++)
+                               mb1.quickSetValue(i, j, 100);
+               for(int t = 0; t < threads.length; t++)
+                       tests.add(new Object[] {mb1, threads[t]});
+
+               MatrixBlock mb2 = new MatrixBlock(100, 100, true); // sparse 
but specific
+               for(int j : new int[] {44, 87})
+                       for(int i : new int[] {56, 92})
+                               mb2.quickSetValue(i, j, 100);
+               for(int t = 0; t < threads.length; t++)
+                       tests.add(new Object[] {mb2, threads[t]});
+
+               MatrixBlock mb3 = new MatrixBlock(100, 100, true); // dense 
rows sparse block
+               for(int j : new int[] {1, 4, 44, 87})
+                       for(int i = 0; i < 100; i++)
+                               mb3.quickSetValue(j, i, 100);
+               for(int t = 0; t < threads.length; t++)
+                       tests.add(new Object[] {mb3, threads[t]});
                return tests;
        }
 
        @Test
        public void testTSMMLeftSparseVSDense() {
                final MMTSJType mType = MMTSJType.LEFT;
-               final MatrixBlock expected = 
in.transposeSelfMatrixMultOperations(null, mType, k);
+               final MatrixBlock expected = 
in.transposeSelfMatrixMultOperations(null, mType, 1);
+
+               if(k > 1) // test multithread
+                       testCompare(expected, in, "Compare single vs 
multithread");
+
                final boolean isSparse = in.isInSparseFormat();
 
                if(isSparse) {
                        MatrixBlock m2 = new MatrixBlock();
                        m2.copy(in);
                        m2.sparseToDense();
-                       testCompare(expected, m2);
+                       testCompare(expected, m2, "Compare sparse vs dense");
                }
                else {
                        MatrixBlock m2 = new MatrixBlock();
                        m2.copy(in);
                        m2.denseToSparse(true);
-                       testCompare(expected, m2);
+                       testCompare(expected, m2, "Compare dense vs CSR 
Sparse");
 
                        MatrixBlock m3 = new MatrixBlock();
                        m3.copy(in);
-                       m3.copy(in);
                        m3.denseToSparse(false);
-                       testCompare(expected, m3);
+                       testCompare(expected, m3, "Compare dense vs MCSR 
Sparse");
                }
        }
 
-       private void testCompare(MatrixBlock expected, MatrixBlock m2) {
+       private void testCompare(MatrixBlock expected, MatrixBlock m2, String 
message) {
                final MMTSJType mType = MMTSJType.LEFT;
                final MatrixBlock actual = 
m2.transposeSelfMatrixMultOperations(null, mType, k);
-               final String inString = m2.toString();
-               TestUtils.compareMatricesBitAvgDistance(expected, actual, 10L, 
256L, inString);
+               // final String inString = m2.toString();
+               TestUtils.compareMatricesBitAvgDistance(expected, actual, 10L, 
256L, message);
        }
 }

[systemds] 03/03: [SYSTEMDS-3388] TSMM sparse dense rows multi threaded bug

Reply via email to