This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
commit 7aa2e29d79de79f3962517d5087ded0b4748592e Author: baunsgaard <[email protected]> AuthorDate: Wed Jun 8 23:35:42 2022 +0200 [SYSTEMDS-3388] TSMM sparse dense rows multi threaded bug This commit address a bug in TSMM with sparse matrix input with filled dense rows where if the TSMM is executed in parallel, the dense rows would not be fully computed The bug was introduced in a previous fix of the same issue that fixed single threaded execution but did not verify multithreaded execution. Closes #1629 Closes #1630 Closes #1631 --- .../sysds/runtime/matrix/data/LibMatrixMult.java | 48 +++++++++++----------- .../sysds/test/component/matrix/TSMMTest.java | 42 ++++++++++++++----- 2 files changed, 57 insertions(+), 33 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java index 026b090c9b..99fb4b30ca 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java @@ -431,7 +431,15 @@ public class LibMatrixMult //System.out.println("TSMM ("+m1.isInSparseFormat()+","+m1.getNumRows()+","+m1.getNumColumns()+","+m1.getNonZeros()+","+leftTranspose+") in "+time.stop()); } - public static void matrixMultTransposeSelf( MatrixBlock m1, MatrixBlock ret, boolean leftTranspose, int k ) { + /** + * TSMM with optional transposed left side or not (Transposed self matrix multiplication) + * + * @param m1 The matrix to do tsmm + * @param ret The output matrix to allocate the result to + * @param leftTranspose If the left side should be considered transposed + * @param k the number of threads to use + */ + public static void matrixMultTransposeSelf(MatrixBlock m1, MatrixBlock ret, boolean leftTranspose, int k) { //check inputs / outputs if( m1.isEmptyBlock(false) ) { ret.examSparsity(); //turn empty dense into sparse @@ -451,21 +459,22 @@ public class LibMatrixMult ret.allocateDenseBlock(); //core multi-threaded matrix mult computation + ExecutorService pool = CommonThreadPool.get(k); try { - ExecutorService pool = CommonThreadPool.get(k); ArrayList<MatrixMultTransposeTask> tasks = new ArrayList<>(); //load balance via #tasks=2k due to triangular shape - int blklen = (int)(Math.ceil((double)ret.rlen/(2*k))); - for( int i=0; i<2*k & i*blklen<ret.rlen; i++ ) - tasks.add(new MatrixMultTransposeTask(m1, ret, leftTranspose, i*blklen, Math.min((i+1)*blklen, ret.rlen))); - List<Future<Object>> rtasks = pool.invokeAll(tasks); - pool.shutdown(); - for( Future<Object> rtask : rtasks ) - rtask.get(); //error handling + int blklen = (int)(Math.ceil((double)ret.rlen / (2 * k))); + for(int i = 0; i < ret.rlen; i += blklen) + tasks.add(new MatrixMultTransposeTask(m1, ret, leftTranspose, i, Math.min(i+blklen, ret.rlen))); + for( Future<Object> rtask : pool.invokeAll(tasks) ) + rtask.get(); } catch(Exception ex) { throw new DMLRuntimeException(ex); } + finally{ + pool.shutdown(); + } //post-processing long nnz = copyUpperToLowerTriangle(ret); @@ -2069,32 +2078,25 @@ public class LibMatrixMult final int arlen = a.numRows(); for( int r=0; r<arlen; r++ ) { if( a.isEmpty(r) ) continue; - int alen = a.size(r); - double[] avals = a.values(r); + final int alen = a.size(r); + final double[] avals = a.values(r); + final int apos = a.pos(r); if( alen == n ) { //dense row - final int apos = a.pos(r); for (int i = rl; i < ru; i++){ double[] cvals = c.values(i); int cix = c.pos(i); double val = avals[i + apos]; - for(int j = i ; j < ru; j++){ - double d = val * avals[j + apos]; - cvals[cix + j] +=d; - } + for(int j = i; j < m1.clen; j++) + cvals[cix + j] +=val * avals[j + apos]; } } else { //non-full sparse row - int apos = a.pos(r); int[] aix = a.indexes(r); int rlix = (rl==0) ? 0 : a.posFIndexGTE(r, rl); rlix = (rlix>=0) ? apos+rlix : apos+alen; int len = apos + alen; - for(int i = rlix; i < len && aix[i]<ru; i++) { - double val = avals[i]; - if( val != 0 ) - vectMultiplyAdd(val, avals, c.values(aix[i]), - aix, i, c.pos(aix[i]), len-i); - } + for(int i = rlix; i < len && aix[i] < ru; i++) + vectMultiplyAdd(avals[i], avals, c.values(aix[i]), aix, i, c.pos(aix[i]), len - i); } } } diff --git a/src/test/java/org/apache/sysds/test/component/matrix/TSMMTest.java b/src/test/java/org/apache/sysds/test/component/matrix/TSMMTest.java index d195ea952b..e99f4803dd 100644 --- a/src/test/java/org/apache/sysds/test/component/matrix/TSMMTest.java +++ b/src/test/java/org/apache/sysds/test/component/matrix/TSMMTest.java @@ -48,13 +48,12 @@ public class TSMMTest { MatrixBlock mb; final double[] spar = new double[] {0.3, 0.1, 0.01}; final int[] cols = new int[] {10, 6, 4, 3, 2, 1}; - final int[] threads = new int[] {1, 10}; + final int[] threads = new int[] {1, 3, 10, 11, 16, 32, 44}; final int[] rows = new int[] {10}; for(int i = 0; i < 3; i++) { // seeds for(int s = 0; s < spar.length; s++) { for(int c = 0; c < cols.length; c++) { for(int r = 0; r < rows.length; r++) { - mb = TestUtils.round(TestUtils.generateTestMatrixBlock(rows[r], cols[c], 1, 10, spar[s], i)); for(int t = 0; t < threads.length; t++) tests.add(new Object[] {mb, threads[t]}); @@ -63,39 +62,62 @@ public class TSMMTest { } } + MatrixBlock mb1 = new MatrixBlock(100, 100, true); // dense cols sparse block + for(int j : new int[] {1, 4, 44, 87}) + for(int i = 0; i < 100; i++) + mb1.quickSetValue(i, j, 100); + for(int t = 0; t < threads.length; t++) + tests.add(new Object[] {mb1, threads[t]}); + + MatrixBlock mb2 = new MatrixBlock(100, 100, true); // sparse but specific + for(int j : new int[] {44, 87}) + for(int i : new int[] {56, 92}) + mb2.quickSetValue(i, j, 100); + for(int t = 0; t < threads.length; t++) + tests.add(new Object[] {mb2, threads[t]}); + + MatrixBlock mb3 = new MatrixBlock(100, 100, true); // dense rows sparse block + for(int j : new int[] {1, 4, 44, 87}) + for(int i = 0; i < 100; i++) + mb3.quickSetValue(j, i, 100); + for(int t = 0; t < threads.length; t++) + tests.add(new Object[] {mb3, threads[t]}); return tests; } @Test public void testTSMMLeftSparseVSDense() { final MMTSJType mType = MMTSJType.LEFT; - final MatrixBlock expected = in.transposeSelfMatrixMultOperations(null, mType, k); + final MatrixBlock expected = in.transposeSelfMatrixMultOperations(null, mType, 1); + + if(k > 1) // test multithread + testCompare(expected, in, "Compare single vs multithread"); + final boolean isSparse = in.isInSparseFormat(); if(isSparse) { MatrixBlock m2 = new MatrixBlock(); m2.copy(in); m2.sparseToDense(); - testCompare(expected, m2); + testCompare(expected, m2, "Compare sparse vs dense"); } else { MatrixBlock m2 = new MatrixBlock(); m2.copy(in); m2.denseToSparse(true); - testCompare(expected, m2); + testCompare(expected, m2, "Compare dense vs CSR Sparse"); MatrixBlock m3 = new MatrixBlock(); m3.copy(in); - m3.copy(in); m3.denseToSparse(false); - testCompare(expected, m3); + testCompare(expected, m3, "Compare dense vs MCSR Sparse"); } } - private void testCompare(MatrixBlock expected, MatrixBlock m2) { + private void testCompare(MatrixBlock expected, MatrixBlock m2, String message) { final MMTSJType mType = MMTSJType.LEFT; final MatrixBlock actual = m2.transposeSelfMatrixMultOperations(null, mType, k); - final String inString = m2.toString(); - TestUtils.compareMatricesBitAvgDistance(expected, actual, 10L, 256L, inString); + // final String inString = m2.toString(); + TestUtils.compareMatricesBitAvgDistance(expected, actual, 10L, 256L, message); } }
