Repository: systemml Updated Branches: refs/heads/master 8df0697e0 -> 65e2a46d2
[SYSTEMML-1857] Performance codegen outer operators (degree of par) Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/06fa73ac Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/06fa73ac Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/06fa73ac Branch: refs/heads/master Commit: 06fa73acc4639dd446c2f36c62a956803c247753 Parents: 8df0697 Author: Matthias Boehm <mboe...@gmail.com> Authored: Tue Aug 22 19:26:00 2017 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Wed Aug 23 12:41:45 2017 -0700 ---------------------------------------------------------------------- .../sysml/runtime/codegen/SpoofCellwise.java | 1 - .../runtime/codegen/SpoofMultiAggregate.java | 1 - .../sysml/runtime/codegen/SpoofOperator.java | 4 ++ .../runtime/codegen/SpoofOuterProduct.java | 47 ++++++++++++++------ .../sysml/runtime/codegen/SpoofRowwise.java | 1 - 5 files changed, 38 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/06fa73ac/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java index 575043b..5e22406 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java @@ -51,7 +51,6 @@ import org.apache.sysml.runtime.util.UtilFunctions; public abstract class SpoofCellwise extends SpoofOperator implements Serializable { private static final long serialVersionUID = 3442528770573293590L; - private static final long PAR_NUMCELL_THRESHOLD = 1024*1024; //Min 1M elements public enum CellType { NO_AGG, http://git-wip-us.apache.org/repos/asf/systemml/blob/06fa73ac/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java index ae3c353..4aa91bb 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java @@ -48,7 +48,6 @@ import org.apache.sysml.runtime.util.UtilFunctions; public abstract class SpoofMultiAggregate extends SpoofOperator implements Serializable { private static final long serialVersionUID = -6164871955591089349L; - private static final long PAR_NUMCELL_THRESHOLD = 1024*1024; //Min 1M elements private final AggOp[] _aggOps; private final boolean _sparseSafe; http://git-wip-us.apache.org/repos/asf/systemml/blob/06fa73ac/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java index 3ea9246..fea51e3 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java @@ -37,6 +37,10 @@ public abstract class SpoofOperator implements Serializable private static final long serialVersionUID = 3834006998853573319L; private static final Log LOG = LogFactory.getLog(SpoofOperator.class.getName()); + protected static final long PAR_NUMCELL_THRESHOLD = 1024*1024; //Min 1M elements + protected static final long PAR_MINFLOP_THRESHOLD = 2L*1024*1024; //MIN 2 MFLOP + + public abstract MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalars, MatrixBlock out) throws DMLRuntimeException; http://git-wip-us.apache.org/repos/asf/systemml/blob/06fa73ac/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java index bc99859..1ec873f 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java @@ -112,6 +112,9 @@ public abstract class SpoofOuterProduct extends SpoofOperator if( inputs.get(0).isEmptyBlock(false) ) return new DoubleObject(0); + if( 2*inputs.get(0).getNonZeros()*inputs.get(1).getNumColumns() < PAR_MINFLOP_THRESHOLD ) + return execute(inputs, scalarObjects); //sequential + //input preparation double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false)); double[][] b = getDenseMatrices(prepInputMatrices(inputs, 3, true)); @@ -121,15 +124,14 @@ public abstract class SpoofOuterProduct extends SpoofOperator final int m = inputs.get(0).getNumRows(); final int n = inputs.get(0).getNumColumns(); final int k = inputs.get(1).getNumColumns(); // rank + final long nnz = inputs.get(0).getNonZeros(); double sum = 0; try { ExecutorService pool = Executors.newFixedThreadPool(k); ArrayList<ParOuterProdAggTask> tasks = new ArrayList<ParOuterProdAggTask>(); - //create tasks (for wdivmm-left, parallelization over columns; - //for wdivmm-right, parallelization over rows; both ensure disjoint results) - int numThreads2 = UtilFunctions.roundToNext(Math.min(8*k,m/32), k); + int numThreads2 = getPreferredNumberOfTasks(m, n, nnz, k, numThreads); int blklen = (int)(Math.ceil((double)m/numThreads2)); for( int i=0; i<numThreads2 & i*blklen<m; i++ ) tasks.add(new ParOuterProdAggTask(inputs.get(0), ab[0], ab[1], b, scalars, @@ -259,6 +261,9 @@ public abstract class SpoofOuterProduct extends SpoofOperator out.allocateDenseBlock(); } + if( 2*inputs.get(0).getNonZeros()*inputs.get(1).getNumColumns() < PAR_MINFLOP_THRESHOLD ) + return execute(inputs, scalarObjects, out); //sequential + //input preparation double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false)); double[][] b = getDenseMatrices(prepInputMatrices(inputs, 3, true)); @@ -268,6 +273,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator final int m = inputs.get(0).getNumRows(); final int n = inputs.get(0).getNumColumns(); final int k = inputs.get(1).getNumColumns(); // rank + final long nnz = inputs.get(0).getNonZeros(); MatrixBlock a = inputs.get(0); @@ -284,21 +290,24 @@ public abstract class SpoofOuterProduct extends SpoofOperator int numCG = ((CompressedMatrixBlock)a).getNumColGroups(); int blklen = (int)(Math.ceil((double)numCG/numThreads)); for( int j=0; j<numThreads & j*blklen<numCG; j++ ) - tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, 0, m, j*blklen, Math.min((j+1)*blklen, numCG))); + tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, + _outerProductType, 0, m, j*blklen, Math.min((j+1)*blklen, numCG))); } else { //parallelize over column partitions int blklen = (int)(Math.ceil((double)n/numThreads)); for( int j=0; j<numThreads & j*blklen<n; j++ ) - tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, 0, m, j*blklen, Math.min((j+1)*blklen, n))); + tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, + _outerProductType, 0, m, j*blklen, Math.min((j+1)*blklen, n))); } } else { //right or cell-wise //parallelize over row partitions - int numThreads2 = UtilFunctions.roundToNext(Math.min(8*k,m/32), k); + int numThreads2 = getPreferredNumberOfTasks(m, n, nnz, k, numThreads); int blklen = (int)(Math.ceil((double)m/numThreads2)); for( int i=0; i<numThreads2 & i*blklen<m; i++ ) - tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, i*blklen, Math.min((i+1)*blklen,m), 0, n)); + tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, + _outerProductType, i*blklen, Math.min((i+1)*blklen,m), 0, n)); } List<Future<Long>> taskret = pool.invokeAll(tasks); pool.shutdown(); @@ -320,6 +329,13 @@ public abstract class SpoofOuterProduct extends SpoofOperator return out; } + private static int getPreferredNumberOfTasks(int m, int n, long nnz, int rank, int k) { + //compute number of tasks nk in range [k, 8k] + int base = (int) Math.min(Math.min(8*k, m/32), + Math.ceil((double)2*nnz*rank/PAR_MINFLOP_THRESHOLD)); + return UtilFunctions.roundToNext(base, k); + } + private void executeDense(double[] a, double[] u, double[] v, double[][] b, double[] scalars, double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) { @@ -427,6 +443,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator if( !out.isInSparseFormat() ) //DENSE { double[] c = out.getDenseBlock(); + double tmp = 0; for( int bi=rl; bi<ru; bi+=blocksizeIJ ) { int bimin = Math.min(ru, bi+blocksizeIJ); //prepare starting indexes for block row @@ -441,16 +458,20 @@ public abstract class SpoofOuterProduct extends SpoofOperator int[] wix = sblock.indexes(i); double[] wval = sblock.values(i); int index = wpos + curk[i-bi]; - for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) { - if(type == OutProdType.CELLWISE_OUTER_PRODUCT) - c[wix[index]] = genexecCellwise( wval[index], u, uix, v, wix[index]*k, b, scalars, m, n, k, i, wix[index] ); - else - c[0] += genexecCellwise( wval[index], u, uix, v, wix[index]*k, b, scalars, m, n, k, i, wix[index]); - } + if( type == OutProdType.CELLWISE_OUTER_PRODUCT ) + for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) + c[wix[index]] = genexecCellwise( wval[index], + u, uix, v, wix[index]*k, b, scalars, m, n, k, i, wix[index] ); + else + for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) + tmp += genexecCellwise( wval[index], + u, uix, v, wix[index]*k, b, scalars, m, n, k, i, wix[index]); curk[i-bi] = index - wpos; } } } + if( type != OutProdType.CELLWISE_OUTER_PRODUCT ) + c[0] = tmp; } else //SPARSE { http://git-wip-us.apache.org/repos/asf/systemml/blob/06fa73ac/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java index e2d9f41..f1cef34 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java @@ -44,7 +44,6 @@ import org.apache.sysml.runtime.util.UtilFunctions; public abstract class SpoofRowwise extends SpoofOperator { private static final long serialVersionUID = 6242910797139642998L; - private static final long PAR_NUMCELL_THRESHOLD = 1024*1024; //Min 1M elements public enum RowType { NO_AGG, //no aggregation