Repository: systemml Updated Branches: refs/heads/master 0751ddc0f -> 3acd94186
[SYSTEMML-1904] Improved codegen dynamic recompilation decisions This patch improves the decision on marking HOPs, and thus HOP DAGs, for dynamic recompilation. For example, on AutoEncoder, the indexing of batches causes unknowns in the main DAG of the inner loop. Since the worst-case estimates are sufficient to compile these operations to CP, this DAG was previously not marked for dynamic recompilation. Hence, the codegen optimizer could never re-optimize this DAG. We now use more aggressive recompilation decisions for codegen, effectively marking every HOP DAG with unknown dimensions for dynamic recompilation. On one epoch of AutoEncoder over Mnist60k, batch=512, H1=500, H2=2, this change improved the end-to-end performance from 39s to 24s. Furthermore, this patch also includes two minor improvements of codegen outer templates: (1) improved performance of full aggregations due to fewer array loads and stores, and (2) a fix of the fusion condition to avoid transpose operations on the main input and intermediates. 
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/808a8f4f Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/808a8f4f Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/808a8f4f Branch: refs/heads/master Commit: 808a8f4f89a9586a9fd556170db723edd3be64f2 Parents: 0751ddc Author: Matthias Boehm <mboe...@gmail.com> Authored: Tue Sep 12 00:15:26 2017 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Tue Sep 12 20:13:17 2017 -0700 ---------------------------------------------------------------------- src/main/java/org/apache/sysml/hops/Hop.java | 9 +++++++-- .../sysml/hops/codegen/template/TemplateOuterProduct.java | 3 ++- .../org/apache/sysml/runtime/codegen/SpoofOuterProduct.java | 6 ++++-- 3 files changed, 13 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/808a8f4f/src/main/java/org/apache/sysml/hops/Hop.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/Hop.java b/src/main/java/org/apache/sysml/hops/Hop.java index b454771..7495d4f 100644 --- a/src/main/java/org/apache/sysml/hops/Hop.java +++ b/src/main/java/org/apache/sysml/hops/Hop.java @@ -1523,21 +1523,26 @@ public abstract class Hop implements ParseInfo /** * Marks the hop for dynamic recompilation, if dynamic recompilation is - * enabled and one of the two basic scenarios apply: + * enabled and one of the three basic scenarios apply: * <ul> * <li> The hop has unknown dimensions or sparsity and is scheduled for * remote execution, in which case the latency for distributed jobs easily * covers any recompilation overheads. </li> * <li> The hop has unknown dimensions and is scheduled for local execution * due to forced single node execution type. 
</li> + * <li> The hop has unknown dimensions and is scheduled for local execution + * due to good worst-case memory estimates but codegen is enabled, which + * requires (mostly) known sizes to validity conditions and cost estimation. </li> * <ul> <p> */ protected void setRequiresRecompileIfNecessary() { ExecType REMOTE = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR; boolean caseRemote = (!dimsKnown(true) && _etype == REMOTE); boolean caseLocal = (!dimsKnown() && _etypeForced == ExecType.CP); + boolean caseCodegen = (!dimsKnown() && ConfigurationManager.isCodegenEnabled()); - if( ConfigurationManager.isDynamicRecompilation() && (caseRemote || caseLocal) ) + if( ConfigurationManager.isDynamicRecompilation() + && (caseRemote || caseLocal || caseCodegen) ) setRequiresRecompile(); } http://git-wip-us.apache.org/repos/asf/systemml/blob/808a8f4f/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java index e4fb464..3756447 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java +++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java @@ -69,7 +69,8 @@ public class TemplateOuterProduct extends TemplateBase { || (hop instanceof BinaryOp && TemplateUtils.isOperationSupported(hop) && (TemplateUtils.isBinaryMatrixColVector(hop) || HopRewriteUtils.isBinaryMatrixScalarOperation(hop) || (HopRewriteUtils.isBinaryMatrixMatrixOperation(hop) && HopRewriteUtils.isBinary(hop, OpOp2.MULT, OpOp2.DIV)) )) - || (HopRewriteUtils.isTransposeOperation(hop) && !HopRewriteUtils.isOuterProductLikeMM(input)) + || (HopRewriteUtils.isTransposeOperation(hop) && input instanceof AggBinaryOp + && !HopRewriteUtils.isOuterProductLikeMM(input)) || (hop 
instanceof AggBinaryOp && !HopRewriteUtils.isOuterProductLikeMM(hop) && TemplateUtils.containsOuterProduct(input, HopRewriteUtils.getOtherInput(hop, input))) || (hop instanceof AggUnaryOp && ((AggUnaryOp)hop).getDirection()==Direction.RowCol)); http://git-wip-us.apache.org/repos/asf/systemml/blob/808a8f4f/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java index 1ec873f..c25a522 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java @@ -370,6 +370,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator final int blocksizeIJ = 16; //u/v block (max at typical L2 size) //blocked execution + double sum = 0; for( int bi = rl; bi < ru; bi+=blocksizeIJ ) for( int bj = cl, bimin = Math.min(ru, bi+blocksizeIJ); bj < cu; bj+=blocksizeIJ ) { @@ -379,13 +380,14 @@ public abstract class SpoofOuterProduct extends SpoofOperator for( int i=bi, ix=bi*n, uix=bi*k; i<bimin; i++, ix+=n, uix+=k ) for( int j=bj, vix=bj*k; j<bjmin; j++, vix+=k) if( a[ix+j] != 0 ) { - //int cix = (type == OutProdType.LEFT_OUTER_PRODUCT) ? vix : uix; if(type == OutProdType.CELLWISE_OUTER_PRODUCT) c[ix+j] = genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j ); else - c[0] += genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j); + sum += genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j); } } + if( type != OutProdType.CELLWISE_OUTER_PRODUCT ) + c[0] = sum; } private void executeSparse(SparseBlock sblock, double[] u, double[] v, double[][] b, double[] scalars,