Repository: systemml
Updated Branches:
  refs/heads/master 4d5a82ecf -> 3ca053535


[SYSTEMML-445] Integrate GPU exectype selection into our existing infrastructure

Closes #627.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/3ca05353
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/3ca05353
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/3ca05353

Branch: refs/heads/master
Commit: 3ca05353593e7847dc6d6a7e862e323ffa96bfcc
Parents: 4d5a82e
Author: Niketan Pansare <npan...@us.ibm.com>
Authored: Tue Aug 22 14:55:37 2017 -0700
Committer: Niketan Pansare <npan...@us.ibm.com>
Committed: Tue Aug 22 14:55:37 2017 -0700

----------------------------------------------------------------------
 .../java/org/apache/sysml/hops/AggBinaryOp.java | 53 +++++++++-----
 .../java/org/apache/sysml/hops/AggUnaryOp.java  | 53 ++++++++------
 .../java/org/apache/sysml/hops/BinaryOp.java    | 77 +++++++++++++-------
 .../org/apache/sysml/hops/ConvolutionOp.java    | 12 ++-
 .../java/org/apache/sysml/hops/DataGenOp.java   |  6 ++
 src/main/java/org/apache/sysml/hops/DataOp.java |  5 ++
 .../java/org/apache/sysml/hops/FunctionOp.java  |  5 ++
 src/main/java/org/apache/sysml/hops/Hop.java    | 32 +++++---
 .../java/org/apache/sysml/hops/IndexingOp.java  |  5 ++
 .../org/apache/sysml/hops/LeftIndexingOp.java   |  5 ++
 .../java/org/apache/sysml/hops/LiteralOp.java   |  5 ++
 .../java/org/apache/sysml/hops/MultipleOp.java  |  5 ++
 .../sysml/hops/ParameterizedBuiltinOp.java      |  5 ++
 .../org/apache/sysml/hops/QuaternaryOp.java     |  5 ++
 .../java/org/apache/sysml/hops/ReorgOp.java     | 34 +++++++--
 .../java/org/apache/sysml/hops/TernaryOp.java   | 30 +++++---
 .../java/org/apache/sysml/hops/UnaryOp.java     | 38 +++++++---
 .../apache/sysml/hops/codegen/SpoofFusedOp.java |  5 ++
 18 files changed, 278 insertions(+), 102 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
index 4f709b4..11a2399 100644
--- a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
@@ -48,7 +48,6 @@ import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
 import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
-import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput;
@@ -143,6 +142,33 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
                return _method;
        }
        
+       @Override
+       public boolean isGPUEnabled() {
+               if(!DMLScript.USE_ACCELERATOR)
+                       return false;
+               
+               Hop input1 = getInput().get(0);
+               Hop input2 = getInput().get(1);
+               //matrix mult operation selection part 2 (specific pattern)
+               MMTSJType mmtsj = checkTransposeSelf(); //determine tsmm pattern
+               ChainType chain = checkMapMultChain(); //determine mmchain pattern
+               
+               _method = optFindMMultMethodCP ( input1.getDim1(), input1.getDim2(),   
+                             input2.getDim1(), input2.getDim2(), mmtsj, chain, _hasLeftPMInput );
+               switch( _method ){
+                       case TSMM: 
+                               return true;
+                       case MAPMM_CHAIN:
+                               return false;
+                       case PMM:
+                               return false;
+                       case MM:
+                               return true;
+                       default:
+                               throw new RuntimeException("Unsupported method:" + _method);
+               }
+       }
+       
        /**
         * NOTE: overestimated mem in case of transpose-identity matmult, but 
3/2 at worst
         *       and existing mem estimate advantageous in terms of consistency 
hops/lops,
@@ -169,7 +195,7 @@ public class AggBinaryOp extends Hop implements 
MultiThreadedHop
                        MMTSJType mmtsj = checkTransposeSelf(); //determine 
tsmm pattern
                        ChainType chain = checkMapMultChain(); //determine 
mmchain pattern
                        
-                       if( et == ExecType.CP ) 
+                       if( et == ExecType.CP || et == ExecType.GPU ) 
                        {
                                //matrix mult operation selection part 3 (CP 
type)
                                _method = optFindMMultMethodCP ( 
input1.getDim1(), input1.getDim2(),   
@@ -178,7 +204,7 @@ public class AggBinaryOp extends Hop implements 
MultiThreadedHop
                                //dispatch CP lops construction 
                                switch( _method ){
                                        case TSMM: 
-                                               constructCPLopsTSMM( mmtsj );
+                                               constructCPLopsTSMM( mmtsj, et 
);
                                                break;
                                        case MAPMM_CHAIN:
                                                constructCPLopsMMChain( chain );
@@ -187,7 +213,7 @@ public class AggBinaryOp extends Hop implements 
MultiThreadedHop
                                                constructCPLopsPMM();
                                                break;
                                        case MM:
-                                               constructCPLopsMM();
+                                               constructCPLopsMM(et);
                                                break;
                                        default:
                                                throw new 
HopsException(this.printErrorLocation() + "Invalid Matrix Mult Method (" + 
_method + ") while constructing CP lops.");
@@ -344,7 +370,7 @@ public class AggBinaryOp extends Hop implements 
MultiThreadedHop
        {
                double ret = 0;
 
-               if (DMLScript.USE_ACCELERATOR) {
+               if (isGPUEnabled()) {
                        // In GPU Mode, intermediate memory is only needed in 
case of one of the matrix blocks is sparse
                        // When sparse block is converted to dense and a dense 
MM takes place, we need (dim1 * dim2)
                        // When dense block is converted to sparse and a sparse 
MM takes place, we need (dim1 * dim2 * 2)
@@ -581,17 +607,11 @@ public class AggBinaryOp extends Hop implements 
MultiThreadedHop
        // CP Lops generation
        /////////////////////////
        
-       private void constructCPLopsTSMM( MMTSJType mmtsj ) 
+       private void constructCPLopsTSMM( MMTSJType mmtsj, ExecType et ) 
                throws HopsException, LopsException
        {
                int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
                
-               ExecType et = ExecType.CP;
-               if (DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR
-                               || getMemEstimate() < 
Math.min(GPUContextPool.initialGPUMemBudget(), 
OptimizerUtils.getLocalMemBudget()))) {
-                       et = ExecType.GPU;
-               }
-               
                Lop matmultCP = new 
MMTSJ(getInput().get(mmtsj.isLeft()?1:0).constructLops(),
                                                 getDataType(), getValueType(), 
et, mmtsj, false, k);
        
@@ -662,13 +682,12 @@ public class AggBinaryOp extends Hop implements 
MultiThreadedHop
                HopRewriteUtils.removeChildReference(pmInput, nrow);
        }
 
-       private void constructCPLopsMM() 
+       private void constructCPLopsMM(ExecType et) 
                throws HopsException, LopsException
        {       
                Lop matmultCP = null;
 
-               if (DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR
-                               || getMemEstimate() < 
Math.min(GPUContextPool.initialGPUMemBudget(), 
OptimizerUtils.getLocalMemBudget()))) {
+               if (et == ExecType.GPU) {
                        Hop h1 = getInput().get(0);
                        Hop h2 = getInput().get(1);
                        Lop left; Lop right;
@@ -691,7 +710,7 @@ public class AggBinaryOp extends Hop implements 
MultiThreadedHop
                        }
                        
                        matmultCP = new Binary(left, right, 
-                                                                        
Binary.OperationTypes.MATMULT, getDataType(), getValueType(), ExecType.GPU, 
isLeftTransposed, isRightTransposed);
+                                                                        
Binary.OperationTypes.MATMULT, getDataType(), getValueType(), et, 
isLeftTransposed, isRightTransposed);
                        setOutputDimensions(matmultCP);
                        setNnz(-1);
                }
@@ -702,7 +721,7 @@ public class AggBinaryOp extends Hop implements 
MultiThreadedHop
                        else { 
                                int k = 
OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
                                matmultCP = new 
Binary(getInput().get(0).constructLops(),getInput().get(1).constructLops(), 
-                                                                               
 Binary.OperationTypes.MATMULT, getDataType(), getValueType(), ExecType.CP, k);
+                                                                               
 Binary.OperationTypes.MATMULT, getDataType(), getValueType(), et, k);
                        }
                        setOutputDimensions(matmultCP);
                }

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/AggUnaryOp.java b/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
index 7a6d463..4f5e2bc 100644
--- a/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
@@ -38,7 +38,6 @@ import org.apache.sysml.lops.LopProperties.ExecType;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
-import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 
 
@@ -109,6 +108,30 @@ public class AggUnaryOp extends Hop implements 
MultiThreadedHop
        }
        
        @Override
+       public boolean isGPUEnabled() {
+               if(!DMLScript.USE_ACCELERATOR)
+                       return false;
+               
+               try {
+                       if( isTernaryAggregateRewriteApplicable() || isUnaryAggregateOuterCPRewriteApplicable() ) {
+                               return false;
+                       }
+                       else if ((_op == AggOp.SUM    && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
+                                        || (_op == AggOp.SUM_SQ && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
+                                        || (_op == AggOp.MAX    && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
+                                        || (_op == AggOp.MIN    && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
+                                        || (_op == AggOp.MEAN   && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
+                                        || (_op == AggOp.VAR    && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
+                                        || (_op == AggOp.PROD   && (_direction == Direction.RowCol))){
+                               return true;
+                       }
+               } catch (HopsException e) {
+                       throw new RuntimeException(e);
+               }
+               return false;
+       }
+       
+       @Override
        public Lop constructLops()
                throws HopsException, LopsException 
        {       
@@ -121,10 +144,10 @@ public class AggUnaryOp extends Hop implements 
MultiThreadedHop
                        ExecType et = optFindExecType();
                        Hop input = getInput().get(0);
                        
-                       if ( et == ExecType.CP ) 
+                       if ( et == ExecType.CP || et == ExecType.GPU ) 
                        {
                                Lop agg1 = null;
-                               if( isTernaryAggregateRewriteApplicable(et) ) {
+                               if( isTernaryAggregateRewriteApplicable() ) {
                                        agg1 = 
constructLopsTernaryAggregateRewrite(et);
                                }
                                else if( 
isUnaryAggregateOuterCPRewriteApplicable() )
@@ -149,20 +172,6 @@ public class AggUnaryOp extends Hop implements 
MultiThreadedHop
                                }                               
                                else { //general case           
                                        int k = 
OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
-                                       if (DMLScript.USE_ACCELERATOR && 
(DMLScript.FORCE_ACCELERATOR
-                                                       || getMemEstimate() < 
Math.min(GPUContextPool.initialGPUMemBudget(), 
OptimizerUtils.getLocalMemBudget()))) {
-                                               // Only implemented methods for 
GPU
-                                               if ((_op == AggOp.SUM    && 
(_direction == Direction.RowCol || _direction == Direction.Row || _direction == 
Direction.Col))
-                                                || (_op == AggOp.SUM_SQ && 
(_direction == Direction.RowCol || _direction == Direction.Row || _direction == 
Direction.Col))
-                                                || (_op == AggOp.MAX    && 
(_direction == Direction.RowCol || _direction == Direction.Row || _direction == 
Direction.Col))
-                                                || (_op == AggOp.MIN    && 
(_direction == Direction.RowCol || _direction == Direction.Row || _direction == 
Direction.Col))
-                                                || (_op == AggOp.MEAN   && 
(_direction == Direction.RowCol || _direction == Direction.Row || _direction == 
Direction.Col))
-                                                || (_op == AggOp.VAR    && 
(_direction == Direction.RowCol || _direction == Direction.Row || _direction == 
Direction.Col))
-                                                || (_op == AggOp.PROD   && 
(_direction == Direction.RowCol))){
-                                                       et = ExecType.GPU;
-                                                       k = 1;
-                                               }
-                                       }
                                        agg1 = new 
PartialAggregate(input.constructLops(), 
                                                        HopsAgg2Lops.get(_op), 
HopsDirection2Lops.get(_direction), getDataType(),getValueType(), et, k);
                                }
@@ -251,7 +260,7 @@ public class AggUnaryOp extends Hop implements 
MultiThreadedHop
                                DirectionTypes dir = 
HopsDirection2Lops.get(_direction);
 
                                //unary aggregate
-                               if( isTernaryAggregateRewriteApplicable(et) ) 
+                               if( isTernaryAggregateRewriteApplicable() ) 
                                {
                                        Lop aggregate = 
constructLopsTernaryAggregateRewrite(et);
                                        setOutputDimensions(aggregate); //0x0 
(scalar)
@@ -330,7 +339,7 @@ public class AggUnaryOp extends Hop implements 
MultiThreadedHop
        protected double computeOutputMemEstimate( long dim1, long dim2, long 
nnz )
        {
                double sparsity = -1;
-               if (DMLScript.USE_ACCELERATOR) {
+               if (isGPUEnabled()) {
                        // The GPU version (for the time being) only does dense 
outputs
                        sparsity = 1.0;
                } else {
@@ -373,7 +382,7 @@ public class AggUnaryOp extends Hop implements 
MultiThreadedHop
                                break;
                        case VAR:
                                //worst-case correction LASTFOURROWS / 
LASTFOURCOLUMNS
-                               if (DMLScript.USE_ACCELERATOR) {
+                               if (isGPUEnabled()) {
                                        // The GPU implementation only operates 
on dense data
                                        // It allocates 2 dense blocks to help 
with these ops:
                                        // Assume Y = var(X) Or colVars(X), Or 
rowVars(X)
@@ -506,7 +515,7 @@ public class AggUnaryOp extends Hop implements 
MultiThreadedHop
                        return SparkAggType.MULTI_BLOCK;
        }
 
-       private boolean isTernaryAggregateRewriteApplicable(ExecType et) 
+       private boolean isTernaryAggregateRewriteApplicable() 
                throws HopsException 
        {
                boolean ret = false;
@@ -726,6 +735,8 @@ public class AggUnaryOp extends Hop implements 
MultiThreadedHop
                // The execution type of a unary aggregate instruction should 
depend on the execution type of inputs to avoid OOM
                // Since we only support matrix-vector and not vector-matrix, 
checking the execution type of input1 should suffice.
                ExecType et_input = input1.optFindExecType();
+               // Because ternary aggregate are not supported on GPU
+               et_input = et_input == ExecType.GPU ? ExecType.CP :  et_input;
                DirectionTypes dir = HopsDirection2Lops.get(_direction);
                
                return new TernaryAggregate(in1, in2, in3, 
Aggregate.OperationTypes.KahanSum, 

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/BinaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/BinaryOp.java b/src/main/java/org/apache/sysml/hops/BinaryOp.java
index 54c06f7..ad9f0ad 100644
--- a/src/main/java/org/apache/sysml/hops/BinaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/BinaryOp.java
@@ -53,7 +53,6 @@ import org.apache.sysml.lops.UnaryCP;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
 import 
org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
-import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput;
 
@@ -134,6 +133,56 @@ public class BinaryOp extends Hop
        }
        
        @Override
+       public boolean isGPUEnabled() {
+               if(!DMLScript.USE_ACCELERATOR)
+                       return false;
+               
+               switch(op) 
+               {
+                       case IQM:
+                       case CENTRALMOMENT:
+                       case COVARIANCE:
+                       case QUANTILE:
+                       case INTERQUANTILE:
+                       case MEDIAN:
+                               return false;
+                       case CBIND: 
+                       case RBIND: {
+                               DataType dt1 = getInput().get(0).getDataType();
+                               return dt1 == DataType.MATRIX; // only matrix cbind, rbind supported on GPU
+                       }
+                       default: {
+                               DataType dt1 = getInput().get(0).getDataType();
+                               DataType dt2 = getInput().get(1).getDataType();
+                               
+                               boolean isMatrixScalar = (dt1 == DataType.MATRIX && dt2 == DataType.SCALAR) || (dt1 == DataType.SCALAR && dt2 == DataType.MATRIX);
+                               boolean isMatrixMatrix = (dt1 == DataType.MATRIX && dt2 == DataType.MATRIX);
+                               
+                               OpOp2 [] supportedOps = { OpOp2.MULT, OpOp2.PLUS, OpOp2.MINUS, OpOp2.DIV, OpOp2.POW, OpOp2.MINUS1_MULT, 
+                                               OpOp2.MODULUS, OpOp2.INTDIV, OpOp2.LESS, OpOp2.LESSEQUAL, OpOp2.EQUAL, OpOp2.NOTEQUAL, OpOp2.GREATER, OpOp2.GREATEREQUAL};
+                       
+                               if(isMatrixScalar && op == OpOp2.MINUS_NZ) {
+                                       // Only supported for matrix scalar:
+                                       return true;
+                               }
+                               else if(isMatrixMatrix && op == OpOp2.SOLVE) {
+                                       // Only supported for matrix matrix:
+                                       return true;
+                               }
+                               else if(isMatrixScalar || isMatrixMatrix) {
+                                       for(OpOp2 supportedOp : supportedOps) {
+                                               if(op == supportedOp)
+                                                       return true;
+                                       }
+                                       return false;
+                               }
+                               else
+                                       return false;
+                       }
+               }
+       }
+       
+       @Override
        public Lop constructLops() 
                throws HopsException, LopsException 
        {       
@@ -527,11 +576,6 @@ public class BinaryOp extends Hop
                        }
                        else //CP
                        {
-                               if (DMLScript.USE_ACCELERATOR && dt1 == 
DataType.MATRIX && (DMLScript.FORCE_ACCELERATOR
-                                               || getMemEstimate() < 
GPUContextPool.initialGPUMemBudget())) {
-                                       et = ExecType.GPU;
-                               }
-
                                Lop offset = createOffsetLop( 
getInput().get(0), cbind ); //offset 1st input
                                append = new 
Append(getInput().get(0).constructLops(), getInput().get(1).constructLops(), 
offset, getDataType(), getValueType(), cbind, et);
                                
append.getOutputParameters().setDimensions(rlen, clen, getRowsInBlock(), 
getColsInBlock(), getNnz());
@@ -582,14 +626,6 @@ public class BinaryOp extends Hop
                        else //general case
                                ot = HopsOpOp2LopsU.get(op);
 
-                       if (DMLScript.USE_ACCELERATOR && 
(DMLScript.FORCE_ACCELERATOR
-                                       || getMemEstimate() < 
Math.min(GPUContextPool.initialGPUMemBudget(), 
OptimizerUtils.getLocalMemBudget()))
-                                       && (op == OpOp2.MULT || op == 
OpOp2.PLUS || op == OpOp2.MINUS || op == OpOp2.DIV || op == OpOp2.POW
-                                       || op == OpOp2.MINUS_NZ || op == 
OpOp2.MINUS1_MULT || op == OpOp2.MODULUS || op == OpOp2.INTDIV
-                                       || op == OpOp2.LESS || op == 
OpOp2.LESSEQUAL || op == OpOp2.EQUAL || op == OpOp2.NOTEQUAL
-                                       || op == OpOp2.GREATER || op == 
OpOp2.GREATEREQUAL)) {
-                               et = ExecType.GPU;
-                       }
                        Unary unary1 = new 
Unary(getInput().get(0).constructLops(),
                                                   
getInput().get(1).constructLops(), ot, getDataType(), getValueType(), et);
                
@@ -602,17 +638,8 @@ public class BinaryOp extends Hop
                {
                        // Both operands are Matrixes
                        ExecType et = optFindExecType();
-                       if ( et == ExecType.CP ) 
+                       if ( et == ExecType.CP || et == ExecType.GPU ) 
                        {
-                               if(DMLScript.USE_ACCELERATOR && 
(DMLScript.FORCE_ACCELERATOR
-                                               || getMemEstimate() < 
Math.min(GPUContextPool.initialGPUMemBudget(), 
OptimizerUtils.getLocalMemBudget()))
-                                               && (op == OpOp2.MULT || op == 
OpOp2.PLUS || op == OpOp2.MINUS || op == OpOp2.DIV || op == OpOp2.POW
-                                               || op == OpOp2.SOLVE || op == 
OpOp2.MINUS1_MULT || op == OpOp2.MODULUS || op == OpOp2.INTDIV
-                                               || op == OpOp2.LESS || op == 
OpOp2.LESSEQUAL || op == OpOp2.EQUAL || op == OpOp2.NOTEQUAL
-                                               || op == OpOp2.GREATER || op == 
OpOp2.GREATEREQUAL)) {
-                                       et = ExecType.GPU;
-                               }
-                               
                                Lop binary = null;
                                
                                boolean isLeftXGt = (getInput().get(0) 
instanceof BinaryOp) && ((BinaryOp) getInput().get(0)).getOp() == OpOp2.GREATER;
@@ -827,7 +854,7 @@ public class BinaryOp extends Hop
                        ret = getInput().get(0).getMemEstimate() * 3; 
                }
                else if ( op == OpOp2.SOLVE ) {
-                       if (DMLScript.USE_ACCELERATOR) {
+                       if (isGPUEnabled()) {
                                // Solve on the GPU takes an awful lot of 
intermediate space
                                // First the inputs are converted from 
row-major to column major
                                // Then a workspace and a temporary output 
(workSize, tauSize) are needed

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
index a3d8a81..2b9335c 100644
--- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
+++ b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
@@ -19,6 +19,7 @@
 
 package org.apache.sysml.hops;
 
+import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.hops.Hop.MultiThreadedHop;
 import org.apache.sysml.lops.ConvolutionTransform;
 import org.apache.sysml.lops.ConvolutionTransform.OperationTypes;
@@ -79,6 +80,13 @@ public class ConvolutionOp extends Hop  implements 
MultiThreadedHop
        }
        
        @Override
+       public boolean isGPUEnabled() {
+               if(!DMLScript.USE_ACCELERATOR)
+                       return false;
+               return true;
+       }
+       
+       @Override
        public Lop constructLops()
                throws HopsException, LopsException 
        {
@@ -315,12 +323,12 @@ public class ConvolutionOp extends Hop  implements 
MultiThreadedHop
                
                if( _etypeForced != null )                      
                {
-                       _etype = findGPUExecTypeByMemEstimate(_etypeForced);
+                       _etype = _etypeForced;
                }
                else 
                {       
                        if ( OptimizerUtils.isMemoryBasedOptLevel() ) {
-                               _etype = 
findGPUExecTypeByMemEstimate(findExecTypeByMemEstimate());
+                               _etype = findExecTypeByMemEstimate();
                        }
                        else {
                                _etype = REMOTE;

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/DataGenOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/DataGenOp.java b/src/main/java/org/apache/sysml/hops/DataGenOp.java
index ce08dbc..89a5814 100644
--- a/src/main/java/org/apache/sysml/hops/DataGenOp.java
+++ b/src/main/java/org/apache/sysml/hops/DataGenOp.java
@@ -146,6 +146,11 @@ public class DataGenOp extends Hop implements 
MultiThreadedHop
        }
        
        @Override
+       public boolean isGPUEnabled() {
+               return false;
+       }
+       
+       @Override
        public Lop constructLops() 
                throws HopsException, LopsException
        {
@@ -502,4 +507,5 @@ public class DataGenOp extends Hop implements 
MultiThreadedHop
                
                return ret;
        }
+
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/DataOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/DataOp.java b/src/main/java/org/apache/sysml/hops/DataOp.java
index bcded04..f410210 100644
--- a/src/main/java/org/apache/sysml/hops/DataOp.java
+++ b/src/main/java/org/apache/sysml/hops/DataOp.java
@@ -241,6 +241,11 @@ public class DataOp extends Hop
        }
        
        @Override
+       public boolean isGPUEnabled() {
+               return false;
+       }
+       
+       @Override
        public Lop constructLops()
                        throws HopsException, LopsException 
        {       

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/FunctionOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/FunctionOp.java b/src/main/java/org/apache/sysml/hops/FunctionOp.java
index c677bb8..3ad2d15 100644
--- a/src/main/java/org/apache/sysml/hops/FunctionOp.java
+++ b/src/main/java/org/apache/sysml/hops/FunctionOp.java
@@ -209,6 +209,11 @@ public class FunctionOp extends Hop
        }
        
        @Override
+       public boolean isGPUEnabled() {
+               return false;
+       }
+       
+       @Override
        public Lop constructLops() 
                throws HopsException, LopsException 
        {

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/Hop.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/Hop.java b/src/main/java/org/apache/sysml/hops/Hop.java
index bfbdbaf..1cf875f 100644
--- a/src/main/java/org/apache/sysml/hops/Hop.java
+++ b/src/main/java/org/apache/sysml/hops/Hop.java
@@ -192,7 +192,9 @@ public abstract class Hop
        
        public void checkAndSetForcedPlatform()
        {
-               if ( DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE )
+               if(DMLScript.USE_ACCELERATOR && DMLScript.FORCE_ACCELERATOR && 
isGPUEnabled())
+                       _etypeForced = ExecType.GPU;
+               else if ( DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE )
                        _etypeForced = ExecType.CP;
                else if ( DMLScript.rtplatform == RUNTIME_PLATFORM.HADOOP )
                        _etypeForced = ExecType.MR;
@@ -768,8 +770,12 @@ public abstract class Hop
        protected ExecType findExecTypeByMemEstimate() {
                ExecType et = null;
                char c = ' ';
-               if ( getMemEstimate() < OptimizerUtils.getLocalMemBudget() ) {
-                       et = ExecType.CP;
+               double memEst = getMemEstimate();
+               if ( memEst < OptimizerUtils.getLocalMemBudget() ) {
+                       if (DMLScript.USE_ACCELERATOR && isGPUEnabled() && 
memEst < GPUContextPool.initialGPUMemBudget())
+                               et = ExecType.GPU;
+                       else
+                               et = ExecType.CP;
                }
                else {
                        if( DMLScript.rtplatform == 
DMLScript.RUNTIME_PLATFORM.HYBRID )
@@ -788,14 +794,6 @@ public abstract class Hop
                
                return et;
        }
-       
-       protected ExecType findGPUExecTypeByMemEstimate(ExecType et) {
-               if (DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR
-                               || getMemEstimate() < 
Math.min(GPUContextPool.initialGPUMemBudget(), 
OptimizerUtils.getLocalMemBudget()))) {
-                       return ExecType.GPU;
-               }
-               return et;
-       }
 
        public ArrayList<Hop> getParent() {
                return _parent;
@@ -850,6 +848,18 @@ public abstract class Hop
        
        public abstract String getOpString();
 
+       /**
+        * In memory-based optimizer mode (see 
OptimizerUtils.isMemoryBasedOptLevel()), 
+        * the exectype is determined by checking this method as well as the memory 
budget of this Hop. 
+        * Please see findExecTypeByMemEstimate for more detail. 
+        * 
+        * This method is necessary because not all operators are supported 
efficiently
+        * on GPU (for example: operations on frames and scalars as well as 
operations such as table). 
+        * 
+        * @return true if the Hop is eligible for GPU Exectype.
+        */
+       public abstract boolean isGPUEnabled();
+       
        protected boolean isVector() {
                return (dimsKnown() && (_dim1 == 1 || _dim2 == 1) );
        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/IndexingOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/IndexingOp.java 
b/src/main/java/org/apache/sysml/hops/IndexingOp.java
index 5a27ed6..5f2ce34 100644
--- a/src/main/java/org/apache/sysml/hops/IndexingOp.java
+++ b/src/main/java/org/apache/sysml/hops/IndexingOp.java
@@ -94,6 +94,11 @@ public class IndexingOp extends Hop
        public void setColLowerEqualsUpper(boolean passed) {
                _colLowerEqualsUpper = passed;
        }
+       
+       @Override
+       public boolean isGPUEnabled() {
+               return false;
+       }
 
        @Override
        public Lop constructLops()

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/LeftIndexingOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/LeftIndexingOp.java 
b/src/main/java/org/apache/sysml/hops/LeftIndexingOp.java
index a641622..02e7753 100644
--- a/src/main/java/org/apache/sysml/hops/LeftIndexingOp.java
+++ b/src/main/java/org/apache/sysml/hops/LeftIndexingOp.java
@@ -99,6 +99,11 @@ public class LeftIndexingOp  extends Hop
        }
        
        @Override
+       public boolean isGPUEnabled() {
+               return false;
+       }
+       
+       @Override
        public Lop constructLops()
                throws HopsException, LopsException 
        {                       

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/LiteralOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/LiteralOp.java 
b/src/main/java/org/apache/sysml/hops/LiteralOp.java
index b96d032..16ebf1b 100644
--- a/src/main/java/org/apache/sysml/hops/LiteralOp.java
+++ b/src/main/java/org/apache/sysml/hops/LiteralOp.java
@@ -73,6 +73,11 @@ public class LiteralOp extends Hop
        public void checkArity() throws HopsException {
                HopsException.check(_input.isEmpty(), this, "should have 0 
inputs but has %d inputs", _input.size());
        }
+       
+       @Override
+       public boolean isGPUEnabled() {
+               return false;
+       }
 
        @Override
        public Lop constructLops()

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/MultipleOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/MultipleOp.java 
b/src/main/java/org/apache/sysml/hops/MultipleOp.java
index 5fb6b29..5c178c0 100644
--- a/src/main/java/org/apache/sysml/hops/MultipleOp.java
+++ b/src/main/java/org/apache/sysml/hops/MultipleOp.java
@@ -80,6 +80,11 @@ public class MultipleOp extends Hop {
        public String getOpString() {
                return "m(" + _op.name().toLowerCase() + ")";
        }
+       
+       @Override
+       public boolean isGPUEnabled() {
+               return false;
+       }
 
        /**
         * Construct the corresponding Lops for this Hop

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java 
b/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
index ab276d7..a611893 100644
--- a/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
+++ b/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
@@ -175,6 +175,11 @@ public class ParameterizedBuiltinOp extends Hop implements 
MultiThreadedHop
        }
        
        @Override
+       public boolean isGPUEnabled() {
+               return false;
+       }
+       
+       @Override
        public Lop constructLops() 
                throws HopsException, LopsException 
        {               

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/QuaternaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/QuaternaryOp.java 
b/src/main/java/org/apache/sysml/hops/QuaternaryOp.java
index 6517de6..17188be 100644
--- a/src/main/java/org/apache/sysml/hops/QuaternaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/QuaternaryOp.java
@@ -189,6 +189,11 @@ public class QuaternaryOp extends Hop implements 
MultiThreadedHop
        }
        
        @Override
+       public boolean isGPUEnabled() {
+               return false;
+       }
+       
+       @Override
        public Lop constructLops() 
                throws HopsException, LopsException 
        {       

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/ReorgOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ReorgOp.java 
b/src/main/java/org/apache/sysml/hops/ReorgOp.java
index 3e27eb3..f0560d3 100644
--- a/src/main/java/org/apache/sysml/hops/ReorgOp.java
+++ b/src/main/java/org/apache/sysml/hops/ReorgOp.java
@@ -34,7 +34,6 @@ import org.apache.sysml.lops.LopProperties.ExecType;
 import org.apache.sysml.lops.Transform.OperationTypes;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
-import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 
 /**
@@ -129,6 +128,35 @@ public class ReorgOp extends Hop implements 
MultiThreadedHop
                s += "r(" + HopsTransf2String.get(op) + ")";
                return s;
        }
+       
+       @Override
+       public boolean isGPUEnabled() {
+               if(!DMLScript.USE_ACCELERATOR)
+                       return false;
+               switch( op ) {
+                       case TRANSPOSE: {
+                               Lop lin;
+                               try {
+                                       lin = getInput().get(0).constructLops();
+                               } catch (HopsException | LopsException e) {
+                                       throw new RuntimeException("Unable to 
create child lop", e);
+                               }
+                               if( lin instanceof Transform && 
((Transform)lin).getOperationType()==OperationTypes.Transpose )
+                                       return false; //if input is already a 
transpose, avoid redundant transpose ops
+                               else if( getDim1()==1 && getDim2()==1 )
+                                       return false; //if input of size 1x1, 
avoid unnecessary transpose
+                               else
+                                       return true;
+                       }
+                       case DIAG:
+                       case REV:
+                       case RESHAPE:
+                       case SORT:
+                               return false;
+                       default:
+                               throw new RuntimeException("Unsupported 
operator:" + op.name());
+               }
+       }
 
        @Override
        public Lop constructLops()
@@ -151,10 +179,6 @@ public class ReorgOp extends Hop implements 
MultiThreadedHop
                                        setLops(lin); //if input of size 1x1, 
avoid unnecessary transpose
                                else { //general case
                                        int k = 
OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
-                                       if (DMLScript.USE_ACCELERATOR && 
(DMLScript.FORCE_ACCELERATOR
-                                                       || getMemEstimate() < 
Math.min(GPUContextPool.initialGPUMemBudget(), 
OptimizerUtils.getLocalMemBudget()))) {
-                                               et = ExecType.GPU;
-                                       }
                                        Transform transform1 = new Transform( 
lin, 
                                                        
HopsTransf2Lops.get(op), getDataType(), getValueType(), et, k);
                                        setOutputDimensions(transform1);

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/TernaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/TernaryOp.java 
b/src/main/java/org/apache/sysml/hops/TernaryOp.java
index 98c8ad3..47b012e 100644
--- a/src/main/java/org/apache/sysml/hops/TernaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/TernaryOp.java
@@ -42,7 +42,6 @@ import 
org.apache.sysml.lops.PartialAggregate.CorrectionLocationType;
 import org.apache.sysml.parser.Statement;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
-import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 
 /** Primary use cases for now, are
@@ -128,6 +127,25 @@ public class TernaryOp extends Hop
        }
        
        @Override
+       public boolean isGPUEnabled() {
+               if(!DMLScript.USE_ACCELERATOR)
+                       return false;
+               switch( _op ) {
+                       case CENTRALMOMENT:
+                       case COVARIANCE:
+                       case CTABLE:
+                       case INTERQUANTILE:
+                       case QUANTILE:
+                               return false;
+                       case MINUS_MULT:
+                       case PLUS_MULT:
+                               return true;
+                       default:
+                               throw new RuntimeException("Unsupported 
operator:" + _op.name());
+               }
+       }
+       
+       @Override
        public Lop constructLops() 
                throws HopsException, LopsException 
        {       
@@ -631,13 +649,7 @@ public class TernaryOp extends Hop
                if ( _op != OpOp3.PLUS_MULT && _op != OpOp3.MINUS_MULT )
                        throw new HopsException("Unexpected operation: " + _op 
+ ", expecting " + OpOp3.PLUS_MULT + " or" +  OpOp3.MINUS_MULT);
                
-               ExecType et = null;
-               if (DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR
-                               || getMemEstimate() < 
Math.min(GPUContextPool.initialGPUMemBudget(), 
OptimizerUtils.getLocalMemBudget()))) {
-                       et = ExecType.GPU;
-               } else {
-                       et = optFindExecType();
-               }
+               ExecType et = optFindExecType();
                PlusMult plusmult = null;
                
                if( et == ExecType.CP || et == ExecType.SPARK || et == 
ExecType.GPU ) {
@@ -711,7 +723,7 @@ public class TernaryOp extends Hop
                                return 
OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, 1.0);
                        case PLUS_MULT:
                        case MINUS_MULT: {
-                               if (DMLScript.USE_ACCELERATOR) {
+                               if (isGPUEnabled()) {
                                        // For the GPU, the input is converted 
to dense
                                        sparsity = 1.0;
                                } else {

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/UnaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/UnaryOp.java 
b/src/main/java/org/apache/sysml/hops/UnaryOp.java
index 2b31247..0a5bc65 100644
--- a/src/main/java/org/apache/sysml/hops/UnaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/UnaryOp.java
@@ -99,6 +99,29 @@ public class UnaryOp extends Hop implements MultiThreadedHop
        }
        
        @Override
+       public boolean isGPUEnabled() {
+               if(!DMLScript.USE_ACCELERATOR)
+                       return false;
+               boolean isScalar = (    getDataType() == DataType.SCALAR 
//value type casts or matrix to scalar
+                               || (_op == OpOp1.CAST_AS_MATRIX && 
getInput().get(0).getDataType()==DataType.SCALAR)
+                               || (_op == OpOp1.CAST_AS_FRAME && 
getInput().get(0).getDataType()==DataType.SCALAR));
+               if(!isScalar) {
+                       switch(_op) {
+                               case SELP:case EXP:case SQRT:case LOG:case ABS:
+                               case ROUND:case FLOOR:case CEIL:
+                               case SIN:case COS: case TAN:case ASIN:case 
ACOS:case ATAN:
+                               case SIGN:
+                                       return true;
+                               default:
+                                       return false;
+                       }
+               }
+               else  {
+                       return false;
+               }
+       }
+       
+       @Override
        public Lop constructLops()
                throws HopsException, LopsException 
        {               
@@ -149,7 +172,7 @@ public class UnaryOp extends Hop implements MultiThreadedHop
                                ExecType et = optFindExecType();
                                
                                //special handling cumsum/cumprod/cummin/cumsum
-                               if( isCumulativeUnaryOperation() && et != 
ExecType.CP )  
+                               if( isCumulativeUnaryOperation() && !(et == 
ExecType.CP || et == ExecType.GPU) )  
                                {
                                        //TODO additional physical operation if 
offsets fit in memory
                                        Lop cumsumLop = null;
@@ -162,15 +185,6 @@ public class UnaryOp extends Hop implements 
MultiThreadedHop
                                else //default unary 
                                {
                                        int k = isCumulativeUnaryOperation() ? 
OptimizerUtils.getConstrainedNumThreads( _maxNumThreads ) : 1;
-                                       switch(_op) {
-                                               case SELP:case EXP:case 
SQRT:case LOG:case ABS:
-                                               case ROUND:case FLOOR:case CEIL:
-                                               case SIN:case COS: case 
TAN:case ASIN:case ACOS:case ATAN:
-                                               case SIGN:
-                                                       et = 
findGPUExecTypeByMemEstimate(et);
-                                                       break;
-                                               default:
-                                       }
                                        Unary unary1 = new 
Unary(input.constructLops(), HopsOpOp1LopsU.get(_op), 
                                                                         
getDataType(), getValueType(), et, k);
                                        setOutputDimensions(unary1);
@@ -550,7 +564,7 @@ public class UnaryOp extends Hop implements MultiThreadedHop
        protected double computeOutputMemEstimate( long dim1, long dim2, long 
nnz )
        {
                double sparsity = -1;
-               if (DMLScript.USE_ACCELERATOR) {
+               if (isGPUEnabled()) {
                        sparsity = 1.0; // Output is always dense (for now) on 
the GPU
                } else {
                        sparsity = OptimizerUtils.getSparsity(dim1, dim2, nnz);
@@ -569,7 +583,7 @@ public class UnaryOp extends Hop implements MultiThreadedHop
                        ret = getInput().get(0).getMemEstimate() * 3; 
                }
 
-               if (DMLScript.USE_ACCELERATOR) {
+               if (isGPUEnabled()) {
                        OptimizerUtils.estimateSize(dim1, dim2); // 
Intermediate memory required to convert sparse to dense
                }
                

http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java 
b/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
index 0d4b8db..247a142 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
@@ -283,4 +283,9 @@ public class SpoofFusedOp extends Hop implements 
MultiThreadedHop
                
                return ret;
        }
+
+       @Override
+       public boolean isGPUEnabled() {
+               return false;
+       }
 }

Reply via email to