This is an automated email from the ASF dual-hosted git repository.

niketanpansare pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/systemml.git
The following commit(s) were added to refs/heads/master by this push: new fbd3aab [SYSTEMML-540] Integrate the lstm builtin function in Keras2DML fbd3aab is described below commit fbd3aabbda8027e34744ad97a81f1376cf5f2041 Author: Niketan Pansare <npan...@us.ibm.com> AuthorDate: Wed Mar 20 10:54:48 2019 -0700 [SYSTEMML-540] Integrate the lstm builtin function in Keras2DML - Also, migrated the builtin function layer from staging to nn. - Updated the GPU tests. --- scripts/nn/layers/conv2d.dml | 2 ++ scripts/nn/layers/lstm.dml | 2 ++ .../nn/layers/{lstm_staging.dml => lstm_builtin.dml} | 4 ++-- scripts/nn/layers/max_pool2d.dml | 2 ++ src/main/python/systemml/mllearn/estimators.py | 5 ++++- .../scala/org/apache/sysml/api/dl/Caffe2DML.scala | 4 ++++ .../scala/org/apache/sysml/api/dl/CaffeLayer.scala | 19 +++++++++++++++---- .../java/org/apache/sysml/test/gpu/LstmCPUTest.java | 2 +- src/test/java/org/apache/sysml/test/gpu/LstmTest.java | 2 +- 9 files changed, 33 insertions(+), 9 deletions(-) diff --git a/scripts/nn/layers/conv2d.dml b/scripts/nn/layers/conv2d.dml index 49d887b..de40668 100644 --- a/scripts/nn/layers/conv2d.dml +++ b/scripts/nn/layers/conv2d.dml @@ -21,6 +21,8 @@ /* * 2D Convolutional layer. + * + * Consider using conv2d_builtin.dml for better performance. */ source("nn/util.dml") as util diff --git a/scripts/nn/layers/lstm.dml b/scripts/nn/layers/lstm.dml index cd1557d..838cc44 100644 --- a/scripts/nn/layers/lstm.dml +++ b/scripts/nn/layers/lstm.dml @@ -21,6 +21,8 @@ /* * LSTM layer. + * + * Consider using lstm_builtin.dml for better performance. 
*/ source("nn/layers/sigmoid.dml") as sigmoid source("nn/layers/tanh.dml") as tanh diff --git a/scripts/nn/layers/lstm_staging.dml b/scripts/nn/layers/lstm_builtin.dml similarity index 98% rename from scripts/nn/layers/lstm_staging.dml rename to scripts/nn/layers/lstm_builtin.dml index f1934da..95661f8 100644 --- a/scripts/nn/layers/lstm_staging.dml +++ b/scripts/nn/layers/lstm_builtin.dml @@ -21,9 +21,9 @@ /* * LSTM layer. + * + * This implementation uses a built-in operator for higher performance. */ -source("nn/layers/sigmoid.dml") as sigmoid -source("nn/layers/tanh.dml") as tanh forward = function(matrix[double] X, matrix[double] W, matrix[double] b, boolean return_sequences, matrix[double] out0, matrix[double] c0) diff --git a/scripts/nn/layers/max_pool2d.dml b/scripts/nn/layers/max_pool2d.dml index fba1a4c..ee57141 100644 --- a/scripts/nn/layers/max_pool2d.dml +++ b/scripts/nn/layers/max_pool2d.dml @@ -21,6 +21,8 @@ /* * Max Pooling layer. + * + * Consider using max_pool2d_builtin.dml for better performance. 
*/ source("nn/util.dml") as util diff --git a/src/main/python/systemml/mllearn/estimators.py b/src/main/python/systemml/mllearn/estimators.py index 144cf66..d6aa8e8 100644 --- a/src/main/python/systemml/mllearn/estimators.py +++ b/src/main/python/systemml/mllearn/estimators.py @@ -924,7 +924,7 @@ class Caffe2DML(BaseSystemMLClassifier): self.estimator.setWeightsToIgnore(ignore_weights) def set(self, debug=None, train_algo=None, test_algo=None, parallel_batches=None, - output_activations=None, perform_one_hot_encoding=None, parfor_parameters=None, inline_nn_library=None): + output_activations=None, perform_one_hot_encoding=None, parfor_parameters=None, inline_nn_library=None, use_builtin_lstm_fn=None): """ Set input to Caffe2DML @@ -938,6 +938,7 @@ class Caffe2DML(BaseSystemMLClassifier): perform_one_hot_encoding: should perform one-hot encoding in DML using table function (default: False) parfor_parameters: dictionary for parfor parameters when using allreduce-style algorithms (default: "") inline_nn_library: whether to inline the NN library when generating DML using Caffe2DML (default: False) + use_builtin_lstm_fn: whether to use builtin lstm function for LSTM layer (default: True) """ if debug is not None: self.estimator.setInput("$debug", str(debug).upper()) @@ -949,6 +950,8 @@ class Caffe2DML(BaseSystemMLClassifier): self.estimator.setInput("$test_algo", str(test_algo).lower()) if parallel_batches is not None: self.estimator.setInput("$parallel_batches", str(parallel_batches)) + if use_builtin_lstm_fn is not None: + self.estimator.setInput("$use_builtin_lstm_fn", str(use_builtin_lstm_fn).upper()) if output_activations is not None: self.estimator.setInput( "$output_activations", diff --git a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala index 13f8a65..e480dfc 100644 --- a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala +++ b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala @@ -303,6 
+303,10 @@ class Caffe2DML(val sc: SparkContext, def setDebugFlags(isDebug:Boolean):Unit = { net.getLayers.map(layer => {net.getCaffeLayer(layer).debugLayer = isDebug}) net.getLayers.map(layer => {net.getCaffeLayer(layer).caffe2dmlObj = this}) + net.getLayers.filter(layer => net.getCaffeLayer(layer).isInstanceOf[LSTM]).map(layer => { + if (inputs.containsKey("$use_builtin_lstm_fn")) + net.getCaffeLayer(layer).asInstanceOf[LSTM].useBuiltinFunction(inputs.get("$use_builtin_lstm_fn").toLowerCase.toBoolean) + }) } // Comma is included diff --git a/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala b/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala index f405fb2..47920ca 100644 --- a/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala +++ b/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala @@ -986,6 +986,10 @@ class RNN(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extends class LSTM(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extends CaffeLayer with HasWeight with HasBias { val return_sequences = param.getRecurrentParam.getReturnSequences + var _useBuiltinFunction = true + def useBuiltinFunction(enabled:Boolean): Unit = { + _useBuiltinFunction = enabled + } // --------------------------------------------------------- // Note: since Caffe doesnot have return_sequences, number of output is same as number of neurons def M():String = param.getRecurrentParam.getNumOutput.toString @@ -994,7 +998,7 @@ class LSTM(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extend def timesteps():String = bottomLayerOutputShape._1 def input_features():String = bottomLayerOutputShape._2 def output_features():Int = param.getRecurrentParam.getNumOutput - override def sourceFileName = "lstm" + override def sourceFileName = if(_useBuiltinFunction) "lstm_builtin" else "lstm" override def outputShape = if(return_sequences) (timesteps, output_features.toString, "1") else (output_features.toString, "1", "1") override def 
biasShape(): Array[Int] = Array(1, 4*M.toInt) override def weightShape(): Array[Int] = Array(input_features.toInt + M.toInt, 4*M.toInt) @@ -1009,17 +1013,24 @@ class LSTM(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extend val N:String = null // output_features.toString val T = timesteps() val D = input_features() - invokeForward(dmlScript, List[String](out, c, cache_out, cache_c, cache_ifog), X, weight, bias, T, D, return_sequences.toString.toUpperCase, out0, c0) + if(_useBuiltinFunction) + invokeForward(dmlScript, List[String](out, c, cache_out), X, weight, bias, return_sequences.toString.toUpperCase, out0, c0) + else + invokeForward(dmlScript, List[String](out, c, cache_out, cache_c, cache_ifog), X, weight, bias, T, D, return_sequences.toString.toUpperCase, out0, c0) } override def backward(dmlScript: StringBuilder, outSuffix: String) = { val T = timesteps() val D = input_features() - invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias, dout0, dc0), dout, dc0, X, weight, bias, + if(_useBuiltinFunction) + invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias, dout0, dc0), dout, dc0, X, weight, bias, + T, D, return_sequences.toString.toUpperCase, out0, c0, cache_out) + else + invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias, dout0, dc0), dout, dc0, X, weight, bias, T, D, return_sequences.toString.toUpperCase, out0, c0, cache_out, cache_c, cache_ifog) } - val cache_out = "cache_out_" + id + def cache_out() = if(_useBuiltinFunction) ("lstm_state_" + id) else ("cache_out_" + id) val out0 = "out0_" + id val dout0 = "dout0_" + id val c0 = "cellState0_" + id diff --git a/src/test/java/org/apache/sysml/test/gpu/LstmCPUTest.java b/src/test/java/org/apache/sysml/test/gpu/LstmCPUTest.java index 828a809..faa014e 100644 --- a/src/test/java/org/apache/sysml/test/gpu/LstmCPUTest.java +++ b/src/test/java/org/apache/sysml/test/gpu/LstmCPUTest.java @@ -34,7 +34,7 @@ public class 
LstmCPUTest extends GPUTests { private final static String TEST_NAME = "LstmTests"; private final int seed = 42; - private final static String builtinDML = "\"nn/layers/lstm_staging.dml\""; + private final static String builtinDML = "\"nn/layers/lstm_builtin.dml\""; private final static String nnDML = "\"nn/layers/lstm.dml\""; @Override diff --git a/src/test/java/org/apache/sysml/test/gpu/LstmTest.java b/src/test/java/org/apache/sysml/test/gpu/LstmTest.java index 996b12a..ffc6099 100644 --- a/src/test/java/org/apache/sysml/test/gpu/LstmTest.java +++ b/src/test/java/org/apache/sysml/test/gpu/LstmTest.java @@ -36,7 +36,7 @@ public class LstmTest extends GPUTests { private final static String TEST_NAME = "LstmTests"; private final int seed = 42; - private final static String builtinDML = "\"nn/layers/lstm_staging.dml\""; + private final static String builtinDML = "\"nn/layers/lstm_builtin.dml\""; private final static String nnDML = "\"nn/layers/lstm.dml\""; @Override