Repository: incubator-systemml
Updated Branches:
  refs/heads/master 259742c40 -> 700b08094


[MINOR] Added documentation for Caffe2DML APIs.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/700b0809
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/700b0809
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/700b0809

Branch: refs/heads/master
Commit: 700b080940bf68a71728be12ea4e24c2450e9d13
Parents: 259742c
Author: Niketan Pansare <npan...@us.ibm.com>
Authored: Sun May 21 13:24:15 2017 -0700
Committer: Niketan Pansare <npan...@us.ibm.com>
Committed: Sun May 21 13:25:29 2017 -0700

----------------------------------------------------------------------
 .../org/apache/sysml/api/dl/Caffe2DML.scala     | 318 +++++++++----------
 .../org/apache/sysml/api/dl/CaffeSolver.scala   |  96 +++++-
 .../org/apache/sysml/api/dl/DMLGenerator.scala  | 112 ++++++-
 3 files changed, 338 insertions(+), 188 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/700b0809/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
index f7f85c3..fe6b159 100644
--- a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
@@ -52,6 +52,43 @@ import org.apache.commons.logging.LogFactory
 import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer
 
 
+/***************************************************************************************
+DESIGN OF CAFFE2DML:
+
+1. Caffe2DML is designed to fit well into the mllearn framework. Hence, the key methods that need to be implemented are:
+- `getTrainingScript` for the Estimator class.
+- `getPredictionScript` for the Model class.
+
+2. To simplify the DML generation in the getTrainingScript and getPredictionScript methods, we use the DMLGenerator interface.
+This interface generates DML strings for common operations such as control structures (if, for, while) as well as built-in functions (read, write), etc.
+Also, this interface helps keep this class readable :)
+
+3. Additionally, we created mapping classes for layer, solver and learning rate that map the corresponding Caffe abstraction to the SystemML-NN library.
+This greatly simplifies adding new layers into Caffe2DML:
+trait CaffeLayer {
+  // Any layer that wants to reuse SystemML-NN has to override the following methods that help in generating the DML for the given layer:
+  def sourceFileName:String;
+  def init(dmlScript:StringBuilder):Unit;
+  def forward(dmlScript:StringBuilder, isPrediction:Boolean):Unit;
+  def backward(dmlScript:StringBuilder, outSuffix:String):Unit;
+  ...
+}
+trait CaffeSolver {
+  def sourceFileName:String;
+  def update(dmlScript:StringBuilder, layer:CaffeLayer):Unit;
+  def init(dmlScript:StringBuilder, layer:CaffeLayer):Unit;
+}
+
+4. To simplify the traversal of the network, we created a Network interface:
+trait Network {
+  def getLayers(): List[String]
+  def getCaffeLayer(layerName:String):CaffeLayer
+  def getBottomLayers(layerName:String): Set[String]
+  def getTopLayers(layerName:String): Set[String]
+  def getLayerID(layerName:String): Int
+}
+***************************************************************************************/
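+
+/* For illustration only (not part of this commit): a hypothetical layer that reuses
+   the SystemML-NN `relu` functions might implement CaffeLayer roughly as follows.
+   The helpers `out`, `dX`, `dout` and `bottomLayerOutput` are illustrative stand-ins
+   for the utilities a real layer implementation would use:
+
+   class ReLULayer extends CaffeLayer {
+     // Sources nn/layers/relu.dml so that relu::forward/relu::backward are available
+     override def sourceFileName: String = "relu"
+     // ReLU has no weights or bias, so there is nothing to initialize
+     override def init(dmlScript: StringBuilder): Unit = { }
+     // Emits e.g.: out3 = relu::forward(out2)
+     override def forward(dmlScript: StringBuilder, isPrediction: Boolean): Unit =
+       dmlScript.append(out + " = relu::forward(" + bottomLayerOutput + ")\n")
+     // Emits e.g.: dOut2 = relu::backward(dOut3, out2)
+     override def backward(dmlScript: StringBuilder, outSuffix: String): Unit =
+       dmlScript.append(dX + outSuffix + " = relu::backward(" + dout + ", " + bottomLayerOutput + ")\n")
+   }
+*/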
+
 object Caffe2DML  {
   val LOG = LogFactory.getLog(classOf[Caffe2DML].getName()) 
   // ------------------------------------------------------------------------
@@ -62,7 +99,7 @@ object Caffe2DML  {
   val X = "X"; val y = "y"; val batchSize = "BATCH_SIZE"; val numImages = "num_images"; val numValidationImages = "num_validation"
   val XVal = "X_val"; val yVal = "y_val"
   
-  var USE_NESTEROV_UDF = {
+  val USE_NESTEROV_UDF = {
     // Developer environment variable flag 'USE_NESTEROV_UDF' until codegen starts working.
     // Then, we will remove this flag and also the class org.apache.sysml.udf.lib.SGDNesterovUpdate
     val envFlagNesterovUDF = System.getenv("USE_NESTEROV_UDF")
@@ -118,6 +155,93 @@ class Caffe2DML(val sc: SparkContext, val solverParam:Caffe.SolverParameter,
   // Method called by Python mllearn to visualize a variable of a certain layer
   def visualizeLayer(layerName:String, varType:String, aggFn:String): Unit = visualizeLayer(net, layerName, varType, aggFn)
   
+  // ================================================================================================
+  // The method below parses the provided network and solver files and generates the DML training script.
+  def getTrainingScript(isSingleNode:Boolean):(Script, String, String) = {
+    val startTrainingTime = System.nanoTime()
+    
+    reset                                 // Reset the state of DML generator for training script.
+    
+    // Flags passed by user
+    val DEBUG_TRAINING = if(inputs.containsKey("$debug")) inputs.get("$debug").toLowerCase.toBoolean else false
+    assign(tabDMLScript, "debug", if(DEBUG_TRAINING) "TRUE" else "FALSE")
+    
+    appendHeaders(net, solver, true)      // Appends DML corresponding to source and externalFunction statements.
+    readInputData(net, true)              // Read X_full and y_full
+    // Initialize the layers and solvers. Reads weights and bias if $weights is set.
+    initWeights(net, solver, inputs.containsKey("$weights"), layersToIgnore)
+    
+    // Split into training and validation set
+    // Initializes Caffe2DML.X, Caffe2DML.y, Caffe2DML.XVal, Caffe2DML.yVal and Caffe2DML.numImages
+    val shouldValidate = solverParam.getTestInterval > 0 && solverParam.getTestIterCount > 0 && solverParam.getTestIter(0) > 0
+    trainTestSplit(if(shouldValidate) solverParam.getTestIter(0) else 0)
+    
+    // Set iteration-related variables such as max_epochs, num_iters_per_epoch, lr, etc.
+    setIterationVariables
+    val lossLayers = getLossLayers(net)
+    // ----------------------------------------------------------------------------
+    // Main logic
+    forBlock("e", "1", "max_epochs") {
+      solverParam.getTrainAlgo.toLowerCase match {
+        case "minibatch" =>
+          forBlock("i", "1", "num_iters_per_epoch") {
+            getTrainingBatch(tabDMLScript)
+            tabDMLScript.append("iter = start_iter + i\n")
+            forward; backward; update
+            displayLoss(lossLayers(0), shouldValidate)
+            performSnapshot
+          }
+        case "batch" => {
+          tabDMLScript.append("iter = start_iter + i\n")
+          forward; backward; update
+          displayLoss(lossLayers(0), shouldValidate)
+          performSnapshot
+        }
+        case "allreduce" => {
+          forBlock("i", "1", "num_iters_per_epoch") {
+            getTrainingBatch(tabDMLScript)
+            assign(tabDMLScript, "X_group_batch", "Xb")
+            assign(tabDMLScript, "y_group_batch", "yb")
+            tabDMLScript.append("iter = start_iter + i\n")
+            initAggGradients
+            parForBlock("j", "1", "nrow(y_group_batch)") {
+              assign(tabDMLScript, "Xb", "X_group_batch[j,]")
+              assign(tabDMLScript, "yb", "y_group_batch[j,]")
+              forward; backward("_agg")
+              flattenAndStoreAggGradients_j
+            }
+            aggregateAggGradients
+            tabDMLScript.append("iter = start_iter + parallel_batches\n")
+            update
+            displayLoss(lossLayers(0), shouldValidate)
+            performSnapshot
+          }
+        }
+        case _ => throw new DMLRuntimeException("Unsupported train algo:" + solverParam.getTrainAlgo)
+      }
+      // After every epoch, update the learning rate
+      tabDMLScript.append("# Learning rate\n")
+      lrPolicy.updateLearningRate(tabDMLScript)
+      tabDMLScript.append("start_iter = start_iter + num_iters_per_epoch\n")
+    }
+    // ----------------------------------------------------------------------------
+         
+    // TODO: Check whether this print is necessary
+    if(doVisualize) tabDMLScript.append("print(" + asDMLString("Visualization counter:") + " + viz_counter)")
+         
+    val trainingScript = tabDMLScript.toString()
+    // Print script generation time and the DML script on stdout
+    System.out.println("Time taken to generate training script from Caffe proto: " + ((System.nanoTime() - startTrainingTime)*1e-9) + " seconds." )
+    if(DEBUG_TRAINING) Utils.prettyPrintDMLScript(trainingScript)
+    
+    // Set input/output variables and execute the script
+    val script = dml(trainingScript).in(inputs)
+    net.getLayers.map(net.getCaffeLayer(_)).filter(_.weight != null).map(l => script.out(l.weight))
+    net.getLayers.map(net.getCaffeLayer(_)).filter(_.bias != null).map(l => script.out(l.bias))
+    (script, "X_full", "y_full")
+  }
+  // ================================================================================================
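+
+  /* For reference, with train_algo="minibatch" the generated training script follows
+   * roughly this DML skeleton (a sketch; the layer-specific forward/backward/update
+   * lines depend on the network, and <base_lr> comes from the solver file):
+   *
+   *   X_full = read(X_full_path); y_full = read(y_full_path)
+   *   num_images = nrow(y_full)
+   *   # ... one-hot encoding of y_full, weight/bias init, train/validation split ...
+   *   start_iter = 0
+   *   lr = <base_lr>
+   *   for(e in 1:max_epochs) {
+   *     for(i in 1:num_iters_per_epoch) {
+   *       # ... fetch batch Xb, yb; per-layer forward, backward and solver update ...
+   *       iter = start_iter + i
+   *     }
+   *     # update lr according to the learning rate policy
+   *     start_iter = start_iter + num_iters_per_epoch
+   *   }
+   */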
+  
   // -------------------------------------------------------------------------------------------
   // Helper functions to generate DML
   // Initializes Caffe2DML.X, Caffe2DML.y, Caffe2DML.XVal, Caffe2DML.yVal and Caffe2DML.numImages
@@ -153,30 +277,6 @@ class Caffe2DML(val sc: SparkContext, val solverParam:Caffe.SolverParameter,
     }
   }
   
-  private def printClassificationReport():Unit = {
-    ifBlock("debug"){
-      assign(tabDMLScript, "num_rows_error_measures", min("10", ncol("yb")))
-      assign(tabDMLScript, "error_measures", matrix("0", "num_rows_error_measures", "5"))
-      forBlock("class_i", "1", "num_rows_error_measures") {
-        assign(tabDMLScript, "tp", "sum( (true_yb == predicted_yb) * (true_yb == class_i) )")
-        assign(tabDMLScript, "tp_plus_fp", "sum( (predicted_yb == class_i) )")
-        assign(tabDMLScript, "tp_plus_fn", "sum( (true_yb == class_i) )")
-        assign(tabDMLScript, "precision", "tp / tp_plus_fp")
-        assign(tabDMLScript, "recall", "tp / tp_plus_fn")
-        assign(tabDMLScript, "f1Score", "2*precision*recall / (precision+recall)")
-        assign(tabDMLScript, "error_measures[class_i,1]", "class_i")
-        assign(tabDMLScript, "error_measures[class_i,2]", "precision")
-        assign(tabDMLScript, "error_measures[class_i,3]", "recall")
-        assign(tabDMLScript, "error_measures[class_i,4]", "f1Score")
-        assign(tabDMLScript, "error_measures[class_i,5]", "tp_plus_fn")
-      }
-      val dmlTab = "\\t"
-      val header = "class    " + dmlTab + "precision" + dmlTab + "recall  " + dmlTab + "f1-score" + dmlTab + "num_true_labels\\n"
-      val errorMeasures = "toString(error_measures, decimal=7, sep=" + asDMLString(dmlTab) + ")"
-      tabDMLScript.append(print(dmlConcat(asDMLString(header), errorMeasures)))
-    }
-  }
-  
   // Append the DML to display training and validation loss
   private def displayLoss(lossLayer:IsLossLayer, shouldValidate:Boolean):Unit = {
     if(solverParam.getDisplay > 0) {
@@ -275,54 +375,9 @@ class Caffe2DML(val sc: SparkContext, val solverParam:Caffe.SolverParameter,
           matrix(colSums(l.dBias + "_agg"), nrow(l.bias), ncol(l.bias)))
     })
   }
-  // -------------------------------------------------------------------------------------------
-  
-  private def multiply(v1:String, v2:String):String = v1 + "*" + v2
-  private def colSums(m:String):String = "colSums(" + m + ")"
-  
-       // Script generator
-       def getTrainingScript(isSingleNode:Boolean):(Script, String, String)  = {
-         val startTrainingTime = System.nanoTime()
-         val DEBUG_TRAINING = if(inputs.containsKey("$debug")) inputs.get("$debug").toLowerCase.toBoolean else false
-    reset()
-         
-         // Add source for layers as well as solver as well as visualization header
-         source(net, solver, Array[String]("l2_reg"))
-         appendVisualizationHeaders(dmlScript, numTabs)
-         
-         if(Caffe2DML.USE_NESTEROV_UDF) {
-           tabDMLScript(dmlScript, numTabs).append("update_nesterov = externalFunction(matrix[double] X, matrix[double] dX, double lr, double mu, matrix[double] v, double lambda) return (matrix[double] X, matrix[double] v) implemented in (classname=\"org.apache.sysml.udf.lib.SGDNesterovUpdate\",exectype=\"mem\");  \n")
-         }
-         
-         // Read and convert to one-hote encoding
-         assign(tabDMLScript, "X_full", "read(\" \", format=\"csv\")")
-         assign(tabDMLScript, "y_full", "read(\" \", format=\"csv\")")
-         tabDMLScript.append(Caffe2DML.numImages).append(" = nrow(y_full)\n")
-         tabDMLScript.append("weights = ifdef($weights, \" \")\n")
-         tabDMLScript.append("debug = ifdef($debug, FALSE)\n")
-         tabDMLScript.append("# Convert to one-hot encoding (Assumption: 1-based labels) \n")
-         tabDMLScript.append("y_full = table(seq(1," + Caffe2DML.numImages + ",1), y_full, " + Caffe2DML.numImages + ", " + Utils.numClasses(net) + ")\n")
-         
-         // Initialize the layers and solvers
-         tabDMLScript.append("# Initialize the layers and solvers\n")
-         net.getLayers.map(layer => net.getCaffeLayer(layer).init(tabDMLScript))
-         if(inputs.containsKey("$weights")) {
-                 // Loading existing weights. Note: keeping the initialization code in case the layer wants to initialize non-weights and non-bias
-                 tabDMLScript.append("# Load the weights. Note: keeping the initialization code in case the layer wants to initialize non-weights and non-bias\n")
-                 net.getLayers.filter(l => !layersToIgnore.contains(l)).map(net.getCaffeLayer(_)).filter(_.weight != null).map(l => tabDMLScript.append(read(l.weight, l.param.getName + "_weight.mtx")))
-                 net.getLayers.filter(l => !layersToIgnore.contains(l)).map(net.getCaffeLayer(_)).filter(_.bias != null).map(l => tabDMLScript.append(read(l.bias, l.param.getName + "_bias.mtx")))
-         }
-         net.getLayers.map(layer => solver.init(tabDMLScript, net.getCaffeLayer(layer)))
-         
-         // Split into training and validation set
-         // Initializes Caffe2DML.X, Caffe2DML.y, Caffe2DML.XVal, Caffe2DML.yVal and Caffe2DML.numImages
-         val shouldValidate = solverParam.getTestInterval > 0 && solverParam.getTestIterCount > 0 && solverParam.getTestIter(0) > 0
-         trainTestSplit(if(shouldValidate) solverParam.getTestIter(0) else 0)
-         
-         // Set iteration-related variables such as max_epochs, num_iters_per_epoch, lr, etc.
-         val lossLayers = net.getLayers.filter(layer => net.getCaffeLayer(layer).isInstanceOf[IsLossLayer]).map(layer => net.getCaffeLayer(layer).asInstanceOf[IsLossLayer])
-         if(lossLayers.length != 1) throw new DMLRuntimeException("Expected exactly one loss layer")
-         solverParam.getTrainAlgo.toLowerCase match {
+  // Set iteration-related variables such as max_epochs, num_iters_per_epoch, lr, etc.
+  def setIterationVariables():Unit = {
+    solverParam.getTrainAlgo.toLowerCase match {
            case "batch" => 
              assign(tabDMLScript, "max_epochs", 
solverParam.getMaxIter.toString)
            case _ => {
@@ -332,68 +387,8 @@ class Caffe2DML(val sc: SparkContext, val solverParam:Caffe.SolverParameter,
          }
          assign(tabDMLScript, "start_iter", "0")
          assign(tabDMLScript, "lr", solverParam.getBaseLr.toString)
-         
-         // ----------------------------------------------------------------------------
-         // Main logic
-         forBlock("e", "1", "max_epochs") {
-           solverParam.getTrainAlgo.toLowerCase match {
-             case "minibatch" => 
-               forBlock("i", "1", "num_iters_per_epoch") {
-                 getTrainingBatch(tabDMLScript)
-                 tabDMLScript.append("iter = start_iter + i\n")
-                 forward; backward; update
-                 displayLoss(lossLayers(0), shouldValidate)
-                 performSnapshot
-               }
-             case "batch" => {
-               tabDMLScript.append("iter = start_iter + i\n")
-               forward; backward; update
-               displayLoss(lossLayers(0), shouldValidate)
-               performSnapshot
-             }
-             case "allreduce" => {
-               forBlock("i", "1", "num_iters_per_epoch") {
-                 getTrainingBatch(tabDMLScript)
-                 assign(tabDMLScript, "X_group_batch", "Xb")
-                 assign(tabDMLScript, "y_group_batch", "yb")
-                 tabDMLScript.append("iter = start_iter + i\n")
-                 initAggGradients
-                 parForBlock("j", "1", "nrow(y_group_batch)") {
-                   assign(tabDMLScript, "Xb", "X_group_batch[j,]")
-                   assign(tabDMLScript, "yb", "y_group_batch[j,]")
-                   forward; backward("_agg")
-                   flattenAndStoreAggGradients_j
-                 }
-                 aggregateAggGradients
-                 tabDMLScript.append("iter = start_iter + parallel_batches\n")
-                 update
-                 displayLoss(lossLayers(0), shouldValidate)
-                 performSnapshot
-               }
-             }
-             case _ => throw new DMLRuntimeException("Unsupported train algo:" + solverParam.getTrainAlgo)
-           }
-           // After every epoch, update the learning rate
-           tabDMLScript.append("# Learning rate\n")
-           lrPolicy.updateLearningRate(tabDMLScript)
-           tabDMLScript.append("start_iter = start_iter + num_iters_per_epoch\n")
-         }
-         // ----------------------------------------------------------------------------
-         
-         // Check if this is necessary
-         if(doVisualize) tabDMLScript.append("print(" + asDMLString("Visualization counter:") + " + viz_counter)")
-         
-         val trainingScript = tabDMLScript.toString()
-         // Print script generation time and the DML script on stdout
-         System.out.println("Time taken to generate training script from Caffe proto: " + ((System.nanoTime() - startTrainingTime)*1e-9) + " seconds." )
-         if(DEBUG_TRAINING) Utils.prettyPrintDMLScript(trainingScript)
-         
-         // Set input/output variables and execute the script
-         val script = dml(trainingScript).in(inputs)
-         net.getLayers.map(net.getCaffeLayer(_)).filter(_.weight != null).map(l => script.out(l.weight))
-         net.getLayers.map(net.getCaffeLayer(_)).filter(_.bias != null).map(l => script.out(l.bias))
-         (script, "X_full", "y_full")
-       }
+  }
+  // -------------------------------------------------------------------------------------------
 }
 
 class Caffe2DMLModel(val mloutput: MLResults,  
@@ -431,40 +426,33 @@ class Caffe2DMLModel(val mloutput: MLResults,
          ml.execute(script)
        }
     
+  // ================================================================================================
+  // The method below parses the provided network and solver files and generates the DML prediction script.
   def getPredictionScript(mloutput: MLResults, isSingleNode:Boolean): (Script, String)  = {
-    reset()
     val startPredictionTime = System.nanoTime()
-         val DEBUG_PREDICTION = if(estimator.inputs.containsKey("$debug")) estimator.inputs.get("$debug").toLowerCase.toBoolean else false
+    
+    reset                                  // Reset the state of DML generator for the prediction script.
          
-         // Append source statements for each layer
-         source(net, solver, null)
-    tabDMLScript.append("weights = ifdef($weights, \" \")\n")
-         // Initialize the layers and solvers
-         tabDMLScript.append("# Initialize the layers and solvers\n")
-         net.getLayers.map(layer => net.getCaffeLayer(layer).init(tabDMLScript))
-         if(mloutput == null && estimator.inputs.containsKey("$weights")) {
-                 // fit was not called
-                 net.getLayers.map(net.getCaffeLayer(_)).filter(_.weight != null).map(l => tabDMLScript.append(read(l.weight, l.param.getName + "_weight.mtx")))
-                 net.getLayers.map(net.getCaffeLayer(_)).filter(_.bias != null).map(l => tabDMLScript.append(read(l.bias, l.param.getName + "_bias.mtx")))
-         }
-         else if(mloutput == null) {
-                 throw new DMLRuntimeException("Cannot call predict/score without calling either fit or by providing weights")
+         val DEBUG_PREDICTION = if(estimator.inputs.containsKey("$debug")) estimator.inputs.get("$debug").toLowerCase.toBoolean else false
+         assign(tabDMLScript, "debug", if(DEBUG_PREDICTION) "TRUE" else "FALSE")
+    
+    appendHeaders(net, solver, false)      // Appends DML corresponding to source and externalFunction statements.
+    readInputData(net, false)              // Read X_full
+    assign(tabDMLScript, "X", "X_full")
+    
+    // Initialize the layers and solvers. Reads weights and bias if readWeights is true.
+    val readWeights = {
+           if(mloutput == null && estimator.inputs.containsKey("$weights")) true
+           else if(mloutput == null) throw new DMLRuntimeException("Cannot call predict/score without either calling fit or providing weights")
+           else false
          }
-         net.getLayers.map(layer => solver.init(tabDMLScript, net.getCaffeLayer(layer)))
-         
-//       if(estimator.inputs.containsKey("$debug") && estimator.inputs.get("$debug").equals("TRUE")) {
-//               System.out.println("The output shape of layers:")
-//               net.getLayers.map(layer =>  System.out.println(net.getCaffeLayer(layer).param.getName + " " + net.getCaffeLayer(layer).outputShape))
-//       }
+    initWeights(net, solver, readWeights)
          
          // Do not update mean and variance in batchnorm
-         net.getLayers.filter(net.getCaffeLayer(_).isInstanceOf[BatchNorm]).map(net.getCaffeLayer(_).asInstanceOf[BatchNorm].update_mean_var = false)
-         tabDMLScript.append("X_full = read(\" \", format=\"csv\")\n")
-         assign(tabDMLScript, "X", "X_full")
-         tabDMLScript.append(Caffe2DML.numImages + " = nrow(X_full)\n")
+         updateMeanVarianceForBatchNorm(net, false)
+         
+         val lossLayers = getLossLayers(net)
          
-         val lossLayers = net.getLayers.filter(layer => net.getCaffeLayer(layer).isInstanceOf[IsLossLayer]).map(layer => net.getCaffeLayer(layer).asInstanceOf[IsLossLayer])
-         customAssert(lossLayers.length == 1, "Expected exactly one loss layer, but found " + lossLayers.length + ":" + net.getLayers.filter(layer => net.getCaffeLayer(layer).isInstanceOf[IsLossLayer]))
          assign(tabDMLScript, "Prob", matrix("0", Caffe2DML.numImages, 
numClasses))
          estimator.solverParam.getTestAlgo.toLowerCase match {
       case "minibatch" => {
@@ -495,8 +483,8 @@ class Caffe2DMLModel(val mloutput: MLResults,
                System.out.println("Time taken to generate prediction script 
from Caffe proto:" + ((System.nanoTime() - startPredictionTime)*1e-9) + "secs." 
)
                if(DEBUG_PREDICTION) 
Utils.prettyPrintDMLScript(predictionScript)
                
-               // Reset
-               net.getLayers.filter(net.getCaffeLayer(_).isInstanceOf[BatchNorm]).map(net.getCaffeLayer(_).asInstanceOf[BatchNorm].update_mean_var = true)
+               // Reset state of BatchNorm layer
+               updateMeanVarianceForBatchNorm(net, true)
                
          val script = dml(predictionScript).out("Prob").in(estimator.inputs)
          if(mloutput != null) {
@@ -504,9 +492,9 @@ class Caffe2DMLModel(val mloutput: MLResults,
         net.getLayers.map(net.getCaffeLayer(_)).filter(_.weight != null).map(l => script.in(l.weight, mloutput.getBinaryBlockMatrix(l.weight)))
         net.getLayers.map(net.getCaffeLayer(_)).filter(_.bias != null).map(l => script.in(l.bias, mloutput.getBinaryBlockMatrix(l.bias)))
          }
-         
          (script, "X_full")
   }
+  // ================================================================================================
   
   // Prediction
   def transform(X: MatrixBlock): MatrixBlock = {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/700b0809/src/main/scala/org/apache/sysml/api/dl/CaffeSolver.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/CaffeSolver.scala b/src/main/scala/org/apache/sysml/api/dl/CaffeSolver.scala
index 0620e44..0e39192 100644
--- a/src/main/scala/org/apache/sysml/api/dl/CaffeSolver.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/CaffeSolver.scala
@@ -88,11 +88,30 @@ class LearningRatePolicy(lr_policy:String="exp", base_lr:Double=0.01) {
   }
 }
 
-/**
- * lambda: regularization parameter
- * momentum: Momentum value. Typical values are in the range of [0.5, 0.99], usually started at the lower end and annealed towards the higher end.
- */
 class SGD(lambda:Double=5e-04, momentum:Double=0.9) extends CaffeSolver {
+  /*
+   * Performs an SGD update with momentum.
+   *
+   * In SGD with momentum, we assume that the parameters have a velocity
+   * that continues with some momentum, and that is influenced by the
+   * gradient.
+   *
+   * Inputs:
+   *  - X: Parameters to update, of shape (any, any).
+   *  - dX: Gradient wrt `X` of a loss function being optimized, of
+   *      same shape as `X`.
+   *  - lr: Learning rate.
+   *  - mu: Momentum value.
+   *      Typical values are in the range of [0.5, 0.99], usually
+   *      started at the lower end and annealed towards the higher end.
+   *  - v: State maintaining the velocity of the parameters `X`, of same
+   *      shape as `X`.
+   *
+   * Outputs:
+   *  - X: Updated parameters `X`, of same shape as input `X`.
+   *  - v: Updated velocity of the parameters `X`, of same shape as
+   *      input `X`.
+   */
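+  /*
+   * For reference, the emitted update corresponds roughly to the following DML
+   * (a sketch assuming the SystemML-NN sgd_momentum semantics; lambda is applied
+   * beforehand via the l2reg_update gradient term):
+   *
+   *   v = mu * v - lr * dX    # velocity update with momentum
+   *   X = X + v               # parameter step
+   */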
   def update(dmlScript:StringBuilder, layer:CaffeLayer):Unit = {
     l2reg_update(lambda, dmlScript, layer)
     if(momentum == 0) {
@@ -117,13 +136,34 @@ class SGD(lambda:Double=5e-04, momentum:Double=0.9) extends CaffeSolver {
   def sourceFileName:String = if(momentum == 0) "sgd" else "sgd_momentum" 
 }
 
-/**
- * lambda: regularization parameter
- * epsilon: Smoothing term to avoid divide by zero errors. Typical values are in the range of [1e-8, 1e-4].
- * 
- * See Adaptive Subgradient Methods for Online Learning and Stochastic Optimization, Duchi et al.
- */
 class AdaGrad(lambda:Double=5e-04, epsilon:Double=1e-6) extends CaffeSolver {
+  /*
+   * Performs an Adagrad update.
+   *
+   * This is an adaptive learning rate optimizer that maintains the
+   * sum of squared gradients to automatically adjust the effective
+   * learning rate.
+   *
+   * Reference:
+   *  - Adaptive Subgradient Methods for Online Learning and Stochastic
+   *    Optimization, Duchi et al.
+   *      - http://jmlr.org/papers/v12/duchi11a.html
+   *
+   * Inputs:
+   *  - X: Parameters to update, of shape (any, any).
+   *  - dX: Gradient wrt `X` of a loss function being optimized, of
+   *      same shape as `X`.
+   *  - lr: Learning rate.
+   *  - epsilon: Smoothing term to avoid divide by zero errors.
+   *      Typical values are in the range of [1e-8, 1e-4].
+   *  - cache: State that maintains per-parameter sum of squared
+   *      gradients, of same shape as `X`.
+   *
+   * Outputs:
+   *  - X: Updated parameters `X`, of same shape as input `X`.
+   *  - cache: State that maintains per-parameter sum of squared
+   *      gradients, of same shape as `X`.
+   */
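+  /*
+   * For reference, the emitted update corresponds roughly to the following DML
+   * (a sketch assuming the SystemML-NN adagrad semantics):
+   *
+   *   cache = cache + dX^2                          # per-parameter sum of squared gradients
+   *   X = X - (lr * dX / (sqrt(cache) + epsilon))   # adaptively scaled step
+   */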
   def update(dmlScript:StringBuilder, layer:CaffeLayer):Unit = {
     l2reg_update(lambda, dmlScript, layer)
    if(layer.shouldUpdateWeight) dmlScript.append("\t").append("["+ commaSep(layer.weight, layer.weight+"_cache") + "] " + 
@@ -138,11 +178,39 @@ class AdaGrad(lambda:Double=5e-04, epsilon:Double=1e-6) extends CaffeSolver {
   def sourceFileName:String = "adagrad"
 }
 
-/**
- * lambda: regularization parameter
- * momentum: Momentum value. Typical values are in the range of [0.5, 0.99], usually started at the lower end and annealed towards the higher end.
- */
 class Nesterov(lambda:Double=5e-04, momentum:Double=0.9) extends CaffeSolver {
+  /*
+   * Performs an SGD update with Nesterov momentum.
+   *
+   * As with regular SGD with momentum, in SGD with Nesterov momentum,
+   * we assume that the parameters have a velocity that continues
+   * with some momentum, and that is influenced by the gradient.
+   * In this view specifically, we perform the position update from the
+   * position that the momentum is about to carry the parameters to,
+   * rather than from the previous position.  Additionally, we always
+   * store the parameters in their position after momentum.
+   *
+   * Reference:
+   *  - Advances in optimizing Recurrent Networks, Bengio et al.,
+   *    section 3.5.
+   *    - http://arxiv.org/abs/1212.0901
+   *
+   * Inputs:
+   *  - X: Parameters to update, of shape (any, any).
+   *  - dX: Gradient wrt `X` of a loss function being optimized, of
+   *      same shape as `X`.
+   *  - lr: Learning rate.
+   *  - mu: Momentum value.
+   *      Typical values are in the range of [0.5, 0.99], usually
+   *      started at the lower end and annealed towards the higher end.
+   *  - v: State maintaining the velocity of the parameters `X`, of same
+   *      shape as `X`.
+   *
+   * Outputs:
+   *  - X: Updated parameters X, of same shape as input X.
+   *  - v: Updated velocity of the parameters X, of same shape as
+   *      input v.
+   */
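+  /*
+   * For reference, the emitted update corresponds roughly to the following DML
+   * (a sketch assuming the SystemML-NN sgd_nesterov semantics):
+   *
+   *   v_prev = v
+   *   v = mu * v - lr * dX                    # velocity update
+   *   X = X - mu * v_prev + (1 + mu) * v      # position update from the look-ahead point
+   */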
   def update(dmlScript:StringBuilder, layer:CaffeLayer):Unit = {
     val fn = if(Caffe2DML.USE_NESTEROV_UDF) "update_nesterov" else "sgd_nesterov::update"
     val lastParameter = if(Caffe2DML.USE_NESTEROV_UDF) (", " + lambda) else ""

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/700b0809/src/main/scala/org/apache/sysml/api/dl/DMLGenerator.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/DMLGenerator.scala b/src/main/scala/org/apache/sysml/api/dl/DMLGenerator.scala
index 668d996..456b032 100644
--- a/src/main/scala/org/apache/sysml/api/dl/DMLGenerator.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/DMLGenerator.scala
@@ -54,7 +54,7 @@ trait BaseDMLGenerator {
   def isNumber(x: String):Boolean = x forall Character.isDigit
   def transpose(x:String):String = "t(" + x + ")"
   def write(varName:String, fileName:String, format:String):String = "write(" + varName + ", \"" + fileName + "\", format=\"" + format + "\")\n"
-  def read(varName:String, fileName:String, sep:String="/"):String =  varName + " = read(weights + \"" + sep + fileName + "\")\n"
+  def readWeight(varName:String, fileName:String, sep:String="/"):String =  varName + " = read(weights + \"" + sep + fileName + "\")\n"
   def asDMLString(str:String):String = "\"" + str + "\""
   def assign(dmlScript:StringBuilder, lhsVar:String, rhsVar:String):Unit = {
     dmlScript.append(lhsVar).append(" = ").append(rhsVar).append("\n")
@@ -132,6 +132,11 @@ trait BaseDMLGenerator {
   def nrow(m:String):String = "nrow(" + m + ")"
   def ncol(m:String):String = "ncol(" + m + ")"
   def customAssert(cond:Boolean, msg:String) = if(!cond) throw new DMLRuntimeException(msg)
+  def multiply(v1:String, v2:String):String = v1 + "*" + v2
+  def colSums(m:String):String = "colSums(" + m + ")"
+  def ifdef(cmdLineVar:String, defaultVal:String):String = "ifdef(" + cmdLineVar + ", " + defaultVal + ")"
+  def ifdef(cmdLineVar:String):String = ifdef(cmdLineVar, "\" \"")
+  def read(filePathVar:String, format:String):String = "read(" + filePathVar + ", format=\""+ format + "\")"
 }
 
 trait TabbedDMLGenerator extends BaseDMLGenerator {
@@ -229,14 +234,6 @@ trait VisualizeDMLGenerator extends TabbedDMLGenerator {
         + ");\n")
     dmlScript.append("viz_counter = viz_counter + viz_counter1\n")
   }
-  def appendVisualizationHeaders(dmlScript:StringBuilder, numTabs:Int): Unit = {
-    if(doVisualize) {
-           tabDMLScript(dmlScript, numTabs).append("visualize = externalFunction(String layerName, String varType, String aggFn, Double x, Double y, String logDir) return (Double B) " +
-               "implemented in (classname=\"org.apache.sysml.udf.lib.Caffe2DMLVisualizeWrapper\",exectype=\"mem\"); \n")
-           tabDMLScript(dmlScript, numTabs).append("viz_counter = 0\n")
-           System.out.println("Please use the following command for visualizing: tensorboard --logdir=" + tensorboardLogDir)
-         }
-  }
   def visualizeLayer(net:CaffeNetwork, layerName:String, varType:String, aggFn:String): Unit = {
          // 'weight', 'bias', 'dweight', 'dbias', 'output' or 'doutput'
          // 'sum', 'mean', 'var' or 'sd'
@@ -316,4 +313,101 @@ trait DMLGenerator extends SourceDMLGenerator with NextBatchGenerator with Visua
          tabDMLScript.append("}\n")
        }
        
+  def printClassificationReport():Unit = {
+    ifBlock("debug"){
+      assign(tabDMLScript, "num_rows_error_measures", min("10", ncol("yb")))
+      assign(tabDMLScript, "error_measures", matrix("0", "num_rows_error_measures", "5"))
+      forBlock("class_i", "1", "num_rows_error_measures") {
+        assign(tabDMLScript, "tp", "sum( (true_yb == predicted_yb) * (true_yb == class_i) )")
+        assign(tabDMLScript, "tp_plus_fp", "sum( (predicted_yb == class_i) )")
+        assign(tabDMLScript, "tp_plus_fn", "sum( (true_yb == class_i) )")
+        assign(tabDMLScript, "precision", "tp / tp_plus_fp")
+        assign(tabDMLScript, "recall", "tp / tp_plus_fn")
+        assign(tabDMLScript, "f1Score", "2*precision*recall / (precision+recall)")
+        assign(tabDMLScript, "error_measures[class_i,1]", "class_i")
+        assign(tabDMLScript, "error_measures[class_i,2]", "precision")
+        assign(tabDMLScript, "error_measures[class_i,3]", "recall")
+        assign(tabDMLScript, "error_measures[class_i,4]", "f1Score")
+        assign(tabDMLScript, "error_measures[class_i,5]", "tp_plus_fn")
+      }
+      val dmlTab = "\\t"
+      val header = "class    " + dmlTab + "precision" + dmlTab + "recall  " + dmlTab + "f1-score" + dmlTab + "num_true_labels\\n"
+      val errorMeasures = "toString(error_measures, decimal=7, sep=" + asDMLString(dmlTab) + ")"
+      tabDMLScript.append(print(dmlConcat(asDMLString(header), errorMeasures)))
+    }
+  }
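+  // When debug=TRUE, the generated DML prints one row per class, e.g. (illustrative values):
+  //   class      precision   recall      f1-score    num_true_labels
+  //   1.0000000  0.9500000   0.9200000   0.9347826   100.0000000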
+       
+  // Appends DML corresponding to source and externalFunction statements.
+  def appendHeaders(net:CaffeNetwork, solver:CaffeSolver, isTraining:Boolean):Unit = {
+    // Append source statements for layers as well as solver
+    source(net, solver, if(isTraining) Array[String]("l2_reg") else null)
+    
+    if(isTraining) {
+      // Append external built-in function headers:
+      // 1. visualize external built-in function header
+      if(doVisualize) {
+        tabDMLScript.append("visualize = externalFunction(String layerName, String varType, String aggFn, Double x, Double y, String logDir) return (Double B) " +
+            "implemented in (classname=\"org.apache.sysml.udf.lib.Caffe2DMLVisualizeWrapper\",exectype=\"mem\"); \n")
+        tabDMLScript.append("viz_counter = 0\n")
+        System.out.println("Please use the following command for visualizing: tensorboard --logdir=" + tensorboardLogDir)
+      }
+      // 2. update_nesterov external built-in function header
+      if(Caffe2DML.USE_NESTEROV_UDF) {
+        tabDMLScript.append("update_nesterov = externalFunction(matrix[double] X, matrix[double] dX, double lr, double mu, matrix[double] v, double lambda) return (matrix[double] X, matrix[double] v) implemented in (classname=\"org.apache.sysml.udf.lib.SGDNesterovUpdate\",exectype=\"mem\");  \n")
+      }
+    }
+  }
+  
+  def readMatrix(varName:String, cmdLineVar:String):Unit = {
+    val pathVar = varName + "_path"
+    assign(tabDMLScript, pathVar, ifdef(cmdLineVar))
+    // Uncomment the following lines if we want the user to pass the format
+    // val formatVar = varName + "_fmt"
+    // assign(tabDMLScript, formatVar, ifdef(cmdLineVar + "_fmt", "\"csv\""))
+    // assign(tabDMLScript, varName, "read(" + pathVar + ", format=" + formatVar + ")")
+    assign(tabDMLScript, varName, "read(" + pathVar + ")")
+  }
+  
+  def readInputData(net:CaffeNetwork, isTraining:Boolean):Unit = {
+    // Read and convert to one-hot encoding
+    readMatrix("X_full", "$X")
+    if(isTraining) {
+      readMatrix("y_full", "$y")
+      tabDMLScript.append(Caffe2DML.numImages).append(" = nrow(y_full)\n")
+      tabDMLScript.append("# Convert to one-hot encoding (Assumption: 1-based labels) \n")
+      tabDMLScript.append("y_full = table(seq(1," + Caffe2DML.numImages + ",1), y_full, " + Caffe2DML.numImages + ", " + Utils.numClasses(net) + ")\n")
+    }
+    else {
+      tabDMLScript.append(Caffe2DML.numImages + " = nrow(X_full)\n")
+    }
+  }
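+  // For example (illustrative), readInputData(net, true) emits DML along the lines of:
+  //   X_full_path = ifdef($X, " ")
+  //   X_full = read(X_full_path)
+  //   y_full_path = ifdef($y, " ")
+  //   y_full = read(y_full_path)
+  //   num_images = nrow(y_full)
+  //   y_full = table(seq(1,num_images,1), y_full, num_images, <num_classes>)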
+  
+  def initWeights(net:CaffeNetwork, solver:CaffeSolver, readWeights:Boolean): Unit = {
+    initWeights(net, solver, readWeights, new HashSet[String]())
+  }
+  
+  def initWeights(net:CaffeNetwork, solver:CaffeSolver, readWeights:Boolean, layersToIgnore:HashSet[String]): Unit = {
+    tabDMLScript.append("weights = ifdef($weights, \" \")\n")
+    // Initialize the layers and solvers
+    tabDMLScript.append("# Initialize the layers and solvers\n")
+    net.getLayers.map(layer => net.getCaffeLayer(layer).init(tabDMLScript))
+    if(readWeights) {
+      // Load existing weights. Note: keeping the initialization code in case the layer wants to initialize non-weights and non-bias
+      tabDMLScript.append("# Load the weights. Note: keeping the initialization code in case the layer wants to initialize non-weights and non-bias\n")
+      net.getLayers.filter(l => !layersToIgnore.contains(l)).map(net.getCaffeLayer(_)).filter(_.weight != null).map(l => tabDMLScript.append(readWeight(l.weight, l.param.getName + "_weight.mtx")))
+      net.getLayers.filter(l => !layersToIgnore.contains(l)).map(net.getCaffeLayer(_)).filter(_.bias != null).map(l => tabDMLScript.append(readWeight(l.bias, l.param.getName + "_bias.mtx")))
+    }
+    net.getLayers.map(layer => solver.init(tabDMLScript, net.getCaffeLayer(layer)))
+  }
+  
+  def getLossLayers(net:CaffeNetwork):List[IsLossLayer] = {
+    val lossLayers = net.getLayers.filter(layer => net.getCaffeLayer(layer).isInstanceOf[IsLossLayer]).map(layer => net.getCaffeLayer(layer).asInstanceOf[IsLossLayer])
+    if(lossLayers.length != 1)
+      throw new DMLRuntimeException("Expected exactly one loss layer, but found " + lossLayers.length + ":" + net.getLayers.filter(layer => net.getCaffeLayer(layer).isInstanceOf[IsLossLayer]))
+    lossLayers
+  }
+  
+  def updateMeanVarianceForBatchNorm(net:CaffeNetwork, value:Boolean):Unit = {
+    net.getLayers.filter(net.getCaffeLayer(_).isInstanceOf[BatchNorm]).map(net.getCaffeLayer(_).asInstanceOf[BatchNorm].update_mean_var = value)
+  }
 }
\ No newline at end of file

