Github user kaknikhil commented on a diff in the pull request:
https://github.com/apache/madlib/pull/243#discussion_r175627929
--- Diff: src/modules/convex/mlp_igd.cpp ---
@@ -130,6 +145,90 @@ mlp_igd_transition::run(AnyType &args) {
     return state;
 }
+/**
+ * @brief Perform the multilayer perceptron minibatch transition step
+ *
+ * Called for each tuple.
+ */
+AnyType
+mlp_minibatch_transition::run(AnyType &args) {
+    // For the first tuple: args[0] is nothing more than a marker that
+    // indicates that we should do some initial operations.
+    // For other tuples: args[0] holds the computation state until the last tuple.
+    MLPMiniBatchState<MutableArrayHandle<double> > state = args[0];
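+    // (The state object is backed by a flat double array so that the
+    //  aggregation framework can pass it between transition calls.)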
+
+    // initialize the state if first tuple
+    if (state.algo.numRows == 0) {
+        if (!args[3].isNull()) {
+            MLPMiniBatchState<ArrayHandle<double> > previousState = args[3];
+            state.allocate(*this, previousState.task.numberOfStages,
+                           previousState.task.numbersOfUnits);
+            state = previousState;
+        } else {
+            // configuration parameters
+            ArrayHandle<double> numbersOfUnits = args[4].getAs<ArrayHandle<double> >();
+            int numberOfStages = numbersOfUnits.size() - 1;
+
+            double stepsize = args[5].getAs<double>();
+
+            state.allocate(*this, numberOfStages,
+                           reinterpret_cast<const double *>(numbersOfUnits.ptr()));
+            state.task.stepsize = stepsize;
+            const int activation = args[6].getAs<int>();
+            const int is_classification = args[7].getAs<int>();
+            // args[8] is for weighting the input row, which is populated later.
+            const bool warm_start = args[9].getAs<bool>();
+            const double lambda = args[11].getAs<double>();
+            state.algo.batchSize = args[12].getAs<int>();
+            state.algo.nEpochs = args[13].getAs<int>();
+            state.task.lambda = lambda;
+            MLPTask::lambda = lambda;
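+            // lambda is the regularization coefficient; it is written both to
+            // the state and to the static MLPTask::lambda so that the static
+            // task methods use the same value.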
+
+            /* FIXME: The state is set back to zero for the second row onwards
+               if initialized as in IGD. The following avoids that, but there
+               is some failure with the debug build that must be fixed.
+            */
+            state.task.model.is_classification =
+                static_cast<double>(is_classification);
+            state.task.model.activation = static_cast<double>(activation);
+            MappedColumnVector initial_coeff = args[10].getAs<MappedColumnVector>();
+            // copy initial_coeff into the model
+            Index fan_in, fan_out, layer_start = 0;
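+            // Each stage k holds a (fan_in + 1) x fan_out weight matrix; the
+            // extra row corresponds to the bias unit, so layer k consumes
+            // (fan_in + 1) * fan_out entries of the flat coefficient vector.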
+            for (size_t k = 0; k < numberOfStages; ++k) {
+                fan_in = numbersOfUnits[k];
+                fan_out = numbersOfUnits[k+1];
+                state.task.model.u[k] << initial_coeff.segment(layer_start, (fan_in+1)*fan_out);
+                layer_start = (fan_in + 1) * fan_out;
+            }
+        }
+        // resetting in either case
+        state.reset();
+    }
+
+    // meta data
+    const uint16_t N = state.task.numberOfStages;
--- End diff --
Is there a reason we chose N and n as variable names? Can we use more
descriptive names?
---
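For illustration, a minimal sketch of what more descriptive names could look
like. The n alias is not shown in this hunk; the sketch assumes, as elsewhere
in mlp_igd.cpp, that it aliases state.task.numbersOfUnits. The names below are
suggestions, not taken from the patch:

    // meta data
    const uint16_t numberOfStages = state.task.numberOfStages;
    const double *numbersOfUnits = state.task.numbersOfUnits;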