Github user kaknikhil commented on a diff in the pull request: https://github.com/apache/madlib/pull/243#discussion_r175627929 --- Diff: src/modules/convex/mlp_igd.cpp --- @@ -130,6 +145,90 @@ mlp_igd_transition::run(AnyType &args) { return state; } +/** + * @brief Perform the multilayer perceptron minibatch transition step + * + * Called for each tuple. + */ +AnyType +mlp_minibatch_transition::run(AnyType &args) { + // For the first tuple: args[0] is nothing more than a marker that + // indicates that we should do some initial operations. + // For other tuples: args[0] holds the computation state until last tuple + MLPMiniBatchState<MutableArrayHandle<double> > state = args[0]; + + // initilize the state if first tuple + if (state.algo.numRows == 0) { + if (!args[3].isNull()) { + MLPMiniBatchState<ArrayHandle<double> > previousState = args[3]; + state.allocate(*this, previousState.task.numberOfStages, + previousState.task.numbersOfUnits); + state = previousState; + } else { + // configuration parameters + ArrayHandle<double> numbersOfUnits = args[4].getAs<ArrayHandle<double> >(); + int numberOfStages = numbersOfUnits.size() - 1; + + double stepsize = args[5].getAs<double>(); + + state.allocate(*this, numberOfStages, + reinterpret_cast<const double *>(numbersOfUnits.ptr())); + state.task.stepsize = stepsize; + const int activation = args[6].getAs<int>(); + const int is_classification = args[7].getAs<int>(); + // args[8] is for weighting the input row, which is populated later. + const bool warm_start = args[9].getAs<bool>(); + const double lambda = args[11].getAs<double>(); + state.algo.batchSize = args[12].getAs<int>(); + state.algo.nEpochs = args[13].getAs<int>(); + state.task.lambda = lambda; + MLPTask::lambda = lambda; + + /* FIXME: The state is set back to zero for second row onwards if + initialized as in IGD. The following avoids that, but there is + some failure with debug build that must be fixed. 
+ */ + state.task.model.is_classification = + static_cast<double>(is_classification); + state.task.model.activation = static_cast<double>(activation); + MappedColumnVector initial_coeff = args[10].getAs<MappedColumnVector>(); + // copy initial_coeff into the model + Index fan_in, fan_out, layer_start = 0; + for (size_t k = 0; k < numberOfStages; ++k){ + fan_in = numbersOfUnits[k]; + fan_out = numbersOfUnits[k+1]; + state.task.model.u[k] << initial_coeff.segment(layer_start, (fan_in+1)*fan_out); + layer_start = (fan_in + 1) * fan_out; + } + } + // resetting in either case + state.reset(); + } + + // meta data + const uint16_t N = state.task.numberOfStages; --- End diff -- Is there a reason we chose N and n as variable names? Can we use more descriptive names?
---