Github user kaknikhil commented on a diff in the pull request:

    https://github.com/apache/madlib/pull/313#discussion_r211725456

    --- Diff: src/modules/convex/task/mlp.hpp ---
    @@ -192,18 +189,23 @@ MLP<Model, Tuple>::getLossAndUpdateModel(
             // 1. normalize to per row update
             // 2. discount by stepsize
             // 3. add regularization
    -        // 4. make negative
    +        // 4. make negative for descent
             for (Index k=0; k < model.num_layers; k++){
                 Matrix regularization = MLP<Model, Tuple>::lambda * model.u[k];
                 regularization.row(0).setZero(); // Do not update bias
    -            total_gradient_per_layer[k] = -stepsize * (total_gradient_per_layer[k] / static_cast<double>(num_rows_in_batch) +
    -                                                       regularization);
    -            model.updateVelocity(total_gradient_per_layer[k], k);
    -            model.updatePosition(total_gradient_per_layer[k], k);
    +            total_gradient_per_layer[k] = -stepsize *
    +                (total_gradient_per_layer[k] / static_cast<double>(num_rows_in_batch) +
    +                 regularization);
    +
    +            // total_gradient_per_layer is now the update vector
    +            model.velocity[k] = model.momentum * model.velocity[k] + total_gradient_per_layer[k];
    --- End diff --

    Can we also add a comment explaining why we are duplicating the code instead of creating a function for these updates?
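    For context, a minimal sketch (hypothetical, not part of this patch) of what such a shared helper could look like, assuming classical momentum (v <- mu*v + update, then w <- w + v) and the member names visible in the diff (model.momentum, model.velocity, model.u); the helper name applyMomentumUpdate and the stand-in Model struct are illustrative only:

        // Hypothetical helper factoring out the repeated velocity/position
        // updates. Matrix here is a stand-in for the MADlib Eigen typedef.
        #include <vector>
        #include <Eigen/Dense>

        using Matrix = Eigen::MatrixXd;

        struct Model {
            double momentum;              // mu in [0, 1)
            std::vector<Matrix> u;        // per-layer weights
            std::vector<Matrix> velocity; // per-layer velocities, same shapes as u
        };

        // `update` is the quantity computed in the loop above:
        // -stepsize * (gradient / num_rows_in_batch + regularization).
        inline void applyMomentumUpdate(Model &model, const Matrix &update, size_t k) {
            // Classical momentum: v <- mu*v + update; then apply v to the weights.
            model.velocity[k] = model.momentum * model.velocity[k] + update;
            model.u[k] += model.velocity[k];
        }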
---