Github user njayaram2 commented on a diff in the pull request:

    https://github.com/apache/madlib/pull/243#discussion_r175947887

    --- Diff: src/modules/convex/algo/igd.hpp ---
    @@ -90,20 +90,27 @@ IGD<State, ConstState, Task>::transition(state_type &state,
         for (int curr_epoch=0; curr_epoch < n_epochs; curr_epoch++) {
             double loss = 0.0;
    -        for (int curr_batch=0, curr_batch_row_index=0; curr_batch < n_batches;
    -             curr_batch++, curr_batch_row_index += batch_size) {
    -            Matrix X_batch;
    -            ColumnVector y_batch;
    -            if (curr_batch == n_batches-1) {
    -                // last batch
    -                X_batch = tuple.indVar.bottomRows(n_rows-curr_batch_row_index);
    -                y_batch = tuple.depVar.tail(n_rows-curr_batch_row_index);
    -            } else {
    -                X_batch = tuple.indVar.block(curr_batch_row_index, 0, batch_size, n_ind_cols);
    -                y_batch = tuple.depVar.segment(curr_batch_row_index, batch_size);
    -            }
    -            loss += Task::getLossAndUpdateModel(
    -                state.task.model, X_batch, y_batch, state.task.stepsize);
    +        int random_curr_batch[n_batches];
    +        for(int i=0; i<n_batches; i++) {
    +            random_curr_batch[i] = i;
    +        }
    +        int curr_batch_row_index = 0;
    +        std::random_shuffle(&random_curr_batch[0], &random_curr_batch[n_batches]);
    +        for (int i=0; i < n_batches; i++) {
    +            int curr_batch = random_curr_batch[i];
    +            int curr_batch_row_index = curr_batch * batch_size;
    +            Matrix X_batch;
    +            Matrix Y_batch;
    +            if (curr_batch == n_batches-1) {
    +                // last batch
    +                X_batch = tuple.indVar.bottomRows(n_rows-curr_batch_row_index);
    +                Y_batch = tuple.depVar.bottomRows(n_rows-curr_batch_row_index);
    +            } else {
    +                X_batch = tuple.indVar.block(curr_batch_row_index, 0, batch_size, n_ind_cols);
    +                Y_batch = tuple.depVar.block(curr_batch_row_index, 0, batch_size, n_ind_cols);
    --- End diff --

    Yes, that seems right.
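    For reference, a minimal standalone sketch (not the MADlib implementation) of the shuffled mini-batch slicing under discussion, assuming the dependent-variable block should use the dependent matrix's own column count (named n_dep_cols here, a hypothetical name not taken from the patch) rather than n_ind_cols; the function name process_epoch is likewise illustrative only:

        // Sketch: visit mini-batches of (indVar, depVar) in a random order each epoch,
        // slicing Eigen blocks of at most batch_size rows.
        #include <algorithm>
        #include <numeric>
        #include <random>
        #include <vector>
        #include <Eigen/Dense>

        using Matrix = Eigen::MatrixXd;

        void process_epoch(const Matrix &indVar, const Matrix &depVar, int batch_size) {
            const int n_rows = static_cast<int>(indVar.rows());
            const int n_ind_cols = static_cast<int>(indVar.cols());
            const int n_dep_cols = static_cast<int>(depVar.cols());
            const int n_batches = (n_rows + batch_size - 1) / batch_size;

            // Build the batch index list 0..n_batches-1 and shuffle it.
            std::vector<int> batch_order(n_batches);
            std::iota(batch_order.begin(), batch_order.end(), 0);
            std::mt19937 gen(std::random_device{}());
            std::shuffle(batch_order.begin(), batch_order.end(), gen);

            for (int curr_batch : batch_order) {
                const int row_index = curr_batch * batch_size;
                // The last batch may hold fewer than batch_size rows.
                const int rows_in_batch = std::min(batch_size, n_rows - row_index);
                Matrix X_batch = indVar.block(row_index, 0, rows_in_batch, n_ind_cols);
                Matrix Y_batch = depVar.block(row_index, 0, rows_in_batch, n_dep_cols);
                // ... compute the loss and update the model with (X_batch, Y_batch) ...
            }
        }

    The sketch uses std::shuffle with an explicit engine instead of std::random_shuffle, since the latter is deprecated in C++14 and removed in C++17.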
---