piiswrong commented on a change in pull request #8294: NCCL integration URL: https://github.com/apache/incubator-mxnet/pull/8294#discussion_r149841891
##########
File path: python/mxnet/model.py
##########
@@ -104,15 +105,18 @@ def _initialize_kvstore(kvstore, param_arrays, arg_params, param_names, update_o
 def _update_params_on_kvstore(param_arrays, grad_arrays, kvstore, param_names):
     """Perform update of param_arrays from grad_arrays on kvstore."""
-    for index, pair in enumerate(zip(param_arrays, grad_arrays)):
-        arg_list, grad_list = pair
-        if grad_list[0] is None:
-            continue
-        name = param_names[index]
+    size = len(grad_arrays)
+    start = 0
+    # Use aggregation by default only with NCCL
+    default_batch = 16 if 'nccl' in kvstore.type else 1
+    batch = int(os.getenv('MXNET_UPDATE_AGGREGATION_SIZE', default_batch))
+    while start < size:
+        end = start + batch if start + batch < size else size
         # push gradient, priority is negative index
-        kvstore.push(name, grad_list, priority=-index)
+        kvstore.push(param_names[start:end], grad_arrays[start:end], priority=-start)

Review comment:
what's the purpose of this? Why should it be done in frontend?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

With regards,
Apache Git Services