piiswrong closed pull request #11953: do not regularize beta and bias
URL: https://github.com/apache/incubator-mxnet/pull/11953
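For context: in Gluon, every Parameter carries a wd_mult multiplier, and optimizers apply an effective decay of wd * wd_mult per parameter, so setting wd_mult=0.0 opts a parameter out of weight decay entirely. That is all this patch does for bias and beta parameters. A minimal sketch of the behavior (the Dense layer and SGD settings below are illustrative, not part of the PR):

    # Sketch: how wd_mult interacts with weight decay in Gluon.
    # Layer/optimizer choices here are illustrative examples only.
    import mxnet as mx
    from mxnet import gluon

    net = gluon.nn.Dense(10, in_units=4, use_bias=True)
    net.initialize()

    # Optimizers use an effective decay of wd * param.wd_mult per parameter,
    # so with this PR the bias reports wd_mult 0.0 and is not L2-regularized.
    for name, param in net.collect_params().items():
        print(name, 'wd_mult =', param.wd_mult)

    # Before this change, the usual workaround was to zero it out by hand:
    for name, param in net.collect_params().items():
        if name.endswith('_bias'):
            param.wd_mult = 0.0

    trainer = gluon.Trainer(net.collect_params(),
                            'sgd', {'learning_rate': 0.1, 'wd': 1e-4})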
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is reproduced below for the sake of provenance:

diff --git a/python/mxnet/gluon/nn/basic_layers.py b/python/mxnet/gluon/nn/basic_layers.py
index ad69d4e9dd9..73ebbfb54f3 100644
--- a/python/mxnet/gluon/nn/basic_layers.py
+++ b/python/mxnet/gluon/nn/basic_layers.py
@@ -208,7 +208,7 @@ def __init__(self, units, activation=None, use_bias=True, flatten=True,
         if use_bias:
             self.bias = self.params.get('bias', shape=(units,),
                                         init=bias_initializer, dtype=dtype,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
         else:
             self.bias = None
         if activation is not None:
@@ -334,7 +334,7 @@ def __init__(self, axis=1, momentum=0.9, epsilon=1e-5, center=True, scale=True,
                                      differentiable=scale)
         self.beta = self.params.get('beta', grad_req='write' if center else 'null',
                                     shape=(in_channels,), init=beta_initializer,
-                                    allow_deferred_init=True,
+                                    wd_mult=0.0, allow_deferred_init=True,
                                     differentiable=center)
         self.running_mean = self.params.get('running_mean', grad_req='null',
                                             shape=(in_channels,),
@@ -509,7 +509,7 @@ def __init__(self, axis=1, epsilon=1e-5, center=True, scale=False,
                                     allow_deferred_init=True)
         self.beta = self.params.get('beta', grad_req='write' if center else 'null',
                                     shape=(in_channels,), init=beta_initializer,
-                                    allow_deferred_init=True)
+                                    wd_mult=0.0, allow_deferred_init=True)

     def hybrid_forward(self, F, x, gamma, beta):
         if self._axis == 1:
@@ -597,7 +597,7 @@ def __init__(self, axis=-1, epsilon=1e-5, center=True, scale=True,
                                     allow_deferred_init=True)
         self.beta = self.params.get('beta', grad_req='write' if center else 'null',
                                     shape=(in_channels,), init=beta_initializer,
-                                    allow_deferred_init=True)
+                                    wd_mult=0.0, allow_deferred_init=True)

     def hybrid_forward(self, F, data, gamma, beta):
         norm_data = F.LayerNorm(data, gamma=gamma, beta=beta, axis=self._axis, eps=self._epsilon)
diff --git a/python/mxnet/gluon/nn/conv_layers.py b/python/mxnet/gluon/nn/conv_layers.py
index e1f9b9fd05a..b7228fbe34c 100644
--- a/python/mxnet/gluon/nn/conv_layers.py
+++ b/python/mxnet/gluon/nn/conv_layers.py
@@ -118,7 +118,7 @@ def __init__(self, channels, kernel_size, strides, padding, dilation,
                                             allow_deferred_init=True)
         if use_bias:
             self.bias = self.params.get('bias', shape=wshapes[2],
-                                        init=bias_initializer,
+                                        init=bias_initializer, wd_mult=0.0,
                                         allow_deferred_init=True)
         else:
             self.bias = None
diff --git a/python/mxnet/gluon/rnn/rnn_cell.py b/python/mxnet/gluon/rnn/rnn_cell.py
index 21cc8043154..02d31577813 100644
--- a/python/mxnet/gluon/rnn/rnn_cell.py
+++ b/python/mxnet/gluon/rnn/rnn_cell.py
@@ -369,10 +369,10 @@ def __init__(self, hidden_size, activation='tanh',
                                         allow_deferred_init=True)
         self.i2h_bias = self.params.get('i2h_bias', shape=(hidden_size,),
                                         init=i2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
         self.h2h_bias = self.params.get('h2h_bias', shape=(hidden_size,),
                                         init=h2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)

     def state_info(self, batch_size=0):
         return [{'shape': (batch_size, self._hidden_size), '__layout__': 'NC'}]
@@ -482,10 +482,10 @@ def __init__(self, hidden_size,
                                         allow_deferred_init=True)
         self.i2h_bias = self.params.get('i2h_bias', shape=(4*hidden_size,),
                                         init=i2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
         self.h2h_bias = self.params.get('h2h_bias', shape=(4*hidden_size,),
                                         init=h2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)

         self._activation = activation
         self._recurrent_activation = recurrent_activation
@@ -597,10 +597,10 @@ def __init__(self, hidden_size,
                                         allow_deferred_init=True)
         self.i2h_bias = self.params.get('i2h_bias', shape=(3*hidden_size,),
                                         init=i2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
         self.h2h_bias = self.params.get('h2h_bias', shape=(3*hidden_size,),
                                         init=h2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)

     def state_info(self, batch_size=0):
         return [{'shape': (batch_size, self._hidden_size), '__layout__': 'NC'}]
diff --git a/python/mxnet/gluon/rnn/rnn_layer.py b/python/mxnet/gluon/rnn/rnn_layer.py
index 418c497ce83..4c24c4de919 100644
--- a/python/mxnet/gluon/rnn/rnn_layer.py
+++ b/python/mxnet/gluon/rnn/rnn_layer.py
@@ -71,11 +71,11 @@ def __init__(self, hidden_size, num_layers, layout,
                 self.i2h_bias.append(
                     self.params.get('%s%d_i2h_bias'%(j, i), shape=(ng*nh,),
                                     init=i2h_bias_initializer,
-                                    allow_deferred_init=True))
+                                    wd_mult=0.0, allow_deferred_init=True))
                 self.h2h_bias.append(
                     self.params.get('%s%d_h2h_bias'%(j, i), shape=(ng*nh,),
                                     init=h2h_bias_initializer,
-                                    allow_deferred_init=True))
+                                    wd_mult=0.0, allow_deferred_init=True))
             ni = nh * self._dir

         self._unfused = self._unfuse()

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

With regards,
Apache Git Services