piiswrong closed pull request #11953: do not regularize beta and bias
URL: https://github.com/apache/incubator-mxnet/pull/11953
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:


diff --git a/python/mxnet/gluon/nn/basic_layers.py b/python/mxnet/gluon/nn/basic_layers.py
index ad69d4e9dd9..73ebbfb54f3 100644
--- a/python/mxnet/gluon/nn/basic_layers.py
+++ b/python/mxnet/gluon/nn/basic_layers.py
@@ -208,7 +208,7 @@ def __init__(self, units, activation=None, use_bias=True, flatten=True,
             if use_bias:
                 self.bias = self.params.get('bias', shape=(units,),
                                             init=bias_initializer, dtype=dtype,
-                                            allow_deferred_init=True)
+                                            wd_mult=0.0, allow_deferred_init=True)
             else:
                 self.bias = None
             if activation is not None:
@@ -334,7 +334,7 @@ def __init__(self, axis=1, momentum=0.9, epsilon=1e-5, center=True, scale=True,
                                      differentiable=scale)
         self.beta = self.params.get('beta', grad_req='write' if center else 'null',
                                     shape=(in_channels,), init=beta_initializer,
-                                    allow_deferred_init=True,
+                                    wd_mult=0.0, allow_deferred_init=True,
                                     differentiable=center)
         self.running_mean = self.params.get('running_mean', grad_req='null',
                                             shape=(in_channels,),
@@ -509,7 +509,7 @@ def __init__(self, axis=1, epsilon=1e-5, center=True, scale=False,
                                      allow_deferred_init=True)
         self.beta = self.params.get('beta', grad_req='write' if center else 'null',
                                     shape=(in_channels,), init=beta_initializer,
-                                    allow_deferred_init=True)
+                                    wd_mult=0.0, allow_deferred_init=True)
 
     def hybrid_forward(self, F, x, gamma, beta):
         if self._axis == 1:
@@ -597,7 +597,7 @@ def __init__(self, axis=-1, epsilon=1e-5, center=True, scale=True,
                                      allow_deferred_init=True)
         self.beta = self.params.get('beta', grad_req='write' if center else 'null',
                                     shape=(in_channels,), init=beta_initializer,
-                                    allow_deferred_init=True)
+                                    wd_mult=0.0, allow_deferred_init=True)
 
     def hybrid_forward(self, F, data, gamma, beta):
         norm_data = F.LayerNorm(data, gamma=gamma, beta=beta, axis=self._axis, eps=self._epsilon)
diff --git a/python/mxnet/gluon/nn/conv_layers.py b/python/mxnet/gluon/nn/conv_layers.py
index e1f9b9fd05a..b7228fbe34c 100644
--- a/python/mxnet/gluon/nn/conv_layers.py
+++ b/python/mxnet/gluon/nn/conv_layers.py
@@ -118,7 +118,7 @@ def __init__(self, channels, kernel_size, strides, padding, dilation,
                                           allow_deferred_init=True)
             if use_bias:
                 self.bias = self.params.get('bias', shape=wshapes[2],
-                                            init=bias_initializer,
+                                            init=bias_initializer, wd_mult=0.0,
                                             allow_deferred_init=True)
             else:
                 self.bias = None
diff --git a/python/mxnet/gluon/rnn/rnn_cell.py b/python/mxnet/gluon/rnn/rnn_cell.py
index 21cc8043154..02d31577813 100644
--- a/python/mxnet/gluon/rnn/rnn_cell.py
+++ b/python/mxnet/gluon/rnn/rnn_cell.py
@@ -369,10 +369,10 @@ def __init__(self, hidden_size, activation='tanh',
                                           allow_deferred_init=True)
         self.i2h_bias = self.params.get('i2h_bias', shape=(hidden_size,),
                                         init=i2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
         self.h2h_bias = self.params.get('h2h_bias', shape=(hidden_size,),
                                         init=h2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
 
     def state_info(self, batch_size=0):
         return [{'shape': (batch_size, self._hidden_size), '__layout__': 'NC'}]
@@ -482,10 +482,10 @@ def __init__(self, hidden_size,
                                           allow_deferred_init=True)
         self.i2h_bias = self.params.get('i2h_bias', shape=(4*hidden_size,),
                                         init=i2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
         self.h2h_bias = self.params.get('h2h_bias', shape=(4*hidden_size,),
                                         init=h2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
         self._activation = activation
         self._recurrent_activation = recurrent_activation
 
@@ -597,10 +597,10 @@ def __init__(self, hidden_size,
                                           allow_deferred_init=True)
         self.i2h_bias = self.params.get('i2h_bias', shape=(3*hidden_size,),
                                         init=i2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
         self.h2h_bias = self.params.get('h2h_bias', shape=(3*hidden_size,),
                                         init=h2h_bias_initializer,
-                                        allow_deferred_init=True)
+                                        wd_mult=0.0, allow_deferred_init=True)
 
     def state_info(self, batch_size=0):
         return [{'shape': (batch_size, self._hidden_size), '__layout__': 'NC'}]
diff --git a/python/mxnet/gluon/rnn/rnn_layer.py b/python/mxnet/gluon/rnn/rnn_layer.py
index 418c497ce83..4c24c4de919 100644
--- a/python/mxnet/gluon/rnn/rnn_layer.py
+++ b/python/mxnet/gluon/rnn/rnn_layer.py
@@ -71,11 +71,11 @@ def __init__(self, hidden_size, num_layers, layout,
                 self.i2h_bias.append(
                     self.params.get('%s%d_i2h_bias'%(j, i), shape=(ng*nh,),
                                     init=i2h_bias_initializer,
-                                    allow_deferred_init=True))
+                                    wd_mult=0.0, allow_deferred_init=True))
                 self.h2h_bias.append(
                     self.params.get('%s%d_h2h_bias'%(j, i), shape=(ng*nh,),
                                     init=h2h_bias_initializer,
-                                    allow_deferred_init=True))
+                                    wd_mult=0.0, allow_deferred_init=True))
             ni = nh * self._dir
 
         self._unfused = self._unfuse()
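
For context: in Gluon, the effective weight decay applied to a parameter is
the trainer-level wd multiplied by that parameter's wd_mult attribute, so
registering bias and beta parameters with wd_mult=0.0 excludes them from L2
regularization while leaving the weights decayed as before. A minimal sketch
of the resulting behavior (assuming the MXNet 1.x Gluon API; the layer sizes
and hyperparameter values below are illustrative only, not from this PR):

    from mxnet import gluon

    net = gluon.nn.Dense(4, in_units=8, use_bias=True)
    net.initialize()

    # With this patch, the bias is registered with wd_mult=0.0 while the
    # weight keeps the default wd_mult=1.0.
    for name, param in net.collect_params().items():
        print(name, param.wd_mult)

    # The optimizer decays each parameter by wd * wd_mult, so with wd=1e-4
    # the weight is regularized but the bias is not.
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.1, 'wd': 1e-4})

Before this change, the same effect could be obtained manually, e.g. via
net.collect_params('.*bias').setattr('wd_mult', 0.0).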


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services
