anirudh2290 closed pull request #11827: fix for bug #10868: _backward_softsign activation is incorrect
URL: https://github.com/apache/incubator-mxnet/pull/11827
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:


diff --git a/src/operator/nn/activation-inl.h b/src/operator/nn/activation-inl.h
index e6f8915ab2f..2705177f951 100644
--- a/src/operator/nn/activation-inl.h
+++ b/src/operator/nn/activation-inl.h
@@ -174,7 +174,7 @@ void ActivationGradComputeImpl(const nnvm::NodeAttrs& attrs, const OpContext &ct
       break;
     case activation::kSoftSign:
       ActivationBackward<xpu, mshadow_op::softsign, mshadow_op::softsign_grad>(
-          ctx, inputs[0], inputs[1], req[0], outputs[0]);
+          ctx, inputs[0], inputs[2], req[0], outputs[0]);
       break;
     default:
       LOG(FATAL) << "unknown activation type";
@@ -198,12 +198,13 @@ void ActivationGradCompute(const nnvm::NodeAttrs& attrs,
                            const std::vector<TBlob>& inputs,
                            const std::vector<OpReqType>& req,
                            const std::vector<TBlob>& outputs) {
-#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
+#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
   bool relu = param.act_type == activation::kReLU;
   CHECK_EQ(inputs.size(), relu ? 2U : 3U);
 #else
-  CHECK_EQ(inputs.size(), 2U);
+  bool softsign = param.act_type == activation::kSoftSign;
+  CHECK_EQ(inputs.size(), softsign ? 3U : 2U);
 #endif
   CHECK_EQ(outputs.size(), 1U);
   CHECK_EQ(req.size(), 1U);
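
Note on the hunk above: the derivative of softsign, d/dx [x / (1 + |x|)] = 1 / (1 + |x|)^2, depends on the original input x and cannot be recovered from the output y alone (unlike ReLU, whose gradient can be computed from the output). That is why the backward path now reads the input data from inputs[2] instead of reusing inputs[1]. A minimal NumPy sketch of this relationship, illustrative only and not part of the diff:

    import numpy as np

    def softsign(x):
        # forward: y = x / (1 + |x|)
        return x / (1.0 + np.abs(x))

    def softsign_grad(x):
        # dy/dx = 1 / (1 + |x|)^2, a function of the input x, not of the output y
        return 1.0 / np.square(1.0 + np.abs(x))

    x = np.linspace(-3.0, 3.0, 61)
    eps = 1e-6
    numeric = (softsign(x + eps) - softsign(x - eps)) / (2.0 * eps)
    assert np.allclose(numeric, softsign_grad(x), atol=1e-5)
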
diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc
index d723bbe62d7..3404b5b700a 100644
--- a/src/operator/nn/activation.cc
+++ b/src/operator/nn/activation.cc
@@ -44,11 +44,19 @@ struct ActivationGrad {
                                           const std::vector<nnvm::NodeEntry>& ograds) const {
     std::vector<nnvm::NodeEntry> heads(ograds.begin(), ograds.end());
     heads.emplace_back(nnvm::NodeEntry{n, activation::kOut, 0});
-#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
+
     const NodeAttrs& attrs = n->attrs;
+    int act_type = dmlc::get<ActivationParam>(attrs.parsed).act_type;
+    if (act_type == activation::kSoftSign) {
+      // for softsign, the gradient needs the input data.
+      heads.push_back(n->inputs[activation::kData]);
+    }
+
+#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
     // for ReLU, no need to pass input data. This enables inplace optimization during the
     // forward pass.
-    if (dmlc::get<ActivationParam>(attrs.parsed).act_type != activation::kReLU) {
+    if (act_type != activation::kReLU &&
+        act_type != activation::kSoftSign) {
       heads.push_back(n->inputs[activation::kData]);
     }
 #endif
@@ -118,8 +126,8 @@ inline static bool BackwardActStorageType(const nnvm::NodeAttrs& attrs,
                                           std::vector<int> *in_attrs,
                                           std::vector<int> *out_attrs) {
   bool ret = false;
-#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
+#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
   if (param.act_type != activation::kReLU) {
     CHECK_EQ(in_attrs->size(), 3U);
     ret = ElemwiseStorageType<3, 1, false, false, false>(attrs, dev_mask,
@@ -133,10 +141,17 @@ inline static bool BackwardActStorageType(const nnvm::NodeAttrs& attrs,
                                                          in_attrs, out_attrs);
   }
 #else
-  CHECK_EQ(in_attrs->size(), 2U);
-  ret = ElemwiseStorageType<2, 1, false, false, false>(attrs, dev_mask,
-                                                       dispatch_mode,
-                                                       in_attrs, out_attrs);
+  if (param.act_type == activation::kSoftSign) {
+    CHECK_EQ(in_attrs->size(), 3U);
+    ret = ElemwiseStorageType<3, 1, false, false, false>(attrs, dev_mask,
+                                                         dispatch_mode,
+                                                         in_attrs, out_attrs);
+  } else {
+    CHECK_EQ(in_attrs->size(), 2U);
+    ret = ElemwiseStorageType<2, 1, false, false, false>(attrs, dev_mask,
+                                                         dispatch_mode,
+                                                         in_attrs, out_attrs);
+  }
 #endif
   CHECK_EQ(out_attrs->size(), 1U);
 #if MXNET_USE_MKLDNN == 1
diff --git a/src/operator/nn/activation.cu b/src/operator/nn/activation.cu
index 68b4053efdd..8892cc34f71 100644
--- a/src/operator/nn/activation.cu
+++ b/src/operator/nn/activation.cu
@@ -87,7 +87,7 @@ void ActivationGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
       ctx, inputs[0], inputs[1], req[0], outputs[0]);
   } else if (param.act_type == activation::kSoftSign) {
     ActivationBackward<gpu, mshadow_op::softsign, mshadow_op::softsign_grad>(
-      ctx, inputs[0], inputs[1], req[0], outputs[0]);
+      ctx, inputs[0], inputs[2], req[0], outputs[0]);
   } else {
     MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, {
       // XXX: for y = relu(x), y is passed as "in_data" to Backward()
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index c8707097dd3..add9ae26932 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -6850,6 +6850,10 @@ def test_activation():
                     lambda x: np.log(1. + np.exp(x)),
                     lambda x: 1. - 1 / (1 + np.exp(x)),
                     -3.0, 3.0],
+        'softsign': [lambda x: mx.sym.Activation(x, act_type='softsign'),
+                     lambda x: x / (1. + np.abs(x)),
+                     lambda x: 1. / np.square(1. + np.abs(x)),
+                     -3.0, 3.0],
     }
     # Loop over operators
     for name, op in unary_ops.items():


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services
