szha commented on a change in pull request #13896: Cudnn dropout URL: https://github.com/apache/incubator-mxnet/pull/13896#discussion_r253729889
########## File path: src/operator/nn/dropout-inl.h ########## @@ -227,52 +203,181 @@ class DropoutOp { } }; - void Init(const DropoutParam &param) { + explicit DropoutOp(const DropoutParam &param, Context ctx) { this->pkeep_ = 1.0f - param.p; this->mode_ = static_cast<dropout::DropoutOpMode>(param.mode); this->axes_ = param.axes; + this->dropout_passthrough_ = true; +#if MXNET_USE_CUDNN_DROPOUT + this->cudnn_off_ = param.cudnn_off && param.cudnn_off.value(); + this->ctx_ = ctx; + if (ctx.dev_type == kGPU && this->pkeep_ > 0 && !this->cudnn_off_) { + dtype_ = mshadow::DataType<DType>::kCudnnFlag; + CUDNN_CALL(cudnnCreateTensorDescriptor(&x_desc_)); + CUDNN_CALL(cudnnCreateTensorDescriptor(&y_desc_)); + CUDNN_CALL(cudnnCreateTensorDescriptor(&dx_desc_)); + CUDNN_CALL(cudnnCreateTensorDescriptor(&dy_desc_)); + CUDNN_CALL(cudnnCreateDropoutDescriptor(&dropout_desc_)); + } +#endif // MXNET_USE_CUDNN_DROPOUT + } + + ~DropoutOp() { +#if MXNET_USE_CUDNN_DROPOUT + if (this->ctx_.dev_type == kGPU && this->pkeep_ > 0 && !this->cudnn_off_) { + CUDNN_CALL(cudnnDestroyTensorDescriptor(x_desc_)); + CUDNN_CALL(cudnnDestroyTensorDescriptor(y_desc_)); + CUDNN_CALL(cudnnDestroyTensorDescriptor(dx_desc_)); + CUDNN_CALL(cudnnDestroyTensorDescriptor(dy_desc_)); + CUDNN_CALL(cudnnDestroyDropoutDescriptor(dropout_desc_)); + } +#endif // MXNET_USE_CUDNN_DROPOUT } +#if MXNET_USE_CUDNN_DROPOUT && defined(__CUDACC__) + inline bool CuDNNAvailable() { + return this->pkeep_ > 0 && !this->cudnn_off_; + } + + inline void CuDNNForward(const OpContext &ctx, + const TBlob &in, + const TBlob &mask, + const TBlob &out) { + Stream<xpu> *s = ctx.get_stream<xpu>(); + + // set dropout state. 
+ ctx.requested[0].get_cudnn_dropout_desc(&dropout_desc_, s, 1.0f - this->pkeep_, seed_); + + // describe input/output tensor + int dim[4], stride[4]; + dim[0] = 1; + dim[1] = 1; + dim[2] = 1; + dim[3] = out.Size(); + stride[0] = out.Size(); + stride[1] = out.Size(); + stride[2] = out.Size(); + stride[3] = 1; + CUDNN_CALL(cudnnSetTensorNdDescriptor(x_desc_, + dtype_, + 4, + dim, + stride)); + CUDNN_CALL(cudnnSetTensorNdDescriptor(y_desc_, + dtype_, + 4, + dim, + stride)); + + // perform dropout with cudnn + CUDNN_CALL(cudnnDropoutGetReserveSpaceSize(x_desc_, &dropout_reserve_byte_)); + // cudnn uses bits to record the positions that are dropped, so reserve bytes is always + // 1/8 of input size. + CHECK_GE(mask.Size() * sizeof(DType), dropout_reserve_byte_) << + "The size of the mask space is smaller than the required cudnn reserved space."; + CUDNN_CALL(cudnnDropoutForward(s->dnn_handle_, + dropout_desc_, + x_desc_, + in.dptr<DType>(), + y_desc_, + out.dptr<DType>(), + mask.dptr<DType>(), + dropout_reserve_byte_)); + } + + inline void CuDNNBackward(const OpContext &ctx, + const TBlob &out_grad, + const TBlob &mask, + const TBlob &in_grad) { + Stream<xpu> *s = ctx.get_stream<xpu>(); + + // describe input/output tensor + int dim[4], stride[4]; + dim[0] = 1; + dim[1] = 1; + dim[2] = 1; + dim[3] = in_grad.Size(); + stride[0] = in_grad.Size(); + stride[1] = in_grad.Size(); + stride[2] = in_grad.Size(); + stride[3] = 1; + CUDNN_CALL(cudnnSetTensorNdDescriptor(dy_desc_, + dtype_, + 4, + dim, + stride)); + CUDNN_CALL(cudnnSetTensorNdDescriptor(dx_desc_, + dtype_, + 4, + dim, + stride)); + + // perform dropout with cudnn + CUDNN_CALL(cudnnDropoutBackward(s->dnn_handle_, + dropout_desc_, + dy_desc_, + out_grad.dptr<DType>(), + dx_desc_, + in_grad.dptr<DType>(), + mask.dptr<DType>(), + dropout_reserve_byte_)); + } +#endif // MXNET_USE_CUDNN_DROPOUT && defined(__CUDACC__) + void Forward(const OpContext &ctx, const std::vector<TBlob> &in_data, const std::vector<OpReqType> 
&req, const std::vector<TBlob> &out_data) { + this->dropout_passthrough_ = true; if (req[dropout::kOut] != kNullOp) { CHECK_EQ(in_data.size(), 1U); if (ctx.is_train) { CHECK_EQ(out_data.size(), 2U); } Stream<xpu> *s = ctx.get_stream<xpu>(); + const TBlob &in = in_data[dropout::kData]; const TBlob &out = out_data[dropout::kOut]; - if (ctx.is_train || this->mode_ == dropout::kAlways) { - RandGenerator<xpu, DType> *pgen = ctx.requested[0].get_parallel_random<xpu, DType>(); - CHECK_NOTNULL(pgen); - if (this->axes_.ndim() != 0 || !MKLForward(s, pgen, this->pkeep_, in_data, out_data)) { - const TBlob &mask = out_data[dropout::kMask]; + const TBlob &mask = out_data[dropout::kMask]; + if (this->pkeep_ < 1 && (ctx.is_train || this->mode_ == dropout::kAlways)) { + this->dropout_passthrough_ = false; + if (this->axes_.ndim() == 0) { +#if MXNET_USE_MKL_DROPOUT + if (MKLAvailable()) { + MKLForward(ctx, in_data, out_data); + return; + } +#endif // MXNET_USE_MKL_DROPOUT +#if MXNET_USE_CUDNN_DROPOUT && defined(__CUDACC__) + if (CuDNNAvailable()) { + CuDNNForward(ctx, in, mask, out); + return; + } +#endif // MXNET_USE_CUDNN_DROPOUT && defined(__CUDACC__) + RandGenerator<xpu, DType> *pgen = ctx.requested[0].get_parallel_random<xpu, DType>(); + CHECK_NOTNULL(pgen); CHECK(req[dropout::kOut] != kAddTo); Review comment: Extending dropout to support addto isn't trivial and I can't think of a case that could trigger this... I will take a note and come back to this later. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services