szha commented on a change in pull request #13896: Cudnn dropout URL: https://github.com/apache/incubator-mxnet/pull/13896#discussion_r253729889
########## File path: src/operator/nn/dropout-inl.h ########## @@ -227,52 +203,181 @@ class DropoutOp { } }; - void Init(const DropoutParam &param) { + explicit DropoutOp(const DropoutParam &param, Context ctx) { this->pkeep_ = 1.0f - param.p; this->mode_ = static_cast<dropout::DropoutOpMode>(param.mode); this->axes_ = param.axes; + this->dropout_passthrough_ = true; +#if MXNET_USE_CUDNN_DROPOUT + this->cudnn_off_ = param.cudnn_off && param.cudnn_off.value(); + this->ctx_ = ctx; + if (ctx.dev_type == kGPU && this->pkeep_ > 0 && !this->cudnn_off_) { + dtype_ = mshadow::DataType<DType>::kCudnnFlag; + CUDNN_CALL(cudnnCreateTensorDescriptor(&x_desc_)); + CUDNN_CALL(cudnnCreateTensorDescriptor(&y_desc_)); + CUDNN_CALL(cudnnCreateTensorDescriptor(&dx_desc_)); + CUDNN_CALL(cudnnCreateTensorDescriptor(&dy_desc_)); + CUDNN_CALL(cudnnCreateDropoutDescriptor(&dropout_desc_)); + } +#endif // MXNET_USE_CUDNN_DROPOUT + } + + ~DropoutOp() { +#if MXNET_USE_CUDNN_DROPOUT + if (this->ctx_.dev_type == kGPU && this->pkeep_ > 0 && !this->cudnn_off_) { + CUDNN_CALL(cudnnDestroyTensorDescriptor(x_desc_)); + CUDNN_CALL(cudnnDestroyTensorDescriptor(y_desc_)); + CUDNN_CALL(cudnnDestroyTensorDescriptor(dx_desc_)); + CUDNN_CALL(cudnnDestroyTensorDescriptor(dy_desc_)); + CUDNN_CALL(cudnnDestroyDropoutDescriptor(dropout_desc_)); + } +#endif // MXNET_USE_CUDNN_DROPOUT } +#if MXNET_USE_CUDNN_DROPOUT && defined(__CUDACC__) + inline bool CuDNNAvailable() { + return this->pkeep_ > 0 && !this->cudnn_off_; + } + + inline void CuDNNForward(const OpContext &ctx, + const TBlob &in, + const TBlob &mask, + const TBlob &out) { + Stream<xpu> *s = ctx.get_stream<xpu>(); + + // set dropout state. 
+ ctx.requested[0].get_cudnn_dropout_desc(&dropout_desc_, s, 1.0f - this->pkeep_, seed_); + + // describe input/output tensor + int dim[4], stride[4]; + dim[0] = 1; + dim[1] = 1; + dim[2] = 1; + dim[3] = out.Size(); + stride[0] = out.Size(); + stride[1] = out.Size(); + stride[2] = out.Size(); + stride[3] = 1; + CUDNN_CALL(cudnnSetTensorNdDescriptor(x_desc_, + dtype_, + 4, + dim, + stride)); + CUDNN_CALL(cudnnSetTensorNdDescriptor(y_desc_, + dtype_, + 4, + dim, + stride)); + + // perform dropout with cudnn + CUDNN_CALL(cudnnDropoutGetReserveSpaceSize(x_desc_, &dropout_reserve_byte_)); + // cudnn uses bits to record the positions that are dropped, so reserve bytes is always + // 1/8 of input size. + CHECK_GE(mask.Size() * sizeof(DType), dropout_reserve_byte_) << + "The size of the mask space is smaller than the required cudnn reserved space."; + CUDNN_CALL(cudnnDropoutForward(s->dnn_handle_, + dropout_desc_, + x_desc_, + in.dptr<DType>(), + y_desc_, + out.dptr<DType>(), + mask.dptr<DType>(), + dropout_reserve_byte_)); + } + + inline void CuDNNBackward(const OpContext &ctx, + const TBlob &out_grad, + const TBlob &mask, + const TBlob &in_grad) { + Stream<xpu> *s = ctx.get_stream<xpu>(); + + // describe input/output tensor + int dim[4], stride[4]; + dim[0] = 1; + dim[1] = 1; + dim[2] = 1; + dim[3] = in_grad.Size(); + stride[0] = in_grad.Size(); + stride[1] = in_grad.Size(); + stride[2] = in_grad.Size(); + stride[3] = 1; + CUDNN_CALL(cudnnSetTensorNdDescriptor(dy_desc_, + dtype_, + 4, + dim, + stride)); + CUDNN_CALL(cudnnSetTensorNdDescriptor(dx_desc_, + dtype_, + 4, + dim, + stride)); + + // perform dropout with cudnn + CUDNN_CALL(cudnnDropoutBackward(s->dnn_handle_, + dropout_desc_, + dy_desc_, + out_grad.dptr<DType>(), + dx_desc_, + in_grad.dptr<DType>(), + mask.dptr<DType>(), + dropout_reserve_byte_)); + } +#endif // MXNET_USE_CUDNN_DROPOUT && defined(__CUDACC__) + void Forward(const OpContext &ctx, const std::vector<TBlob> &in_data, const std::vector<OpReqType> 
&req, const std::vector<TBlob> &out_data) { + this->dropout_passthrough_ = true; if (req[dropout::kOut] != kNullOp) { CHECK_EQ(in_data.size(), 1U); if (ctx.is_train) { CHECK_EQ(out_data.size(), 2U); } Stream<xpu> *s = ctx.get_stream<xpu>(); + const TBlob &in = in_data[dropout::kData]; const TBlob &out = out_data[dropout::kOut]; - if (ctx.is_train || this->mode_ == dropout::kAlways) { - RandGenerator<xpu, DType> *pgen = ctx.requested[0].get_parallel_random<xpu, DType>(); - CHECK_NOTNULL(pgen); - if (this->axes_.ndim() != 0 || !MKLForward(s, pgen, this->pkeep_, in_data, out_data)) { - const TBlob &mask = out_data[dropout::kMask]; + const TBlob &mask = out_data[dropout::kMask]; + if (this->pkeep_ < 1 && (ctx.is_train || this->mode_ == dropout::kAlways)) { + this->dropout_passthrough_ = false; + if (this->axes_.ndim() == 0) { +#if MXNET_USE_MKL_DROPOUT + if (MKLAvailable()) { + MKLForward(ctx, in_data, out_data); + return; + } +#endif // MXNET_USE_MKL_DROPOUT +#if MXNET_USE_CUDNN_DROPOUT && defined(__CUDACC__) + if (CuDNNAvailable()) { + CuDNNForward(ctx, in, mask, out); + return; + } +#endif // MXNET_USE_CUDNN_DROPOUT && defined(__CUDACC__) + RandGenerator<xpu, DType> *pgen = ctx.requested[0].get_parallel_random<xpu, DType>(); + CHECK_NOTNULL(pgen); CHECK(req[dropout::kOut] != kAddTo); Review comment: Extending dropout to support addto isn't trivial and I can't think of a case that could trigger this... I will take a note and come back to this later. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services