reminisce commented on a change in pull request #8340: Fill optimizations
URL: https://github.com/apache/incubator-mxnet/pull/8340#discussion_r145600607
##
File path: src/operator/tensor/init_op.h
##
@@ -164,19 +164,38 @@ inline bool InitStorageType(const nnvm::NodeAttrs& attrs,
return true;
}
+/*! \brief Fill output with a scalar integer value */
 template<typename xpu, int value>
 void FillCompute(const nnvm::NodeAttrs& attrs,
                  const OpContext& ctx,
                  const std::vector<TBlob>& inputs,
                  const std::vector<OpReqType>& req,
                  const std::vector<TBlob>& outputs) {
-  using namespace mshadow;
-  using namespace mshadow::expr;
-  Stream<xpu> *s = ctx.get_stream<xpu>();
-  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
-    Tensor<xpu, 1, DType> out = outputs[0].FlatTo1D<xpu, DType>(s);
-    ASSIGN_DISPATCH(out, req[0], scalar<DType>(value));
-  });
+  if (req[0] != kNullOp) {
+    mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
+    MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+      mxnet_op::Kernel<mxnet_op::set_to_int<value>, xpu>::Launch(s,
+                                                                 outputs[0].Size(),
+                                                                 outputs[0].dptr<DType>());
+    });
+  }
+}
+
+/*! \brief Fast CPU fill-zero version using memset */
+template<>
+inline void FillCompute<cpu, 0>(const nnvm::NodeAttrs& attrs,
+                                const OpContext& ctx,
+                                const std::vector<TBlob>& inputs,
+                                const std::vector<OpReqType>& req,
+                                const std::vector<TBlob>& outputs) {
+  if (req[0] != kNullOp) {
+    const size_t size = outputs[0].Size();
+    if (size) {
+      MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+        memset(outputs[0].dptr<DType>(), 0, size * sizeof(DType));
Review comment:
`outputs[0].dptr_` is more efficient here than `outputs[0].dptr<DType>()`.
Question: How much faster is this compared to the original implementation of
filling up a `TBlob`?
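
A minimal sketch of the suggested tweak (not part of the PR diff): `TBlob::dptr_` is the raw `void*` data member, so using it skips the type check that `dptr<DType>()` performs on each call.

```cpp
// Sketch only, not the PR's code: the same memset path, but through the raw
// dptr_ member rather than the type-checked dptr<DType>() accessor.
MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
  memset(outputs[0].dptr_, 0, size * sizeof(DType));  // dptr_ is void*, no per-call type check
});
```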
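On the speed question, a standalone micro-benchmark sketch (plain C++, not the MXNet code path) gives a rough feel for `memset` versus an element-wise fill; actual numbers depend on the mshadow expression path, OpenMP threading, buffer size, and hardware.

```cpp
// Standalone sketch: compare memset with an element-wise fill of the same buffer.
// Not the MXNet benchmark; the volatile read keeps the compiler from lowering
// the plain loop back into memset.
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <vector>

int main() {
  const std::size_t n = 1 << 24;            // ~16M floats (~64 MB)
  std::vector<float> buf(n, 1.0f);          // touch the pages up front

  auto t0 = std::chrono::steady_clock::now();
  std::memset(buf.data(), 0, n * sizeof(float));
  auto t1 = std::chrono::steady_clock::now();

  volatile float zero = 0.0f;               // opaque value: forces a real store loop
  for (std::size_t i = 0; i < n; ++i) buf[i] = zero;
  auto t2 = std::chrono::steady_clock::now();

  auto us = [](std::chrono::steady_clock::time_point a,
               std::chrono::steady_clock::time_point b) {
    return std::chrono::duration_cast<std::chrono::microseconds>(b - a).count();
  };
  std::printf("memset: %lld us, element-wise fill: %lld us\n",
              static_cast<long long>(us(t0, t1)),
              static_cast<long long>(us(t1, t2)));
  return 0;
}
```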