haojin2 commented on a change in pull request #16720: [Numpy] Implement numpy operator 'average'
URL: https://github.com/apache/incubator-mxnet/pull/16720#discussion_r341962909
 
 

 ##########
 File path: src/operator/numpy/np_broadcast_reduce_op.h
 ##########
 @@ -398,6 +399,353 @@ void ReduceAxesComputeWithWorkspaceImpl(const OpContext& ctx,
   });
 }
 
+struct NumpyWeightedAverageParam : public dmlc::Parameter<NumpyWeightedAverageParam> {
+  dmlc::optional<mxnet::Tuple<int>> axis;
+  bool returned;
+  bool weighted;
+
+  DMLC_DECLARE_PARAMETER(NumpyWeightedAverageParam) {
+    DMLC_DECLARE_FIELD(axis)
+      .set_default(dmlc::optional<mxnet::Tuple<int>>())
+      .describe("Axis or axes along which an average is performed. The default, axis=None, will average "
+                "all of the elements of the input array. If axis is negative it counts from the "
+                "last to the first axis.");
+    DMLC_DECLARE_FIELD(returned)
+      .set_default(false)
+      .describe("If True, the tuple (average, sum_of_weights) is returned, "
+                "otherwise only the average is returned. "
+                "If weights=None, sum_of_weights is equivalent to "
+                "the number of elements over which the average is taken.");
+    DMLC_DECLARE_FIELD(weighted)
+      .set_default(true)
+      .describe("Auxiliary flag to deal with none weights.");
+  }
+};
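
(For reference only, not part of the diff: a minimal plain-C++ sketch of what the operator computes for a fully reduced, weighted input, assuming NumPy's average semantics. The weighted_average helper name is made up for illustration; returned=True corresponds to handing back both values.)

#include <cassert>
#include <cstdio>
#include <utility>
#include <vector>

// Reference semantics (full reduction): avg = sum(a * w) / sum(w),
// sum_of_weights = sum(w). With weights=None, w is implicitly all ones,
// so sum_of_weights is just the element count.
std::pair<double, double> weighted_average(const std::vector<double>& a,
                                           const std::vector<double>& w) {
  assert(a.size() == w.size());
  double sum_of_wa = 0.0, sum_of_w = 0.0;
  for (size_t i = 0; i < a.size(); ++i) {
    sum_of_wa += a[i] * w[i];
    sum_of_w += w[i];
  }
  assert(sum_of_w != 0.0 && "Weights sum to zero, can't be normalized");
  return std::make_pair(sum_of_wa / sum_of_w, sum_of_w);
}

int main() {
  std::vector<double> a = {1.0, 2.0, 3.0, 4.0};
  std::vector<double> w = {4.0, 3.0, 2.0, 1.0};
  std::pair<double, double> res = weighted_average(a, w);
  // With returned=True the caller gets (average, sum_of_weights).
  std::printf("average = %g, sum_of_weights = %g\n", res.first, res.second);  // 2, 10
}
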
+
+inline bool NumpyWeightedAverageShape(const nnvm::NodeAttrs& attrs,
+                                      std::vector<TShape> *in_attrs,
+                                      std::vector<TShape> *out_attrs) {
+  const NumpyWeightedAverageParam& param = nnvm::get<NumpyWeightedAverageParam>(attrs.parsed);
+  CHECK_EQ(in_attrs->size(), (param.weighted ? 2U : 1U));
+  CHECK_EQ(out_attrs->size(), 2U);
+  if (!shape_is_known(in_attrs->at(0))) {
+    return false;
+  }
+
+  const TShape& a_shape = (*in_attrs)[0];
+  SHAPE_ASSIGN_CHECK(*out_attrs, 0,
+                     NumpyReduceAxesShapeImpl(a_shape, param.axis, false));
+
+  if (param.weighted) {
+    const TShape& w_shape = (*in_attrs)[1];
+    if (w_shape.ndim() != a_shape.ndim()) {
+      CHECK_EQ(w_shape.ndim(), 1U) << "1D weights expected when shapes of a and weights differ.";
+
+      CHECK_EQ(param.axis.has_value(), true) << "Axis must be specified when shapes of a and weights differ.";
+
+      mxnet::Tuple<int> axes(param.axis.value());
+
+      CHECK_EQ(axes.ndim(), 1U) << "Axis must be int when shapes of a and weights differ.";
+
+      int red_axis = axes[0] < 0 ? axes[0] + a_shape.ndim() : axes[0];
+
+      CHECK_EQ(a_shape[red_axis], w_shape[0]) << "Length of weights not compatible with specified "
+                                             "axis.";
+
+      SHAPE_ASSIGN_CHECK(*out_attrs, 1,
+                         NumpyReduceAxesShapeImpl(w_shape, dmlc::optional<mxnet::Tuple<int>>(), false));
+    } else {
+      for (int i = 0; i < w_shape.ndim(); i++) {
+        CHECK_EQ(w_shape[i], a_shape[i]);
+      }
+      SHAPE_ASSIGN_CHECK(*out_attrs, 1,
+                         NumpyReduceAxesShapeImpl(w_shape, param.axis, false));
+    }
+  } else {
+    SHAPE_ASSIGN_CHECK(*out_attrs, 1, TShape(0, -1));
+  }
+
+  return shape_is_known(out_attrs->at(0)) && shape_is_known(out_attrs->at(1));
+}
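
(Again for reference only: a standalone sketch of the shape rule above, with a hypothetical reduce_shape helper standing in for NumpyReduceAxesShapeImpl with keepdims=false. For a of shape (2, 3, 4), 1-D weights of shape (3,) and axis=1, the average comes out as (2, 4) and sum_of_weights as a scalar.)

#include <algorithm>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for NumpyReduceAxesShapeImpl with keepdims=false:
// drop every axis listed in `axes`; an empty `axes` means reduce everything.
std::vector<int> reduce_shape(const std::vector<int>& shape, const std::vector<int>& axes) {
  if (axes.empty()) return {};  // full reduction -> scalar shape ()
  std::vector<int> out;
  for (int i = 0; i < static_cast<int>(shape.size()); ++i) {
    if (std::find(axes.begin(), axes.end(), i) == axes.end()) out.push_back(shape[i]);
  }
  return out;
}

int main() {
  std::vector<int> avg_shape = reduce_shape({2, 3, 4}, {1});  // -> (2, 4)
  std::vector<int> scl_shape = reduce_shape({3}, {});         // -> ()
  std::printf("avg ndim = %zu, sum_of_weights ndim = %zu\n",
              avg_shape.size(), scl_shape.size());
}
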
+
+template<int req, bool onedim = false>
+struct avg_grad_a_kernel {
+  template<typename DType>
+  MSHADOW_XINLINE static void Map(int i,
+                                  DType* out,
+                                  const DType* w,
+                                  const DType* scl,
+                                  const DType* ograd,
+                                  const mshadow::Shape<6>& small,
+                                  const mshadow::Shape<6>& big) {
+    // partial a = w / sum(w)
+    size_t big_idx = i;
+    size_t small_idx = i;
+    size_t big_stride = 1;
+    size_t small_stride = 1;
+    size_t red_axis_idx = 0;
+    for (int axis = 5; axis >= 0; --axis) {
+      size_t axis_idx = big_idx % big[axis];
+      small_idx -= axis_idx * big_stride;
+      if (small[axis] != 1) {
+        small_idx += axis_idx * small_stride;
+      } else if (onedim && small[axis] != big[axis]) {
+        red_axis_idx = axis_idx;
+      }
+      big_idx /= big[axis];
+      big_stride *= big[axis];
+      small_stride *= small[axis];
+    }
+    if (onedim) {
+      KERNEL_ASSIGN(out[i], req, (ograd[small_idx] * (w[red_axis_idx] / *scl)));
+    } else {
+      KERNEL_ASSIGN(out[i], req, (ograd[small_idx] * (w[i] / scl[small_idx])));
+    }
+  }
+};
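
(A quick sanity check of the formula in the comment above, outside of MXNet: for avg = sum(a*w)/sum(w), the gradient with respect to a[i] is w[i]/sum(w). A finite-difference comparison in plain C++:)

#include <cstdio>
#include <vector>

// avg(a, w) = sum(a * w) / sum(w)
double avg(const std::vector<double>& a, const std::vector<double>& w) {
  double num = 0.0, den = 0.0;
  for (size_t i = 0; i < a.size(); ++i) { num += a[i] * w[i]; den += w[i]; }
  return num / den;
}

int main() {
  std::vector<double> a = {1.0, 2.0, 3.0};
  std::vector<double> w = {0.5, 1.0, 1.5};
  const double eps = 1e-6;
  const double sum_w = 0.5 + 1.0 + 1.5;
  for (size_t i = 0; i < a.size(); ++i) {
    std::vector<double> ap = a;
    ap[i] += eps;
    double fd = (avg(ap, w) - avg(a, w)) / eps;  // numerical derivative
    double analytic = w[i] / sum_w;              // w[i] / sum(w)
    std::printf("i=%zu  fd=%.6f  analytic=%.6f\n", i, fd, analytic);
  }
}
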
+
+template<int req>
+struct avg_grad_w_kernel {
+  template<typename DType>
+  MSHADOW_XINLINE static void Map(int i,
+                                  DType* out,
+                                  const DType* a,
+                                  const DType* scl,
+                                  const DType* sum_of_wa,
+                                  const DType* ograd,
+                                  const mshadow::Shape<6>& small,
+                                  const mshadow::Shape<6>& big) {
+    // partial w = (a * sum(w) - sum(a*w)) / (sum(w) * sum(w))
+    size_t big_idx = i;
+    size_t small_idx = i;
+    size_t big_stride = 1;
+    size_t small_stride = 1;
+    for (int axis = 5; axis >= 0; --axis) {
+      size_t axis_idx = big_idx % big[axis];
+      small_idx -= axis_idx * big_stride;
+      if (small[axis] != 1) {
+        small_idx += axis_idx * small_stride;
+      }
+      big_idx /= big[axis];
+      big_stride *= big[axis];
+      small_stride *= small[axis];
+    }
+    DType ret = ograd[small_idx] * (((a[i] * scl[small_idx] - sum_of_wa[small_idx]) / scl[small_idx]) / scl[small_idx]);
+    KERNEL_ASSIGN(out[i], req, ret);
+  }
+};
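
(Same kind of finite-difference check for this kernel's formula: d avg / d w[i] = (a[i]*sum(w) - sum(a*w)) / sum(w)^2. Plain C++, not MXNet code:)

#include <cstdio>
#include <vector>

double avg(const std::vector<double>& a, const std::vector<double>& w) {
  double num = 0.0, den = 0.0;
  for (size_t i = 0; i < a.size(); ++i) { num += a[i] * w[i]; den += w[i]; }
  return num / den;
}

int main() {
  std::vector<double> a = {1.0, 2.0, 3.0};
  std::vector<double> w = {0.5, 1.0, 1.5};
  double sum_w = 0.0, sum_aw = 0.0;
  for (size_t i = 0; i < a.size(); ++i) { sum_w += w[i]; sum_aw += a[i] * w[i]; }
  const double eps = 1e-6;
  for (size_t i = 0; i < w.size(); ++i) {
    std::vector<double> wp = w;
    wp[i] += eps;
    double fd = (avg(a, wp) - avg(a, w)) / eps;                   // numerical derivative
    double analytic = (a[i] * sum_w - sum_aw) / (sum_w * sum_w);  // formula above
    std::printf("i=%zu  fd=%.6f  analytic=%.6f\n", i, fd, analytic);
  }
}
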
+
+template<int req>
+struct avg_grad_w_1D_kernel {
+  template<typename DType>
+  MSHADOW_XINLINE static void Map(int i,
+                                  DType* out,
+                                  const DType* a,
+                                  const DType* scl,
+                                  const DType* sum_of_wa,
+                                  const DType* ograd,
+                                  const mshadow::Shape<6>& big,
+                                  const int red_axis) {
+    DType scl_val = *scl;
+    size_t tail = 1;
+    size_t head = 1;
+    for (int axis = 5; axis > red_axis; --axis) {
+      tail *= big[axis];
+    }
+    for (int axis = 0; axis < red_axis; ++axis) {
+      head *= big[axis];
+    }
+    DType ret = 0;
+    for (size_t j = 0; j < head; ++j) {
+      for (size_t k = 0; k < tail; ++k) {
+        size_t a_idx = j*(tail*big[red_axis]) + i * tail + k;
+        size_t small_idx = j*tail + k;
+        ret += (ograd[small_idx] * (((a[a_idx] * scl_val - sum_of_wa[small_idx]) / scl_val) / scl_val));
+      }
+    }
+    KERNEL_ASSIGN(out[i], req, ret);
+  }
+};
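
(The head/tail arithmetic above can be checked in isolation. Assuming a (2, 3, 4) input reduced along axis 1 — so head = 2 and tail = 4 — the kernel's a_idx = j*(tail*big[red_axis]) + i*tail + k should be the row-major offset of coordinate (j, i, k):)

#include <cstdio>

int main() {
  // a has shape (2, 3, 4) stored row-major; red_axis = 1 is the weights axis.
  const int big[3] = {2, 3, 4};
  const int red_axis = 1;
  int head = 1, tail = 1;
  for (int ax = 0; ax < red_axis; ++ax) head *= big[ax];       // head = 2
  for (int ax = red_axis + 1; ax < 3; ++ax) tail *= big[ax];   // tail = 4
  for (int i = 0; i < big[red_axis]; ++i) {  // weight index i
    for (int j = 0; j < head; ++j) {
      for (int k = 0; k < tail; ++k) {
        int a_idx = j * (tail * big[red_axis]) + i * tail + k;
        int direct = (j * big[1] + i) * big[2] + k;  // row-major offset of (j, i, k)
        if (a_idx != direct) std::printf("mismatch at (%d, %d, %d)\n", j, i, k);
      }
    }
  }
  std::printf("head=%d tail=%d: index arithmetic consistent\n", head, tail);
}
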
+
+inline mshadow::Shape<6> TShapeToMShadowShape(const mxnet::TShape& shape) {
+  mshadow::Shape<6> mshape;
+  for (int i = 0; i < 6; ++i) {
+    mshape[i] = (i < shape.ndim()) ? shape[i] : 1;
+  }
+  return mshape;
+}
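
(This helper pads every shape to rank 6, which is what lets the two gradient kernels above walk a fixed number of axes. Below is a standalone reproduction of their big->small flat-index mapping, for big = (2, 3, 4) and small = (2, 1, 4) padded to six dims, checked against a direct coordinate computation. Plain C++, shapes chosen only for illustration:)

#include <cstdio>

int main() {
  // big and small are the input shape and the keepdims-reduced shape,
  // both padded on the left to rank 6 as the helper above does.
  const int big[6]   = {1, 1, 1, 2, 3, 4};
  const int small[6] = {1, 1, 1, 2, 1, 4};
  int total = 1;
  for (int ax = 0; ax < 6; ++ax) total *= big[ax];
  for (int i = 0; i < total; ++i) {
    // Same walk as in the kernels: peel coordinates off the last axis first.
    int big_idx = i, small_idx = i, big_stride = 1, small_stride = 1;
    for (int axis = 5; axis >= 0; --axis) {
      int axis_idx = big_idx % big[axis];
      small_idx -= axis_idx * big_stride;
      if (small[axis] != 1) small_idx += axis_idx * small_stride;
      big_idx /= big[axis];
      big_stride *= big[axis];
      small_stride *= small[axis];
    }
    // Direct computation: coordinates of i in (2, 3, 4), reduced axis dropped.
    int c0 = (i / 12) % 2, c2 = i % 4;
    int expected = c0 * 4 + c2;  // row-major offset in (2, 1, 4)
    if (small_idx != expected) std::printf("mismatch at i=%d\n", i);
  }
  std::printf("big->small index mapping consistent\n");
}
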
+
+template<typename xpu, bool back = false>
+void NumpyWeightedAverageComputeImpl(const nnvm::NodeAttrs& attrs,
+                                     const OpContext& ctx,
+                                     const std::vector<TBlob>& inputs,
+                                     const std::vector<OpReqType>& req,
+                                     const std::vector<TBlob>& outputs,
+                                     const dmlc::optional<mxnet::Tuple<int>>& axis) {
+  using namespace mshadow;
+  using namespace mxnet_op;
+  Stream<xpu>* s = ctx.get_stream<xpu>();
+  const TBlob& data = inputs[0];
+  TShape small1 = NumpyReduceAxesShapeImpl(data.shape_, axis, true);
+  // Reshape weights
+  TShape small2 = small1;
+  TBlob weights = inputs[1];
+
+  bool one_dim = weights.shape_.ndim() != data.shape_.ndim();
+
+  int red_axis = -1;
+
+  if (one_dim) {
+    CHECK_EQ(weights.shape_.ndim(), 1U) << "1D weights expected when shapes of a and weights differ.";
+
+    CHECK_EQ(axis.has_value(), true) << "Axis must be specified when shapes of a and weights differ.";
+
+    Tuple<int> axes(axis.value());
+
+    CHECK_EQ(axes.ndim(), 1U) << "Axis must be int when shapes of a and weights differ.";
+
+    red_axis = axes[0] < 0 ? axes[0] + data.shape_.ndim() : axes[0];
+
+    CHECK_EQ(weights.shape_[0], data.shape_[red_axis]) << "Length of weights not compatible with specified axis.";
+
+    TShape new_w_shape(data.shape_.ndim(), 1);
+    new_w_shape[red_axis] = weights.shape_[0];
+    weights = weights.reshape(new_w_shape);
+    small2 = TShape(new_w_shape.ndim(), 1);
+  }
+  MSHADOW_TYPE_SWITCH(data.type_flag_, DType, {
+    // Get temp space
+    size_t temp_data_size = data.shape_.Size() * sizeof(DType);
+    size_t temp_sum_size = small1.Size() * sizeof(DType);
+    TShape src_shape, dst_shape;
+    BroadcastReduceShapeCompact(data.shape_, small1, &src_shape, &dst_shape);
+    size_t workspace_size = 0;
+    MXNET_NDIM_SWITCH(dst_shape.ndim(), NDim, {
+      workspace_size = broadcast::ReduceWorkspaceSize<NDim, DType>(
+          s, dst_shape, req[0], src_shape);
+    });
+    size_t temp_mem_size = temp_data_size + temp_sum_size + workspace_size;
+    Tensor<xpu, 1, char> temp_mem =
+    ctx.requested[0].get_space_typed<xpu, 1, char>(Shape1(temp_mem_size), s);
+    DType *temp_data_ptr = reinterpret_cast<DType*>(temp_mem.dptr_);
+    DType *temp_sum_ptr = reinterpret_cast<DType*>(temp_mem.dptr_ + temp_data_size);
+    char *workspace_ptr = temp_mem.dptr_ + temp_data_size + temp_sum_size;
+    Tensor<xpu, 1, char> workspace(workspace_ptr, Shape1(workspace_size), s);
+
+    // Compute weighted data
+    TBlob wa = TBlob(temp_data_ptr, data.shape_, s);
+    BinaryBroadcastCompute<xpu, mshadow_op::mul>(attrs, ctx, {data, weights}, req, {wa});
+
+    // Compute sum of weighted data
+    TBlob sum_of_wa = TBlob(temp_sum_ptr, small1, s);
+    ReduceAxesComputeWithWorkspaceImpl<xpu, mshadow_op::sum, true>(
+    ctx, {wa}, {kWriteTo}, {sum_of_wa}, workspace, src_shape, dst_shape);
+
+    if (!back) {
+      const TBlob& avg = outputs[0];
+      const TBlob& sum_of_weights = outputs[1];
+      TShape w_src_shape, w_dst_shape;
+      BroadcastReduceShapeCompact(weights.shape_, small2, &w_src_shape, &w_dst_shape);
+      // Compute sum of weight
+      TBlob scl = sum_of_weights.reshape(small2);
+      ReduceAxesComputeWithWorkspaceImpl<xpu, mshadow_op::sum, true>(
+          ctx, {weights}, {kWriteTo}, {scl}, workspace, w_src_shape, w_dst_shape);
+
+      DType* scl_ptr = scl.dptr<DType>();
+      for (size_t i = 0; i < scl.shape_.Size(); ++i) {
+        CHECK_NE(scl_ptr[i], 0) << "Weights sum to zero, can't be normalized";
+      }
+
+      // Compute avg and assign output
+      BinaryBroadcastCompute<xpu, mshadow_op::div>(attrs, ctx, {sum_of_wa, scl}, req, {avg.reshape(small1)});
+    } else {
+      // Compute and assign the derivatives of a and weights
+      const TBlob& igrad_a = outputs[0];
+      const TBlob& igrad_w = outputs[1];
+      const TBlob& scl = inputs[2];
+      const TBlob& ograd = inputs[3];
+      MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
+        if (one_dim) {
+          // 1D weights
+          Kernel<avg_grad_a_kernel<req_type, true>, xpu>::Launch(s, igrad_a.shape_.Size(), igrad_a.dptr<DType>(),
+                                                       weights.dptr<DType>(), scl.dptr<DType>(), ograd.dptr<DType>(),
+                                                       TShapeToMShadowShape(small1),
+                                                       TShapeToMShadowShape(igrad_a.shape_));
 
 Review comment:
   to avoid lines getting too long.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services
