[GitHub] [tvm] kparzysz-quic commented on a diff in pull request #15057: [QNN] Implement quantized avg_pool2d

via GitHub Fri, 09 Jun 2023 07:29:40 -0700


kparzysz-quic commented on code in PR #15057:
URL: https://github.com/apache/tvm/pull/15057#discussion_r1224381566



##########
src/relay/qnn/op/avg_pool2d.cc:
##########
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/qnn/op/Avg_pool2d.cc
+ * \brief Property def of qnn Avg_pool2d operator.
+ */
+
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/base.h>
+#include <tvm/relay/op.h>
+#include <tvm/relay/op_attr_types.h>
+#include <tvm/relay/qnn/attrs.h>
+#include <tvm/tir/expr.h>
+
+#include "../../op/nn/nn.h"
+#include "../../op/nn/pooling.h"
+#include "../../op/nn/pooling_common.h"
+#include "../../op/tensor/transform.h"
+#include "../../transforms/infer_layout_utils.h"
+#include "../../transforms/pattern_utils.h"
+#include "../utils.h"
+#include "op_common.h"
+
+namespace tvm {
+namespace relay {
+namespace qnn {
+
+// relay.op.qnn.avg_pool2d
+bool QnnAvgPool2DRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+                     const TypeReporter& reporter) {
+  // Expected Types: data, input_zero_point, input_scale, output_zero_point, output_scale
+  // out_type
+
+  ICHECK_EQ(types.size(), 6);
+
+  const auto* data = types[0].as<TensorTypeNode>();
+  if (data == nullptr) return false;
+  ICHECK(data->dtype == DataType::Int(8) || data->dtype == DataType::UInt(8))
+      << "Expected quantized avg_pool2d type(int8, uint8) for input but was " << data->dtype;
+
+  const auto* param = attrs.as<AvgPool2DAttrs>();
+  ICHECK(param != nullptr) << "AvgPool2DAttrs cannot be nullptr.";
+
+  // Check the types of scale and zero points.
+  for (size_t i = 1; i < 5; ++i) {
+    if (types[i].as<IncompleteTypeNode>()) {
+      return false;
+    }
+  }
+
+  ICHECK(IsScalarType(types[1], DataType::Float(32)));  // input_scale
+  ICHECK(IsScalarType(types[2], DataType::Int(32)));    // input_zero_point
+  ICHECK(IsScalarType(types[3], DataType::Float(32)));  // output_scale
+  ICHECK(IsScalarType(types[4], DataType::Int(32)));    // output_zero_point
+
+  // Find the output shape and data type
+  const auto dshape = data->shape;
+  ICHECK_GE(dshape.size(), 2U)
+      << "Pool2D only support input >= 2-D: input must have height and width";
+
+  // Check input and output layout
+  Layout layout(param->layout);
+  // The Layout is always NHWC
+  ICHECK(layout.Contains(LayoutAxis::Get('H')) && layout.Contains(LayoutAxis::Get('W')) &&
+         !layout.Contains(LayoutAxis::Get('h')) && !layout.Contains(LayoutAxis::Get('w')))
+      << "Invalid input layout " << layout
+      << ". qnn_avg_pool2d inut layout must have H and W, which cannot be split";
+
+  // Find the output shape and data type
+  const auto hidx = layout.IndexOf(LayoutAxis::Get('H'));
+  const auto widx = layout.IndexOf(LayoutAxis::Get('W'));
+
+  IndexExpr pad_h, pad_w;
+  if (param->padding.size() == 1) {
+    pad_h = param->padding[0] * 2;
+    pad_w = param->padding[0] * 2;
+  } else if (param->padding.size() == 2) {
+    // (top, left)
+    pad_h = param->padding[0] * 2;
+    pad_w = param->padding[1] * 2;
+  } else if (param->padding.size() == 4) {
+    // (top, left, bottom, right)
+    pad_h = param->padding[0] + param->padding[2];
+    pad_w = param->padding[1] + param->padding[3];
+  } else {
+    return false;
+  }
+
+  std::vector<IndexExpr> oshape(dshape.begin(), dshape.end());
+  if (dshape[hidx].as<tir::AnyNode>()) {
+    oshape[hidx] = dshape[hidx];
+  } else {
+    oshape[hidx] =
+        calculate_pool_dimension(dshape[hidx], pad_h, param->pool_size[0], param->dilation[0],
+                                 param->strides[0], param->ceil_mode);
+  }
+  if (dshape[widx].as<tir::AnyNode>()) {
+    oshape[widx] = dshape[widx];
+  } else {
+    oshape[widx] =
+        calculate_pool_dimension(dshape[widx], pad_w, param->pool_size[1], param->dilation[1],
+                                 param->strides[1], param->ceil_mode);
+  }
+
+  // assign output type
+  reporter->Assign(types[5], TensorType(oshape, data->dtype));
+  return true;
+}
+
+InferCorrectLayoutOutput QnnAvgPoolInferCorrectLayout(const Attrs& attrs,
+                                                      const Array<Layout>& new_in_layouts,
+                                                      const Array<Layout>& old_in_layouts,
+                                                      const Array<tvm::relay::Type>& old_in_types) {
+  // Use Relay AvgPool2D Infer correct layout.
+  auto avgpool_new_layouts =
+      PoolInferCorrectLayout<AvgPool2DAttrs>(attrs, new_in_layouts, old_in_layouts, old_in_types);
+
+  // Fill the layouts of remaining input tensors - scales and zero points. The layouts of these
+  // tensors can be treated as channel layout.
+  Layout channel_layout = Layout("C");
+  Array<Layout> input_layouts = {avgpool_new_layouts->input_layouts[0], channel_layout,
+                                 channel_layout, channel_layout, channel_layout};
+  Array<Layout> output_layouts = avgpool_new_layouts->output_layouts;
+  return InferCorrectLayoutOutput(input_layouts, output_layouts, attrs);
+}
+
+/*
+ * \brief Forward rewrite the qnn Avg_pool2d op.
+ * \param attrs The QNN Avg_pool2d attrs.
+ * \param new_args The new mutated args to the call node.
+ * \param arg_types The types of input and output.
+ * \return The sequence of Relay ops for qnn Avg_pool2d op.
+ * \note Lowering of the qnn.Avg_pool2d operator
+
+ *  Quantized Avg_pool2d will take one quantized input tensor and returns another

Review Comment:
   Done



##########
src/relay/qnn/op/avg_pool2d.cc:
##########
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/qnn/op/Avg_pool2d.cc
+ * \brief Property def of qnn Avg_pool2d operator.
+ */
+
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/base.h>
+#include <tvm/relay/op.h>
+#include <tvm/relay/op_attr_types.h>
+#include <tvm/relay/qnn/attrs.h>
+#include <tvm/tir/expr.h>
+
+#include "../../op/nn/nn.h"
+#include "../../op/nn/pooling.h"
+#include "../../op/nn/pooling_common.h"
+#include "../../op/tensor/transform.h"
+#include "../../transforms/infer_layout_utils.h"
+#include "../../transforms/pattern_utils.h"
+#include "../utils.h"
+#include "op_common.h"
+
+namespace tvm {
+namespace relay {
+namespace qnn {
+
+// relay.op.qnn.avg_pool2d
+bool QnnAvgPool2DRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+                     const TypeReporter& reporter) {
+  // Expected Types: data, input_zero_point, input_scale, output_zero_point, output_scale
+  // out_type
+
+  ICHECK_EQ(types.size(), 6);
+
+  const auto* data = types[0].as<TensorTypeNode>();
+  if (data == nullptr) return false;
+  ICHECK(data->dtype == DataType::Int(8) || data->dtype == DataType::UInt(8))
+      << "Expected quantized avg_pool2d type(int8, uint8) for input but was " << data->dtype;
+
+  const auto* param = attrs.as<AvgPool2DAttrs>();
+  ICHECK(param != nullptr) << "AvgPool2DAttrs cannot be nullptr.";
+
+  // Check the types of scale and zero points.
+  for (size_t i = 1; i < 5; ++i) {
+    if (types[i].as<IncompleteTypeNode>()) {
+      return false;
+    }
+  }
+
+  ICHECK(IsScalarType(types[1], DataType::Float(32)));  // input_scale
+  ICHECK(IsScalarType(types[2], DataType::Int(32)));    // input_zero_point
+  ICHECK(IsScalarType(types[3], DataType::Float(32)));  // output_scale
+  ICHECK(IsScalarType(types[4], DataType::Int(32)));    // output_zero_point
+
+  // Find the output shape and data type
+  const auto dshape = data->shape;
+  ICHECK_GE(dshape.size(), 2U)
+      << "Pool2D only support input >= 2-D: input must have height and width";
+
+  // Check input and output layout
+  Layout layout(param->layout);
+  // The Layout is always NHWC
+  ICHECK(layout.Contains(LayoutAxis::Get('H')) && layout.Contains(LayoutAxis::Get('W')) &&
+         !layout.Contains(LayoutAxis::Get('h')) && !layout.Contains(LayoutAxis::Get('w')))
+      << "Invalid input layout " << layout
+      << ". qnn_avg_pool2d inut layout must have H and W, which cannot be split";
+
+  // Find the output shape and data type
+  const auto hidx = layout.IndexOf(LayoutAxis::Get('H'));
+  const auto widx = layout.IndexOf(LayoutAxis::Get('W'));
+
+  IndexExpr pad_h, pad_w;
+  if (param->padding.size() == 1) {
+    pad_h = param->padding[0] * 2;
+    pad_w = param->padding[0] * 2;
+  } else if (param->padding.size() == 2) {
+    // (top, left)
+    pad_h = param->padding[0] * 2;
+    pad_w = param->padding[1] * 2;
+  } else if (param->padding.size() == 4) {
+    // (top, left, bottom, right)
+    pad_h = param->padding[0] + param->padding[2];
+    pad_w = param->padding[1] + param->padding[3];
+  } else {
+    return false;
+  }
+
+  std::vector<IndexExpr> oshape(dshape.begin(), dshape.end());
+  if (dshape[hidx].as<tir::AnyNode>()) {
+    oshape[hidx] = dshape[hidx];
+  } else {
+    oshape[hidx] =
+        calculate_pool_dimension(dshape[hidx], pad_h, param->pool_size[0], param->dilation[0],
+                                 param->strides[0], param->ceil_mode);
+  }
+  if (dshape[widx].as<tir::AnyNode>()) {
+    oshape[widx] = dshape[widx];
+  } else {
+    oshape[widx] =
+        calculate_pool_dimension(dshape[widx], pad_w, param->pool_size[1], param->dilation[1],
+                                 param->strides[1], param->ceil_mode);
+  }
+
+  // assign output type
+  reporter->Assign(types[5], TensorType(oshape, data->dtype));
+  return true;
+}
+
+InferCorrectLayoutOutput QnnAvgPoolInferCorrectLayout(const Attrs& attrs,
+                                                      const Array<Layout>& new_in_layouts,
+                                                      const Array<Layout>& old_in_layouts,
+                                                      const Array<tvm::relay::Type>& old_in_types) {
+  // Use Relay AvgPool2D Infer correct layout.
+  auto avgpool_new_layouts =
+      PoolInferCorrectLayout<AvgPool2DAttrs>(attrs, new_in_layouts, old_in_layouts, old_in_types);
+
+  // Fill the layouts of remaining input tensors - scales and zero points. The layouts of these
+  // tensors can be treated as channel layout.
+  Layout channel_layout = Layout("C");
+  Array<Layout> input_layouts = {avgpool_new_layouts->input_layouts[0], channel_layout,
+                                 channel_layout, channel_layout, channel_layout};
+  Array<Layout> output_layouts = avgpool_new_layouts->output_layouts;
+  return InferCorrectLayoutOutput(input_layouts, output_layouts, attrs);
+}
+
+/*
+ * \brief Forward rewrite the qnn Avg_pool2d op.
+ * \param attrs The QNN Avg_pool2d attrs.
+ * \param new_args The new mutated args to the call node.
+ * \param arg_types The types of input and output.
+ * \return The sequence of Relay ops for qnn Avg_pool2d op.
+ * \note Lowering of the qnn.Avg_pool2d operator

Review Comment:
   Done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [tvm] kparzysz-quic commented on a diff in pull request #15057: [QNN] Implement quantized avg_pool2d

Reply via email to