[tvm] branch main updated: [TOPI] remove the i32 cast for output shape of pool (#14549)

tqchen Sun, 09 Apr 2023 18:59:28 -0700

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git



The following commit(s) were added to refs/heads/main by this push:
     new 4e07a8ed66 [TOPI] remove the i32 cast for output shape of pool (#14549)
4e07a8ed66 is described below

commit 4e07a8ed6687a08b6b27db21af019a5a179b9ee1
Author: Yong Wu <[email protected]>
AuthorDate: Sun Apr 9 18:59:16 2023 -0700

    [TOPI] remove the i32 cast for output shape of pool (#14549)
    
    [TOPI] remove the cast for output shape of pool
---
 include/tvm/topi/nn/dilate.h              |  3 +-
 include/tvm/topi/nn/pooling.h             | 58 +++++++++++++------------------
 include/tvm/topi/transform.h              |  6 +---
 tests/python/relay/test_op_grad_level2.py |  2 +-
 tests/python/relay/test_op_level2.py      |  7 ++--
 5 files changed, 30 insertions(+), 46 deletions(-)

diff --git a/include/tvm/topi/nn/dilate.h b/include/tvm/topi/nn/dilate.h
index 3369316e4d..74c46e2694 100644
--- a/include/tvm/topi/nn/dilate.h
+++ b/include/tvm/topi/nn/dilate.h
@@ -76,8 +76,7 @@ inline Tensor dilate(const Tensor& x, Array<PrimExpr> strides, double dilation_v
   Array<PrimExpr> out_shape;
   arith::Analyzer analyzer;
   for (size_t i = 0; i < n; ++i) {
-    out_shape.push_back(
-        analyzer.Simplify((x->shape[i] - 1) * cast(DataType::Int(32), strides[i] + 1)));
+    out_shape.push_back(analyzer.Simplify((x->shape[i] - 1) * (strides[i] + 1)));
   }
 
   return tvm::te::compute(
diff --git a/include/tvm/topi/nn/pooling.h b/include/tvm/topi/nn/pooling.h
index 5f365e5192..ac048f585c 100644
--- a/include/tvm/topi/nn/pooling.h
+++ b/include/tvm/topi/nn/pooling.h
@@ -57,18 +57,18 @@ inline Tensor pool_grad_impl(const Tensor& out_grad, const Tensor& x,
   ICHECK_EQ(stride_size.size(), 2) << "Pooling stride_size must have 2 elements";
   ICHECK_EQ(padding_size.size(), 4) << "Pooling padding_size must have 4 elements";
 
-  auto kernel_height = cast(DataType::DataType::Int(32), kernel_size[0]);
-  auto kernel_width = cast(DataType::DataType::Int(32), kernel_size[1]);
-  auto stride_height = cast(DataType::DataType::Int(32), stride_size[0]);
-  auto stride_width = cast(DataType::DataType::Int(32), stride_size[1]);
+  auto kernel_height = kernel_size[0];
+  auto kernel_width = kernel_size[1];
+  auto stride_height = stride_size[0];
+  auto stride_width = stride_size[1];
 
-  auto height = cast(DataType::DataType::Int(32), x->shape[height_axis]);
-  auto width = cast(DataType::DataType::Int(32), x->shape[width_axis]);
+  auto height = x->shape[height_axis];
+  auto width = x->shape[width_axis];
 
-  auto pad_top = cast(DataType::DataType::Int(32), padding_size[0]);
-  auto pad_left = cast(DataType::DataType::Int(32), padding_size[1]);
-  auto pad_bottom = cast(DataType::DataType::Int(32), padding_size[2]);
-  auto pad_right = cast(DataType::DataType::Int(32), padding_size[3]);
+  auto pad_top = padding_size[0];
+  auto pad_left = padding_size[1];
+  auto pad_bottom = padding_size[2];
+  auto pad_right = padding_size[3];
 
   if (ceil_mode) {
     // Additional padding to ensure we do ceil instead of floor when
@@ -94,10 +94,6 @@ inline Tensor pool_grad_impl(const Tensor& out_grad, const Tensor& x,
   auto dwidth = tvm::te::reduce_axis(Range(0, kernel_width), "dw");
 
   Array<PrimExpr> data_shape = x->shape;
-  for (size_t i = 0; i < data_shape.size(); ++i) {
-    data_shape.Set(i, cast(DataType::DataType::Int(32), data_shape[i]));
-  }
-
   Array<PrimExpr> out_shape = data_shape;
   out_shape.Set(height_axis, out_height);
   out_shape.Set(width_axis, out_width);
@@ -148,10 +144,10 @@ inline Tensor pool_grad_impl(const Tensor& out_grad, const Tensor& x,
           out_idx.Set(width_axis, (inds[width_axis] + pad_left) / stride_width - windoww);
 
           PrimExpr out_idx_lower_h = tir::Select(
-              pad_inds[height_axis] < kernel_height, make_const(DataType::DataType::Int(32), 0),
+              pad_inds[height_axis] < kernel_height, make_const(pad_inds[height_axis].dtype(), 0),
               (pad_inds[height_axis] - kernel_height) / stride_height + 1);
           PrimExpr out_idx_lower_w = tir::Select(
-              pad_inds[width_axis] < kernel_width, make_const(DataType::DataType::Int(32), 0),
+              pad_inds[width_axis] < kernel_width, make_const(pad_inds[width_axis].dtype(), 0),
               (pad_inds[width_axis] - kernel_width) / stride_width + 1);
 
           return tvm::sum(
@@ -179,10 +175,10 @@ inline Tensor pool_grad_impl(const Tensor& out_grad, const Tensor& x,
           out_idx.Set(width_axis, (pad_w_idx / stride_width - windoww));
 
           PrimExpr out_idx_lower_h =
-              tir::Select(pad_h_idx < kernel_height, make_const(DataType::Int(32), 0),
+              tir::Select(pad_h_idx < kernel_height, make_const(pad_h_idx.dtype(), 0),
                           (pad_h_idx - kernel_height) / stride_height + 1);
           PrimExpr out_idx_lower_w =
-              tir::Select(pad_w_idx < kernel_width, make_const(DataType::Int(32), 0),
+              tir::Select(pad_w_idx < kernel_width, make_const(pad_w_idx.dtype(), 0),
                           (pad_w_idx - kernel_width) / stride_width + 1);
 
           PrimExpr divide_factor;  // number of pooled elements
@@ -194,10 +190,10 @@ inline Tensor pool_grad_impl(const Tensor& out_grad, const Tensor& x,
 
             PrimExpr h_end = min(h_start + kernel_height, height);
             PrimExpr w_end = min(w_start + kernel_width, width);
-            h_start = max(h_start, make_const(DataType::Int(32), 0));
-            w_start = max(w_start, make_const(DataType::Int(32), 0));
+            h_start = max(h_start, make_const(h_start.dtype(), 0));
+            w_start = max(w_start, make_const(w_start.dtype(), 0));
             divide_factor =
-                max((h_end - h_start) * (w_end - w_start), make_const(DataType::Int(32), 1));
+                max((h_end - h_start) * (w_end - w_start), make_const(h_end.dtype(), 1));
           }
           return tvm::sum(
               tvm::if_then_else(tir::And(tir::And(out_idx[height_axis] >= out_idx_lower_h,
@@ -329,14 +325,11 @@ inline Tensor adaptive_pool_impl(const Tensor& x, const Array<PrimExpr>& output_
   ICHECK_EQ(axes.size(), n_dim) << "The number of axes not equal to the in/out dimension";
 
   Array<PrimExpr> data_shape = x->shape;
-  for (size_t i = 0; i < data_shape.size(); ++i) {
-    data_shape.Set(i, cast(DataType::DataType::Int(32), data_shape[i]));
-  }
   Array<PrimExpr> out_shape = data_shape;
   Array<PrimExpr> in_size, out_size;
   for (size_t i = 0; i < n_dim; ++i) {
     in_size.push_back(data_shape[axes[i]]);
-    out_size.push_back(cast(DataType::Int(32), output_size[i]));
+    out_size.push_back(output_size[i]);
     out_shape.Set(axes[i], out_size[i]);
   }
 
@@ -532,19 +525,16 @@ inline Tensor pool_impl_nd(const Tensor& x, const Array<PrimExpr>& kernel_size,
   Array<PrimExpr> pad_before(std::vector<PrimExpr>(x_size, 0));
   Array<PrimExpr> pad_after(std::vector<PrimExpr>(x_size, 0));
   Array<PrimExpr> data_shape = x->shape;
-  for (size_t i = 0; i < data_shape.size(); ++i) {
-    data_shape.Set(i, cast(DataType::DataType::Int(32), data_shape[i]));
-  }
   Array<PrimExpr> out_shape = data_shape;
 
   bool do_pad = false;
   for (int i = 0; i < k_size; i++) {
     int ii = axis[i];
-    kernel[i] = cast(DataType::Int(32), kernel_size[i]);
-    stride[i] = cast(DataType::Int(32), stride_size[i]);
-    dilation[i] = cast(DataType::Int(32), dilation_size[i]);
-    pad_head[i] = cast(DataType::Int(32), padding_size[i]);
-    pad_tail[i] = cast(DataType::Int(32), padding_size[i + k_size]);
+    kernel[i] = kernel_size[i];
+    stride[i] = stride_size[i];
+    dilation[i] = dilation_size[i];
+    pad_head[i] = padding_size[i];
+    pad_tail[i] = padding_size[i + k_size];
 
     if (ceil_mode) {
       // The offset[i] is an additional padding to ensure we do ceil instead of floor when
@@ -650,7 +640,7 @@ inline Tensor pool_impl_nd(const Tensor& x, const Array<PrimExpr>& kernel_size,
               // number that represents the number of steps along the dilated kernel to reach a
               // non-padded value. Otherwise this should be 0.
               PrimExpr jumps_to_non_pad = (dilation[i] - 1 - start[i]) / dilation[i];
-              jumps_to_non_pad = max(jumps_to_non_pad, make_const(DataType::Int(32), 0));
+              jumps_to_non_pad = max(jumps_to_non_pad, make_const(jumps_to_non_pad.dtype(), 0));
 
               end[i] = min(end[i], data_shape[ii] - 1);
               num_el *= (end[i] - (start[i] + dilation[i] * jumps_to_non_pad)) / dilation[i] + 1;
diff --git a/include/tvm/topi/transform.h b/include/tvm/topi/transform.h
index aa75fb05a0..7fe56d9532 100644
--- a/include/tvm/topi/transform.h
+++ b/include/tvm/topi/transform.h
@@ -323,11 +323,7 @@ inline Tensor reshape(const Tensor& x, Array<PrimExpr> newshape, std::string nam
   Array<PrimExpr> target_shape;
 
   for (const auto& ele : newshape) {
-    if (ele.as<IntImmNode>()) {
-      target_shape.push_back(cast(DataType::Int(32), ele));
-    } else {
-      target_shape.push_back(ele);
-    }
+    target_shape.push_back(ele);
   }
 
   // If either the input shape or the target shape contains a zero, return an empty tensor.
diff --git a/tests/python/relay/test_op_grad_level2.py b/tests/python/relay/test_op_grad_level2.py
index bbd851dc9c..7a40a58ee8 100644
--- a/tests/python/relay/test_op_grad_level2.py
+++ b/tests/python/relay/test_op_grad_level2.py
@@ -154,7 +154,7 @@ def test_avg_pool2d_grad(executor_kind):
         ceil_mode=False,
         count_include_pad=False,
         executor_kind=executor_kind,
-        dtype="int32",
+        dtype="float16",
     )
 
 
diff --git a/tests/python/relay/test_op_level2.py b/tests/python/relay/test_op_level2.py
index f7cfc81fb2..b2a8c2cdb0 100644
--- a/tests/python/relay/test_op_level2.py
+++ b/tests/python/relay/test_op_level2.py
@@ -1108,7 +1108,6 @@ def test_pool2d():
         yy = run_infer_type(y)
         assert yy.checked_type == relay.TensorType((n, 10, 224, 224), dtype)
         # test execution
-        dtype = "int32"
         dshape = (1, 3, 28, 28)
         for shape_dtype in ["int32", "int64"]:
             x = relay.var("x", shape=[tvm.tir.IntImm(shape_dtype, x) for x in dshape], dtype=dtype)
@@ -1129,8 +1128,8 @@ def test_pool2d():
     _test_pool2d(relay.nn.avg_pool2d, "avg", pool_size=2, strides=2, padding=0)
     _test_pool2d(relay.nn.avg_pool2d, "avg", pool_size=2, strides=2, padding=0, dilation=2)
 
-    _test_pool2d_int(relay.nn.avg_pool2d, np.mean, "int32")
-    _test_pool2d_int(relay.nn.avg_pool2d, np.mean, "uint16")
+    _test_pool2d_int(relay.nn.avg_pool2d, np.mean, "int64")
+    _test_pool2d_int(relay.nn.avg_pool2d, np.mean, "float16")
     _test_global_pool2d(relay.nn.global_max_pool2d, np.max)
     _test_global_pool2d(relay.nn.global_avg_pool2d, np.mean)
 
@@ -1201,7 +1200,7 @@ def test_pool1d():
     _test_pool1d(relay.nn.max_pool1d, "max", pool_size=2, strides=2, padding=0)
     _test_pool1d(relay.nn.max_pool1d, "max", pool_size=2, strides=2, padding=0, dilation=2)
     _test_pool1d(relay.nn.avg_pool1d, "avg")
-    _test_pool1d(relay.nn.avg_pool1d, "avg", dtype="int32")
+    _test_pool1d(relay.nn.avg_pool1d, "avg", dtype="int64")
     _test_pool1d(relay.nn.avg_pool1d, "avg", pool_size=2, strides=2, padding=0)
     _test_pool1d(relay.nn.avg_pool1d, "avg", pool_size=2, strides=2, padding=0, dilation=2)
     _test_global_pool1d(relay.nn.global_max_pool1d, np.max)

[tvm] branch main updated: [TOPI] remove the i32 cast for output shape of pool (#14549)

Reply via email to