This is an automated email from the ASF dual-hosted git repository.

guberti pushed a commit to branch acc-tests-1
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit ae2976d829ea50a3a985cafb4f9994336eb05f55
Author: Gavin Uberti <[email protected]>
AuthorDate: Sat Jan 21 22:29:48 2023 -0800

    Saturation
---
 vww/modified.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 117 insertions(+), 7 deletions(-)

diff --git a/vww/modified.c b/vww/modified.c
index b35a22dd4e..ad6272ac97 100644
--- a/vww/modified.c
+++ b/vww/modified.c
@@ -30932,6 +30932,120 @@ TVM_DLL int32_t 
tvmgen_default_fused_nn_conv2d_add_cast_multiply_add_right_shift
   return 0;
 }
 
/* Per-channel requantization scale table, 8 channels, 16-byte aligned,
 * placed in the TVM read-only data section.
 * NOTE(review): declared float, yet every value is a tiny subnormal-range
 * constant (~2^-110 .. 2^-125). The tensordot kernels below take their
 * scale through an int32_t*, so these bit patterns likely encode integer
 * fixed-point multipliers — confirm against the call sites. */
static const float __attribute__((section(".rodata.tvm"), aligned(16))) scale_3360883[8] = {
    0x1.4e5b46p-124, 0x1.a1c562p-123,
    0x1.52203ap-125, 0x1.2221a4p-121,
    0x1.adb328p-123, 0x1.bd606ep-110,
    0x1.58c1ccp-121, 0x1.afdb16p-122,
};
+
/* Per-channel bias table, 8 channels, 16-byte aligned, in the TVM
 * read-only data section. Added to each accumulator before
 * requantization by the tensordot kernels. */
static const int32_t __attribute__((section(".rodata.tvm"), aligned(16))) bias_11590737[8] = {
     0x34D6,  0x21DD,  0x4223,  0x1F3F,
     0x240D, -0x098B, -0x04FC, -0x20CE,
};
+
/* int16 convolution weights, 72 = 9 x 8 entries, 16-byte aligned, in the
 * TVM read-only data section. Grouped 8 per line as in the generated
 * original; exact tap-vs-channel layout is not visible in this hunk —
 * confirm against the consuming kernel. All magnitudes fit in int8
 * (|w| <= 0x7f), consistent with quantized weights widened to int16. */
static const int16_t __attribute__((section(".rodata.tvm"), aligned(16))) kernel_21585151[72] = {
    -0x4A, -0x05, -0x4D,  0x6C,  0x7F,  0x28,  0x2A,  0x2C,
    -0x5B, -0x71,  0x06, -0x07,  0x4E,  0x2C, -0x1B,  0x7F,
     0x42, -0x68, -0x47, -0x1E, -0x0B,  0x10,  0x4B, -0x19,
     0x7F,  0x46, -0x01, -0x1B, -0x12, -0x03,  0x11, -0x01,
    -0x03, -0x01,  0x7F, -0x2E, -0x28, -0x28,  0x19,  0x34,
     0x21, -0x3E,  0x32,  0x7F, -0x48, -0x39,  0x39,  0x31,
     0x17, -0x7F,  0x18,  0x11,  0x32, -0x52,  0x51,  0x01,
     0x2F, -0x67, -0x7F,  0x0D,  0x22, -0x48,  0x4B,  0x29,
     0x31, -0x1A, -0x0E, -0x7F,  0x26, -0x49, -0x3F,  0x3C,
};
+
/* 3x3 int16 tensordot kernel: computes two horizontally adjacent outputs
 * at once using Arm DSP dual/packed multiply-accumulate intrinsics.
 * The "000" variant reads the kernel taps starting on an even halfword
 * (contrast the "010" variant below, whose first packed word carries a
 * don't-care halfword before tap y0x0).
 *
 *   output: written as int16 halfwords 0 and 8 — the two results land
 *           8 halfwords apart, presumably an 8-channel interleaved
 *           layout matching the `_1_8` suffix (confirm against caller)
 *   tensor: int16 input window viewed as packed int32 word pairs; rows
 *           are 24 words = 48 halfwords apart (the "w48" in the name)
 *   kernel: nine packed int16 taps (five int32 loads; the last top
 *           halfword is padding and never read)
 *   bias:   accumulator initial value for both outputs
 *   scale:  per-channel multiplier applied as a 64-bit product whose
 *           top 32 bits are kept
 *
 * Always returns 0.
 */
__attribute__((always_inline)) static inline int32_t tensordot_opt_x2_int16_w48_3x3_000_1_8(
    int32_t *output, int32_t *tensor, int32_t *kernel, int32_t *bias, int32_t *scale
) {
  /* Each int32 load packs two adjacent int16 tensor entries. */
  int32_t t_r0_01 = tensor[0];   /* row 0, columns 0|1 */
  int32_t t_r0_23 = tensor[1];   /* row 0, columns 2|3 */
  int32_t t_r1_01 = tensor[24];  /* row 1 */
  int32_t t_r1_23 = tensor[25];
  int32_t t_r2_01 = tensor[48];  /* row 2 */
  int32_t t_r2_23 = tensor[49];

  /* k_AB_CD holds tap yAxB in the bottom halfword, yCxD in the top. */
  int32_t k_00_01 = kernel[0];
  int32_t k_02_10 = kernel[1];
  int32_t k_11_12 = kernel[2];
  int32_t k_20_21 = kernel[3];
  int32_t k_22_xx = kernel[4];   /* top halfword unused */

  int32_t acc_even = *bias;      /* left output */
  int32_t acc_odd  = *bias;      /* right output, one column over */

  /* Nine taps for the left output. */
  acc_even = __smlad(t_r0_01, k_00_01, acc_even);
  acc_even = __smlabb(t_r0_23, k_02_10, acc_even);
  acc_even = __smlabt(t_r1_01, k_02_10, acc_even);
  acc_even = __smlatb(t_r1_01, k_11_12, acc_even);
  acc_even = __smlabt(t_r1_23, k_11_12, acc_even);
  acc_even = __smlad(t_r2_01, k_20_21, acc_even);
  acc_even = __smlabb(t_r2_23, k_22_xx, acc_even);

  /* Nine taps for the right output, shifted one column right. */
  acc_odd = __smlatb(t_r0_01, k_00_01, acc_odd);
  acc_odd = __smlabt(t_r0_23, k_00_01, acc_odd);
  acc_odd = __smlatb(t_r0_23, k_02_10, acc_odd);
  acc_odd = __smlatt(t_r1_01, k_02_10, acc_odd);
  acc_odd = __smlad(t_r1_23, k_11_12, acc_odd);
  acc_odd = __smlatb(t_r2_01, k_20_21, acc_odd);
  acc_odd = __smlabt(t_r2_23, k_20_21, acc_odd);
  acc_odd = __smlatb(t_r2_23, k_22_xx, acc_odd);

  /* Requantize: keep the top 32 bits of the 64-bit product with the
   * scale, round by the extra low bit, re-center by -128, and saturate
   * to the signed 8-bit range. */
  int32_t mult = *scale;
  int32_t q_even = (int32_t)(((int64_t)mult * acc_even) >> 32);
  q_even = __ssat(((q_even + 1) >> 1) - 128, 8);
  int32_t q_odd = (int32_t)(((int64_t)mult * acc_odd) >> 32);
  q_odd = __ssat(((q_odd + 1) >> 1) - 128, 8);

  ((int16_t *)output)[0] = (int16_t)q_even;
  ((int16_t *)output)[8] = (int16_t)q_odd;
  return 0;
}
+
/* 3x3 int16 tensordot kernel: two horizontally adjacent outputs per
 * call, Arm DSP multiply-accumulate intrinsics. The "010" variant is
 * the odd-halfword-aligned sibling of the "000" variant above: here the
 * FIRST packed kernel word carries a don't-care halfword before tap
 * y0x0, so the intrinsic top/bottom selections differ accordingly.
 *
 *   output: int16 halfwords 0 and 8 — 8 apart, presumably 8-channel
 *           interleaved per the `_1_8` suffix (confirm against caller)
 *   tensor: int16 window as packed int32 pairs; rows 24 words apart
 *           (= 48 halfwords, the "w48")
 *   kernel: nine packed int16 taps across five int32 loads
 *   bias:   accumulator initial value for both outputs
 *   scale:  per-channel multiplier, 64-bit product, top 32 bits kept
 *
 * Always returns 0.
 */
__attribute__((always_inline)) static inline int32_t tensordot_opt_x2_int16_w48_3x3_010_1_8(
    int32_t *output, int32_t *tensor, int32_t *kernel, int32_t *bias, int32_t *scale
) {
  /* Each int32 load packs two adjacent int16 tensor entries. */
  int32_t t_r0_01 = tensor[0];   /* row 0, columns 0|1 */
  int32_t t_r0_23 = tensor[1];   /* row 0, columns 2|3 */
  int32_t t_r1_01 = tensor[24];  /* row 1 */
  int32_t t_r1_23 = tensor[25];
  int32_t t_r2_01 = tensor[48];  /* row 2 */
  int32_t t_r2_23 = tensor[49];

  /* k_AB_CD: tap yAxB in the bottom halfword, yCxD in the top. */
  int32_t k_xx_00 = kernel[0];   /* bottom halfword unused */
  int32_t k_01_02 = kernel[1];
  int32_t k_10_11 = kernel[2];
  int32_t k_12_20 = kernel[3];
  int32_t k_21_22 = kernel[4];

  int32_t acc_even = *bias;      /* left output */
  int32_t acc_odd  = *bias;      /* right output, one column over */

  /* Nine taps for the left output. */
  acc_even = __smlabt(t_r0_01, k_xx_00, acc_even);
  acc_even = __smlatb(t_r0_01, k_01_02, acc_even);
  acc_even = __smlabt(t_r0_23, k_01_02, acc_even);
  acc_even = __smlad(t_r1_01, k_10_11, acc_even);
  acc_even = __smlabb(t_r1_23, k_12_20, acc_even);
  acc_even = __smlabt(t_r2_01, k_12_20, acc_even);
  acc_even = __smlatb(t_r2_01, k_21_22, acc_even);
  acc_even = __smlabt(t_r2_23, k_21_22, acc_even);

  /* Nine taps for the right output, shifted one column right. */
  acc_odd = __smlatt(t_r0_01, k_xx_00, acc_odd);
  acc_odd = __smlad(t_r0_23, k_01_02, acc_odd);
  acc_odd = __smlatb(t_r1_01, k_10_11, acc_odd);
  acc_odd = __smlabt(t_r1_23, k_10_11, acc_odd);
  acc_odd = __smlatb(t_r1_23, k_12_20, acc_odd);
  acc_odd = __smlatt(t_r2_01, k_12_20, acc_odd);
  acc_odd = __smlad(t_r2_23, k_21_22, acc_odd);

  /* Requantize: top 32 bits of the 64-bit product with the scale,
   * round by the extra low bit, re-center by -128, saturate to int8. */
  int32_t mult = *scale;
  int32_t q_even = (int32_t)(((int64_t)mult * acc_even) >> 32);
  q_even = __ssat(((q_even + 1) >> 1) - 128, 8);
  int32_t q_odd = (int32_t)(((int64_t)mult * acc_odd) >> 32);
  q_odd = __ssat(((q_odd + 1) >> 1) - 128, 8);

  ((int16_t *)output)[0] = (int16_t)q_even;
  ((int16_t *)output)[8] = (int16_t)q_odd;
  return 0;
}
+
 #ifdef __cplusplus
 extern "C"
 #endif
@@ -30991,13 +31105,9 @@ TVM_DLL int32_t 
tvmgen_default_fused_nn_conv2d_add_cast_multiply_add_right_shift
     for (int32_t ax2_1 = 0; ax2_1 < 48; ++ax2_1) {
       for (int32_t ax3_1 = 0; ax3_1 < 8; ++ax3_1) {
         int32_t cse_var_5 = (((ax1_1 * 384) + (ax2_1 * 8)) + ax3_1);
-        int32_t __1 = 
((int32_t)((((((int64_t)((int32_t*)depthwise_conv2d)[cse_var_5]) + 
((int64_t)((int32_t*)fused_nn_conv2d_constant_6)[ax3_1])) * 
((int64_t*)fused_nn_conv2d_add_cast_constant_7)[ax3_1]) + 
((int64_t*)fused_nn_conv2d_add_cast_multiply_constant_8)[ax3_1]) >> 
((int64_t*)fused_nn_conv2d_add_cast_multiply_add_constant_9)[ax3_1])) - 128;
-        int32_t __2 = (__1) < (127) ? (__1) : (127);
-        int8_t __3 = (int8_t)((__2) > (-128) ? (__2) : (-128));
-        int8_t __4 = (int8_t)127;
-        int8_t __5 = (__3) < (__4) ? (__3) : (__4);
-        int8_t __6 = (int8_t)-128;
-        ((int16_t*)T_subtract)[cse_var_5] = (((int16_t)((__5) > (__6) ? (__5) 
: (__6))) - (int16_t)-128);
+        int32_t __1 = 
((int32_t)((((((int64_t)((int32_t*)depthwise_conv2d)[cse_var_5]) + 
((int64_t)((int32_t*)fused_nn_conv2d_constant_6)[ax3_1])) * 
((int64_t*)fused_nn_conv2d_add_cast_constant_7)[ax3_1]) + 
((int64_t*)fused_nn_conv2d_add_cast_multiply_constant_8)[ax3_1]) >> 
((int64_t*)fused_nn_conv2d_add_cast_multiply_add_constant_9)[ax3_1]));
+        int32_t requant_0 = __ssat(__1 - 128, 8);
+        ((int16_t*)T_subtract)[cse_var_5] = (((int16_t) requant_0) - 
(int16_t)-128);
       }
     }
   }

Reply via email to