https://gcc.gnu.org/g:95e4252f53bc0e5b66a200c611fd2c9f6f7f2a62

commit r11-11466-g95e4252f53bc0e5b66a200c611fd2c9f6f7f2a62
Author: Richard Sandiford <richard.sandif...@arm.com>
Date:   Tue Jun 4 13:47:35 2024 +0100

    vect: Tighten vect_determine_precisions_from_range [PR113281]
    
    This was another PR caused by the way that
    vect_determine_precisions_from_range handles shifts.  We tried to
    narrow 32768 >> x to a 16-bit shift based on range information for
    the inputs and outputs, with vect_recog_over_widening_pattern
    (after PR110828) adjusting the shift amount.  But this doesn't
    work for the case where x is in [16, 31], since then 32-bit
    32768 >> x is a well-defined zero, whereas no well-defined
    16-bit 32768 >> y will produce 0.
    
    We could perhaps generate x < 16 ? 32768 >> x : 0 instead,
    but since vect_determine_precisions_from_range was never really
    supposed to rely on fix-ups, it seems better to fix that instead.
    
    The patch also makes the code more selective about which codes
    can be narrowed based on input and output ranges.  This showed
    that vect_truncatable_operation_p was missing cases for
    BIT_NOT_EXPR (equivalent to BIT_XOR_EXPR of -1) and NEGATE_EXPR
    (equivalent to BIT_NOT_EXPR followed by a PLUS_EXPR of 1).
    
    pr113281-1.c is the original testcase.  pr113281-[23].c failed
    before the patch due to overly optimistic narrowing.  pr113281-[45].c
    previously passed and are meant to protect against accidental
    optimisation regressions.
    
    gcc/
            PR target/113281
            * tree-vect-patterns.c (vect_recog_over_widening_pattern): Remove
            workaround for right shifts.
            (vect_truncatable_operation_p): Handle NEGATE_EXPR and BIT_NOT_EXPR.
            (vect_determine_precisions_from_range): Be more selective about
            which codes can be narrowed based on their input and output ranges.
            For shifts, require at least one more bit of precision than the
            maximum shift amount.
    
    gcc/testsuite/
            PR target/113281
            * gcc.dg/vect/pr113281-1.c: New test.
            * gcc.dg/vect/pr113281-2.c: Likewise.
            * gcc.dg/vect/pr113281-3.c: Likewise.
            * gcc.dg/vect/pr113281-4.c: Likewise.
            * gcc.dg/vect/pr113281-5.c: Likewise.
    
    (cherry picked from commit 1a8261e047f7a2c2b0afb95716f7615cba718cd1)

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr113281-1.c |  17 ++++++
 gcc/testsuite/gcc.dg/vect/pr113281-2.c |  50 +++++++++++++++
 gcc/testsuite/gcc.dg/vect/pr113281-3.c |  39 ++++++++++++
 gcc/testsuite/gcc.dg/vect/pr113281-4.c |  55 +++++++++++++++++
 gcc/testsuite/gcc.dg/vect/pr113281-5.c |  66 ++++++++++++++++++++
 gcc/tree-vect-patterns.c               | 107 ++++++++++++++++++++++++---------
 6 files changed, 305 insertions(+), 29 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr113281-1.c 
b/gcc/testsuite/gcc.dg/vect/pr113281-1.c
new file mode 100644
index 00000000000..6df4231cb5f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr113281-1.c
@@ -0,0 +1,17 @@
+#include "tree-vect.h"
+
+unsigned char a;
+
+int main() {
+  check_vect ();
+
+  short b = a = 0;
+  for (; a != 19; a++)
+    if (a)
+      b = 32872 >> a;
+
+  if (b == 0)
+    return 0;
+  else
+    return 1;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr113281-2.c 
b/gcc/testsuite/gcc.dg/vect/pr113281-2.c
new file mode 100644
index 00000000000..3a1170c28b6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr113281-2.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+
+#define N 128
+
+short x[N];
+short y[N];
+
+void
+f1 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] >>= y[i];
+}
+
+void
+f2 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] >>= (y[i] < 32 ? y[i] : 32);
+}
+
+void
+f3 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] >>= (y[i] < 31 ? y[i] : 31);
+}
+
+void
+f4 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] >>= (y[i] & 31);
+}
+
+void
+f5 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] >>= 0x8000 >> y[i];
+}
+
+void
+f6 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] >>= 0x8000 >> (y[i] & 31);
+}
+
+/* { dg-final { scan-tree-dump-not {can narrow[^\n]+>>} "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr113281-3.c 
b/gcc/testsuite/gcc.dg/vect/pr113281-3.c
new file mode 100644
index 00000000000..5982dd2d16f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr113281-3.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+
+#define N 128
+
+short x[N];
+short y[N];
+
+void
+f1 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] >>= (y[i] < 30 ? y[i] : 30);
+}
+
+void
+f2 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] >>= ((y[i] & 15) + 2);
+}
+
+void
+f3 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] >>= (y[i] < 16 ? y[i] : 16);
+}
+
+void
+f4 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] = 32768 >> ((y[i] & 15) + 3);
+}
+
+/* { dg-final { scan-tree-dump {can narrow to signed:31 without loss [^\n]+>>} 
"vect" } } */
+/* { dg-final { scan-tree-dump {can narrow to signed:18 without loss [^\n]+>>} 
"vect" } } */
+/* { dg-final { scan-tree-dump {can narrow to signed:17 without loss [^\n]+>>} 
"vect" } } */
+/* { dg-final { scan-tree-dump {can narrow to unsigned:19 without loss 
[^\n]+>>} "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr113281-4.c 
b/gcc/testsuite/gcc.dg/vect/pr113281-4.c
new file mode 100644
index 00000000000..10fbc0e8405
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr113281-4.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+
+#define N 128
+
+short x[N];
+short y[N];
+
+void
+f1 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] >>= (y[i] & 15);
+}
+
+void
+f2 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] >>= ((y[i] & 7) + 8);
+}
+
+void
+f3 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] >>= ((y[i] & 7) ^ 11);
+}
+
+void
+f4 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] >>= (y[i] < 15 ? y[i] : 15);
+}
+
+void
+f5 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] >>= (y[i] < 15 ? y[i] : 1);
+}
+
+void
+f6 (void)
+{
+  for (int i = 0; i < N; ++i)
+    x[i] = 32768 >> (y[i] & 15);
+}
+
+/* { dg-final { scan-tree-dump {:11:[^\n]+can narrow to signed:16 without loss 
[^\n]+>>} "vect" } } */
+/* { dg-final { scan-tree-dump {:18:[^\n]+can narrow to signed:16 without loss 
[^\n]+>>} "vect" } } */
+/* { dg-final { scan-tree-dump {:25:[^\n]+can narrow to signed:16 without loss 
[^\n]+>>} "vect" } } */
+/* { dg-final { scan-tree-dump {:32:[^\n]+can narrow to signed:16 without loss 
[^\n]+>>} "vect" } } */
+/* { dg-final { scan-tree-dump {:39:[^\n]+can narrow to signed:16 without loss 
[^\n]+>>} "vect" } } */
+/* { dg-final { scan-tree-dump {can narrow to unsigned:16 without loss 
[^\n]+>>} "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr113281-5.c 
b/gcc/testsuite/gcc.dg/vect/pr113281-5.c
new file mode 100644
index 00000000000..4a4571792e2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr113281-5.c
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+
+#define N 128
+
+short x[N];
+short y[N];
+
+void
+f1 (void)
+{
+  for (int i = 0; i < N; ++i)
+    {
+      int a = y[i];
+      int b = ~a;
+      x[i] = b;
+    }
+}
+
+void
+f2 (void)
+{
+  for (int i = 0; i < N; ++i)
+    {
+      int a = y[i];
+      int b = -a;
+      x[i] = b;
+    }
+}
+
+void
+f3 (void)
+{
+  for (int i = 0; i < N; ++i)
+    {
+      int a = x[i];
+      int b = a / y[i];
+      x[i] = b;
+    }
+}
+
+void
+f4 (void)
+{
+  for (int i = 0; i < N; ++i)
+    {
+      int a = x[i];
+      int b = a < y[i] ? a : y[i];
+      x[i] = b;
+    }
+}
+
+void
+f5 (void)
+{
+  for (int i = 0; i < N; ++i)
+    {
+      int a = x[i];
+      int b = a > y[i] ? a : y[i];
+      x[i] = b;
+    }
+}
+
+/* { dg-final { scan-tree-dump {can narrow to signed:17 without loss [^\n]+= 
-} "vect" } } */
+/* { dg-final { scan-tree-dump {can narrow to signed:16 without loss [^\n]+= 
~} "vect" } } */
+/* { dg-final { scan-tree-dump {can narrow to signed:16 without loss [^\n]+ 
MIN_EXPR} "vect" } } */
+/* { dg-final { scan-tree-dump {can narrow to signed:16 without loss [^\n]+ 
MAX_EXPR} "vect" } } */
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 01bcea9dca2..14e5ba54ae7 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -4745,9 +4745,11 @@ vect_truncatable_operation_p (tree_code code)
 {
   switch (code)
     {
+    case NEGATE_EXPR:
     case PLUS_EXPR:
     case MINUS_EXPR:
     case MULT_EXPR:
+    case BIT_NOT_EXPR:
     case BIT_AND_EXPR:
     case BIT_IOR_EXPR:
     case BIT_XOR_EXPR:
@@ -4906,38 +4908,85 @@ vect_determine_precisions_from_range (stmt_vec_info 
stmt_info, gassign *stmt)
   unsigned int nops = gimple_num_ops (stmt);
 
   if (!vect_truncatable_operation_p (code))
-    /* Check that all relevant input operands are compatible, and update
-       [MIN_VALUE, MAX_VALUE] to include their ranges.  */
-    for (unsigned int i = 1; i < nops; ++i)
-      {
-       tree op = gimple_op (stmt, i);
-       if (TREE_CODE (op) == INTEGER_CST)
-         {
-           /* Don't require the integer to have RHS_TYPE (which it might
-              not for things like shift amounts, etc.), but do require it
-              to fit the type.  */
-           if (!int_fits_type_p (op, type))
-             return;
-
-           min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
-           max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
-         }
-       else if (TREE_CODE (op) == SSA_NAME)
-         {
-           /* Ignore codes that don't take uniform arguments.  */
-           if (!types_compatible_p (TREE_TYPE (op), type))
-             return;
+    {
+      /* Handle operations that can be computed in type T if all inputs
+        and outputs can be represented in type T.  Also handle left and
+        right shifts, where (in addition) the maximum shift amount must
+        be less than the number of bits in T.  */
+      bool is_shift;
+      switch (code)
+       {
+       case LSHIFT_EXPR:
+       case RSHIFT_EXPR:
+         is_shift = true;
+         break;
 
-           wide_int op_min_value, op_max_value;
-           if (!vect_get_range_info (op, &op_min_value, &op_max_value))
-             return;
+       case ABS_EXPR:
+       case MIN_EXPR:
+       case MAX_EXPR:
+       case TRUNC_DIV_EXPR:
+       case CEIL_DIV_EXPR:
+       case FLOOR_DIV_EXPR:
+       case ROUND_DIV_EXPR:
+       case EXACT_DIV_EXPR:
+         /* Modulus is excluded because it is typically calculated by doing
+            a division, for which minimum signed / -1 isn't representable in
+            the original signed type.  We could take the division range into
+            account instead, if handling modulus ever becomes important.  */
+         is_shift = false;
+         break;
 
-           min_value = wi::min (min_value, op_min_value, sign);
-           max_value = wi::max (max_value, op_max_value, sign);
-         }
-       else
+       default:
          return;
-      }
+       }
+      for (unsigned int i = 1; i < nops; ++i)
+       {
+         tree op = gimple_op (stmt, i);
+         wide_int op_min_value, op_max_value;
+         if (TREE_CODE (op) == INTEGER_CST)
+           {
+             unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
+             op_min_value = op_max_value = wi::to_wide (op, op_precision);
+           }
+         else if (TREE_CODE (op) == SSA_NAME)
+           {
+             if (!vect_get_range_info (op, &op_min_value, &op_max_value))
+               return;
+           }
+         else
+           return;
+
+         if (is_shift && i == 2)
+           {
+             /* There needs to be one more bit than the maximum shift amount.
+
+                If the maximum shift amount is already 1 less than PRECISION
+                then we can't narrow the shift further.  Dealing with that
+                case first ensures that we can safely use an unsigned range
+                below.
+
+                op_min_value isn't relevant, since shifts by negative amounts
+                are UB.  */
+             if (wi::geu_p (op_max_value, precision - 1))
+               return;
+             unsigned int min_bits = op_max_value.to_uhwi () + 1;
+
+             /* As explained below, we can convert a signed shift into an
+                unsigned shift if the sign bit is always clear.  At this
+                point we've already processed the ranges of the output and
+                the first input.  */
+             auto op_sign = sign;
+             if (sign == SIGNED && !wi::neg_p (min_value))
+               op_sign = UNSIGNED;
+             op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
+                                            precision, op_sign);
+             op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
+                                            precision, op_sign);
+           }
+         min_value = wi::min (min_value, op_min_value, sign);
+         max_value = wi::max (max_value, op_max_value, sign);
+       }
+    }
 
   /* Try to switch signed types for unsigned types if we can.
      This is better for two reasons.  First, unsigned ops tend

Reply via email to