Hi!

The following patch adjusts simplify_rotate to recognize more rotates;
basically, we allow even some same-precision integral -> integral
conversions, with the requirement that the RSHIFT_EXPR has to be
performed in an unsigned type (i.e. a logical right shift), so that we
compensate for the combiner no longer being able to simplify those into
rotates on some targets.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-03-31  Jakub Jelinek  <ja...@redhat.com>

        PR rtl-optimization/94344
        * tree-ssa-forwprop.c (simplify_rotate): Handle also same precision
        conversions, either on both operands of |^+ or just one.  Handle
        also extra same precision conversion on RSHIFT_EXPR first operand
        provided RSHIFT_EXPR is performed in unsigned type.

        * gcc.dg/pr94344.c: New test.

--- gcc/tree-ssa-forwprop.c.jj  2020-03-23 19:43:00.309774530 +0100
+++ gcc/tree-ssa-forwprop.c     2020-03-30 14:50:16.303668479 +0200
@@ -1562,14 +1562,14 @@ simplify_rotate (gimple_stmt_iterator *g
   for (i = 0; i < 2; i++)
     defcodefor_name (arg[i], &def_code[i], &def_arg1[i], &def_arg2[i]);
 
-  /* Look through narrowing conversions.  */
+  /* Look through narrowing (or same precision) conversions.  */
   if (CONVERT_EXPR_CODE_P (def_code[0])
       && CONVERT_EXPR_CODE_P (def_code[1])
       && INTEGRAL_TYPE_P (TREE_TYPE (def_arg1[0]))
       && INTEGRAL_TYPE_P (TREE_TYPE (def_arg1[1]))
       && TYPE_PRECISION (TREE_TYPE (def_arg1[0]))
         == TYPE_PRECISION (TREE_TYPE (def_arg1[1]))
-      && TYPE_PRECISION (TREE_TYPE (def_arg1[0])) > TYPE_PRECISION (rtype)
+      && TYPE_PRECISION (TREE_TYPE (def_arg1[0])) >= TYPE_PRECISION (rtype)
       && has_single_use (arg[0])
       && has_single_use (arg[1]))
     {
@@ -1579,6 +1579,21 @@ simplify_rotate (gimple_stmt_iterator *g
          defcodefor_name (arg[i], &def_code[i], &def_arg1[i], &def_arg2[i]);
        }
     }
+  else
+    {
+      /* Handle signed rotate; the RSHIFT_EXPR has to be done
+        in unsigned type but LSHIFT_EXPR could be signed.  */
+      i = (def_code[0] == LSHIFT_EXPR || def_code[0] == RSHIFT_EXPR);
+      if (CONVERT_EXPR_CODE_P (def_code[i])
+         && (def_code[1 - i] == LSHIFT_EXPR || def_code[1 - i] == RSHIFT_EXPR)
+         && INTEGRAL_TYPE_P (TREE_TYPE (def_arg1[i]))
+         && TYPE_PRECISION (rtype) == TYPE_PRECISION (TREE_TYPE (def_arg1[i]))
+         && has_single_use (arg[i]))
+       {
+         arg[i] = def_arg1[i];
+         defcodefor_name (arg[i], &def_code[i], &def_arg1[i], &def_arg2[i]);
+       }
+    }
 
   /* One operand has to be LSHIFT_EXPR and one RSHIFT_EXPR.  */
   for (i = 0; i < 2; i++)
@@ -1608,8 +1623,33 @@ simplify_rotate (gimple_stmt_iterator *g
   if (!operand_equal_for_phi_arg_p (def_arg1[0], def_arg1[1])
       || !types_compatible_p (TREE_TYPE (def_arg1[0]),
                              TREE_TYPE (def_arg1[1])))
-    return false;
-  if (!TYPE_UNSIGNED (TREE_TYPE (def_arg1[0])))
+    {
+      if ((TYPE_PRECISION (TREE_TYPE (def_arg1[0]))
+          != TYPE_PRECISION (TREE_TYPE (def_arg1[1])))
+         || (TYPE_UNSIGNED (TREE_TYPE (def_arg1[0]))
+             == TYPE_UNSIGNED (TREE_TYPE (def_arg1[1]))))
+       return false;
+
+      /* Handle signed rotate; the RSHIFT_EXPR has to be done
+        in unsigned type but LSHIFT_EXPR could be signed.  */
+      i = def_code[0] != RSHIFT_EXPR;
+      if (!TYPE_UNSIGNED (TREE_TYPE (def_arg1[i])))
+       return false;
+
+      tree tem;
+      enum tree_code code;
+      defcodefor_name (def_arg1[i], &code, &tem, NULL);
+      if (!CONVERT_EXPR_CODE_P (code)
+         || !INTEGRAL_TYPE_P (TREE_TYPE (tem))
+         || TYPE_PRECISION (TREE_TYPE (tem)) != TYPE_PRECISION (rtype))
+       return false;
+      def_arg1[i] = tem;
+      if (!operand_equal_for_phi_arg_p (def_arg1[0], def_arg1[1])
+         || !types_compatible_p (TREE_TYPE (def_arg1[0]),
+                                 TREE_TYPE (def_arg1[1])))
+       return false;
+    }
+  else if (!TYPE_UNSIGNED (TREE_TYPE (def_arg1[0])))
     return false;
 
   /* CNT1 + CNT2 == B case above.  */
--- gcc/testsuite/gcc.dg/pr94344.c.jj   2020-03-30 14:49:53.005016600 +0200
+++ gcc/testsuite/gcc.dg/pr94344.c      2020-03-30 14:47:41.495981569 +0200
@@ -0,0 +1,53 @@
+/* PR rtl-optimization/94344 */
+/* { dg-do compile { target { ilp32 || lp64 } } } */
+/* { dg-options "-O2 -fdump-tree-forwprop1" } */
+/* { dg-final { scan-tree-dump-times " r>> 27;" 4 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times " r>> 59;" 4 "forwprop1" } } */
+
+int
+f1 (int x)
+{
+  return (x << 5) | (int)((unsigned int)x >> 27);
+}
+
+unsigned int
+f2 (int x)
+{
+  return (x << 5) | ((unsigned int)x >> 27);
+}
+
+long long int
+f3 (long long int x)
+{
+  return (x << 5) | (long long int)((unsigned long long int)x >> 59);
+}
+
+unsigned long long int
+f4 (long long int x)
+{
+  return (x << 5) | ((unsigned long long int)x >> 59);
+}
+
+int
+f5 (int x)
+{
+  return (int)((unsigned int)x >> 27) | (x << 5);
+}
+
+unsigned int
+f6 (int x)
+{
+  return ((unsigned int)x >> 27) | (x << 5);
+}
+
+long long int
+f7 (long long int x)
+{
+  return (long long int)((unsigned long long int)x >> 59) | (x << 5);
+}
+
+unsigned long long int
+f8 (long long int x)
+{
+  return ((unsigned long long int)x >> 59) | (x << 5);
+}

        Jakub

Reply via email to