________________________________________
From: Jeff Law <l...@redhat.com>
Sent: 11 February 2016 23:26
To: Bin.Cheng
Cc: Bin Cheng; gcc-patches@gcc.gnu.org; nd
Subject: Re: [PATCH PR69052]Check if loop inv can be propagated into mem ref 
with additional addr expr canonicalization

>> On 02/11/2016 10:59 AM, Bin.Cheng wrote:

>> Hi Jeff,
>> Thanks for detailed review.  I also think a generic canonical
>> interface for RTL is much better.  I will give it a try.  But with
>> high chance it's a next stage1 stuff.
> That is, of course, fine.  However, if you do get something ready, I'd
> support using it within LICM for gcc-6, then using it in other places
> for gcc-7.
Hi,
This is the updated version patch.  It fixes the problem by introducing a 
generic address canonicalization interface.  This new interface canonicalizes 
address expression in following steps:
     1) Rewrite ASHIFT into MULT recursively.
     2) Divide address into sub expressions with PLUS as the separator.
     3) Sort sub expressions according to precedence defined for communative 
operations.
     4) Simplify CONST_INT_P sub expressions.
     5) Create new canonicalized address and return.

According to review comments, this interface is now restricted in LCIM, and 
will probably be expanded to other passes like fwprop and combine after 
entering GCC7.
Bootstrap and test on x86_64 and AArch64.  Is it OK?

Thanks,
bin

2016-02-15  Bin Cheng  <bin.ch...@arm.com>

        PR tree-optimization/69052
        * loop-invariant.c (canonicalize_address_mult): New function.
        (MAX_CANON_ADDR_PARTS): New macro.
        (collect_address_parts): New function.
        (compare_address_parts, canonicalize_address): New functions.
        (inv_can_prop_to_addr_use): Check validity of address expression
        which is canonicalized by above canonicalize_address.

gcc/testsuite/ChangeLog
2016-02-15  Bin Cheng  <bin.ch...@arm.com>

        PR tree-optimization/69052
        * gcc.target/i386/pr69052.c: New test.

> 
> Jeff

diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c
index 707f044..af528d0 100644
--- a/gcc/loop-invariant.c
+++ b/gcc/loop-invariant.c
@@ -754,6 +754,147 @@ create_new_invariant (struct def *def, rtx_insn *insn, 
bitmap depends_on,
   return inv;
 }
 
+/* Return a canonical version of X for the address, from the point of view,
+   that all multiplications are represented as MULT instead of the multiply
+   by a power of 2 being represented as ASHIFT.
+
+   This function is a duplication of canonicalize_address from fwprop.c.
+   Callers should prepare a copy of X because this function may modify it
+   in place.  */
+
+static void
+canonicalize_address_mult (rtx x)
+{
+  for (;;)
+    switch (GET_CODE (x))
+      {
+      case ASHIFT:
+       if (CONST_INT_P (XEXP (x, 1))
+           && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
+           && INTVAL (XEXP (x, 1)) >= 0)
+         {
+           HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
+           PUT_CODE (x, MULT);
+           XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
+                                       GET_MODE (x));
+         }
+
+       x = XEXP (x, 0);
+       break;
+
+      case PLUS:
+       if (GET_CODE (XEXP (x, 0)) == PLUS
+           || GET_CODE (XEXP (x, 0)) == ASHIFT
+           || GET_CODE (XEXP (x, 0)) == CONST)
+         canonicalize_address_mult (XEXP (x, 0));
+
+       x = XEXP (x, 1);
+       break;
+
+      case CONST:
+       x = XEXP (x, 0);
+       break;
+
+      default:
+       return;
+      }
+}
+
+/* Maximum number of sub expressions in address.  We set it to
+   a small integer since it's unlikely to have a complicated
+   address expression.  */
+
+#define MAX_CANON_ADDR_PARTS (5)
+
+/* Collect sub expressions in address X with PLUS as the seperator.
+   Sub expressions are stored in vector ADDR_PARTS.  */
+
+static void
+collect_address_parts (rtx x, vec<rtx> *addr_parts)
+{
+  for (;;)
+    if (GET_CODE (x) == PLUS)
+      {
+       collect_address_parts (XEXP (x, 0), addr_parts);
+       x = XEXP (x, 1);
+      }
+    else
+      {
+       addr_parts->safe_push (x);
+       break;
+      }
+}
+
+/* Compare function for sorting sub expressions X and Y based on
+   precedence defined for communitive operations.  */
+
+static int
+compare_address_parts (const void *x, const void *y)
+{
+  const rtx *rx = (const rtx *)x;
+  const rtx *ry = (const rtx *)y;
+  int px = commutative_operand_precedence (*rx);
+  int py = commutative_operand_precedence (*ry);
+
+  return (py - px);
+}
+
+/* Return a canonical version address for X by following steps:
+     1) Rewrite ASHIFT into MULT recursively.
+     2) Divide address into sub expressions with PLUS as the
+       separator.
+     3) Sort sub expressions according to precedence defined
+       for communative operations.
+     4) Simplify CONST_INT_P sub expressions.
+     5) Create new canonicalized address and return.
+   Callers should prepare a copy of X because this function may
+   modify it in place.  */
+
+static rtx
+canonicalize_address (rtx x)
+{
+  rtx res;
+  unsigned int i, j;
+  machine_mode mode = GET_MODE (x);
+  auto_vec<rtx, MAX_CANON_ADDR_PARTS> addr_parts;
+
+  /* Rewrite ASHIFT into MULT.  */
+  canonicalize_address_mult (x);
+  /* Divide address into sub expressions.  */
+  collect_address_parts (x, &addr_parts);
+  /* Unlikely to have very complicated address.  */
+  if (addr_parts.length () < 2
+      || addr_parts.length () > MAX_CANON_ADDR_PARTS)
+    return x;
+
+  /* Sort sub expressions according to canonicalization precedence.  */
+  addr_parts.qsort (compare_address_parts);
+
+  /* Simplify all constant int summary if possible.  */
+  for (i = 0; i < addr_parts.length (); i++)
+    if (CONST_INT_P (addr_parts[i]))
+      break;
+
+  for (j = i + 1; j < addr_parts.length (); j++)
+    {
+      gcc_assert (CONST_INT_P (addr_parts[j]));
+      addr_parts[i] = simplify_gen_binary (PLUS, mode,
+                                          addr_parts[i],
+                                          addr_parts[j]);
+    }
+
+  /* Chain PLUS operators to the left for !CONST_INT_P sub expressions.  */
+  res = addr_parts[0];
+  for (j = 1; j < i; j++)
+    res = simplify_gen_binary (PLUS, mode, res, addr_parts[j]);
+
+  /* Pickup the last CONST_INT_P sub expression.  */
+  if (i < addr_parts.length ())
+    res = simplify_gen_binary (PLUS, mode, res, addr_parts[i]);
+
+  return res;
+}
+
 /* Given invariant DEF and its address USE, check if the corresponding
    invariant expr can be propagated into the use or not.  */
 
@@ -761,7 +902,7 @@ static bool
 inv_can_prop_to_addr_use (struct def *def, df_ref use)
 {
   struct invariant *inv;
-  rtx *pos = DF_REF_REAL_LOC (use), def_set;
+  rtx *pos = DF_REF_REAL_LOC (use), def_set, use_set;
   rtx_insn *use_insn = DF_REF_INSN (use);
   rtx_insn *def_insn;
   bool ok;
@@ -778,6 +919,29 @@ inv_can_prop_to_addr_use (struct def *def, df_ref use)
 
   validate_unshare_change (use_insn, pos, SET_SRC (def_set), true);
   ok = verify_changes (0);
+  /* Try harder with canonicalization in address expression.  */
+  if (!ok && (use_set = single_set (use_insn)) != NULL_RTX)
+    {
+      rtx src, dest, mem = NULL_RTX;
+
+      src = SET_SRC (use_set);
+      dest = SET_DEST (use_set);
+      if (MEM_P (src))
+       mem = src;
+      else if (MEM_P (dest))
+       mem = dest;
+
+      if (mem != NULL_RTX
+         && !memory_address_addr_space_p (GET_MODE (mem),
+                                          XEXP (mem, 0),
+                                          MEM_ADDR_SPACE (mem)))
+       {
+         rtx addr = canonicalize_address (copy_rtx (XEXP (mem, 0)));
+         if (memory_address_addr_space_p (GET_MODE (mem),
+                                          addr, MEM_ADDR_SPACE (mem)))
+           ok = true;
+       }
+    }
   cancel_changes (0);
   return ok;
 }
diff --git a/gcc/testsuite/gcc.target/i386/pr69052.c 
b/gcc/testsuite/gcc.target/i386/pr69052.c
new file mode 100644
index 0000000..6f491e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr69052.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target pie } */
+/* { dg-options "-O2 -fPIE -pie" } */
+
+int look_nbits[256], loop_sym[256];
+const int ind[] = {
+  0,  1,  8, 16,  9,  2,  3, 10, 17, 24, 32, 25, 18, 11,  4,  5,
+ 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13,  6,  7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
+};
+int out[256];
+extern void bar (int *, int *);
+void foo (int *l1, int *l2, int *v, int *v1, int *m1, int i)
+{
+  int L = i + 1, b = 20;
+  int result, k;
+
+  for (k = 1; k < 64; k++)
+    {
+      int look = (((L >> (b - 8))) & ((1 << 8) - 1));
+      int nb = l1[look];
+      int code;
+      int r;
+
+      if (nb)
+       {
+         b -= nb;
+         result = l2[look];
+       }
+      else
+       {
+         nb = 9;
+         code = (((L >> (b -= nb))) & ((1 << nb) - 1));
+         result = v[(code + v1[nb])];
+       }
+      r = result >> 4;
+      result &= 15;
+      if (result)
+       {
+         k += r;
+         r = (((L >> (b -= result))) & ((1 << result) - 1));
+         if (r < (1 << (result - 1)))
+           result = r + (((-1) << result) + 1);
+         else
+           result = r;
+
+         out[ind[k]] = result;
+       }
+      bar (&L, &b);
+    }
+}
+
+/* { dg-final { scan-assembler-not "leal\[ \t\]ind@GOTOFF\\(%\[^,\]*\\), %" { 
target ia32 } } } */

Reply via email to