Thanks for the review, both.

On 26/08/2020 09:19, Vladimir Makarov wrote:
> 
> On 2020-08-26 5:06 a.m., Richard Sandiford wrote:
> > Alex Coplan <alex.cop...@arm.com> writes:
> > 
> > Minor nit, should be formatted as:
> > 
> > static rtx
> > canonicalize_reload_addr (rtx addr)
> Sorry for missing this.  Alex, it should be fixed anyway.
> > 
> > I don't think we should we restrict this to (plus (mult X Y) Z),
> > since addresses can be more complicated than that.  One way to
> > search for all MULTs is:
> > 
> >    subrtx_iterator::array_type array;
> >    FOR_EACH_SUBRTX (iter, array, x, NONCONST)
> >      {
> >        rtx x = *iter;
> >        if (GET_CODE (x) == MULT && CONST_INT_P (XEXP (x, 1)))
> >          ...
> >      }
> > 
> > (Needs rtl-iter.h)
> 
> I agree it would be nice to process a general case.  Alex, you can do
> this as a separate patch if you want.
> 
> Richard, thank you for looking at this patch too.
> 
> 

Please find a reworked version of the patch attached incorporating
Richard's feedback.

Testing:
 * Bootstrap and regtest on aarch64-none-linux-gnu,
   arm-none-linux-gnueabihf, and x86_64-pc-linux-gnu: no regressions.

Is the updated patch OK for master?

Thanks,
Alex
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index 421c453997b..580da9c3ed6 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -131,6 +131,7 @@
 #include "lra-int.h"
 #include "print-rtl.h"
 #include "function-abi.h"
+#include "rtl-iter.h"
 
 /* Value of LRA_CURR_RELOAD_NUM at the beginning of BB of the current
    insn.  Remember that LRA_CURR_RELOAD_NUM is the number of emitted
@@ -570,6 +571,33 @@ init_curr_insn_input_reloads (void)
   curr_insn_input_reloads_num = 0;
 }
 
+/* The canonical form of an rtx inside a MEM is not necessarily the same as the
+   canonical form of the rtx outside the MEM.  Fix this up in the case that
+   we're reloading an address (and therefore pulling it outside a MEM).  */
+static rtx
+canonicalize_reload_addr (rtx addr)
+{
+  subrtx_var_iterator::array_type array;
+  FOR_EACH_SUBRTX_VAR (iter, array, addr, NONCONST)
+    {
+      rtx x = *iter;
+      if (GET_CODE (x) == MULT && CONST_INT_P (XEXP (x, 1)))
+       {
+         const HOST_WIDE_INT ci = INTVAL (XEXP (x, 1));
+         const int pwr2 = exact_log2 (ci);
+         if (pwr2 > 0)
+           {
+             /* Rewrite this to use a shift instead, which is canonical when
+                outside of a MEM.  */
+             PUT_CODE (x, ASHIFT);
+             XEXP (x, 1) = GEN_INT (pwr2);
+           }
+       }
+    }
+
+  return addr;
+}
+
 /* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse already
    created input reload pseudo (only if TYPE is not OP_OUT).  Don't
    reuse pseudo if IN_SUBREG_P is true and the reused pseudo should be
@@ -4362,12 +4390,19 @@ curr_insn_transform (bool check_only_p)
            {
              rtx addr = *loc;
              enum rtx_code code = GET_CODE (addr);
-             
+             bool align_p = false;
+
              if (code == AND && CONST_INT_P (XEXP (addr, 1)))
-               /* (and ... (const_int -X)) is used to align to X bytes.  */
-               addr = XEXP (*loc, 0);
+               {
+                 /* (and ... (const_int -X)) is used to align to X bytes.  */
+                 align_p = true;
+                 addr = XEXP (*loc, 0);
+               }
+             else
+               addr = canonicalize_reload_addr (addr);
+
              lra_emit_move (new_reg, addr);
-             if (addr != *loc)
+             if (align_p)
                emit_move_insn (new_reg, gen_rtx_AND (GET_MODE (new_reg), new_reg, XEXP (*loc, 1)));
            }
          before = get_insns ();
diff --git a/gcc/testsuite/gcc.target/aarch64/mem-shift-canonical.c b/gcc/testsuite/gcc.target/aarch64/mem-shift-canonical.c
new file mode 100644
index 00000000000..36beed497a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/mem-shift-canonical.c
@@ -0,0 +1,27 @@
+/* This test is a copy of gcc.dg/torture/pr34330.c: here we are looking for
+   specific patterns being matched in the AArch64 backend.  */
+
+/* { dg-do compile } */
+/* { dg-options "-Os -ftree-vectorize -dp" } */
+
+
+struct T
+{
+  int t;
+  struct { short s1, s2, s3, s4; } *s;
+};
+
+void
+foo (int *a, int *b, int *c, int *d, struct T *e)
+{
+  int i;
+  for (i = 0; i < e->t; i++)
+    {
+      e->s[i].s1 = a[i];
+      e->s[i].s2 = b[i];
+      e->s[i].s3 = c[i];
+      e->s[i].s4 = d[i];
+    }
+}
+
+/* { dg-final { scan-assembler-times "add_lsl_di" 3 } } */

Reply via email to