From: Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org>

This patch changes cse_insn to process the SETs of a PARALLEL one by
one, so that the destination of each SET is invalidated in the CSE
tables before the next SET is processed.
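
As a schematic illustration (not the exact pattern from the PR), an
insn with multiple SETs has roughly this shape:

  (parallel
    [(set (reg:VNx4BI 85) (unspec:VNx4BI [...]))
     (set (reg:CC CC_REGNUM) (compare:CC ...))])

Once the equivalence for one SET has been recorded, any hash table
entries that mention its destination are removed and the destination
is invalidated, so the next SET is processed against up-to-date
tables.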

gcc/ChangeLog:

2019-05-16  Kugan Vivekanandarajah  <kugan.vivekanandara...@linaro.org>

        PR target/88834
        * cse.c (safe_hash): Change to accept const_rtx.
        (hash_rtx_cb): Handle VEC_DUPLICATE.
        (exp_equiv_p): Handle equivalence of a VEC_DUPLICATE and a
        CONST_VECTOR.
        (struct set): Add invalidate_dest_p field to record whether the
        destination still needs to be invalidated.
        (cse_insn): Rename mem_noop_insn to noop_insn and use it for a
        redundant CC set as well.  For a PARALLEL, invalidate the
        destination of each SET before processing the next.

gcc/testsuite/ChangeLog:

2019-05-16  Kugan Vivekanandarajah  <kugan.vivekanandara...@linaro.org>

        PR target/88834
        * gcc.target/aarch64/pr88836.c: New test.

Change-Id: I7c3a61f034128f38abe0c2b7dab5d81dec28146c
---
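A note on the hash_rtx_cb/exp_equiv_p changes (schematic, simplified):
with SVE the same predicate constant can appear either as a CONST_VECTOR
with a single encoded element or as a VEC_DUPLICATE of that element:

  (const_vector:VNx4BI [(const_int 1)])
  (vec_duplicate:VNx4BI (const_int 1))

Hashing a VEC_DUPLICATE by its element alone is intended to make the
two forms hash alike (a CONST_VECTOR with one encoded element likewise
hashes to its element's hash), and the exp_equiv_p hunk makes them
compare equal, so looking up one form can find the other in the table.
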
 gcc/cse.c                                  | 67 ++++++++++++++++++++++++++----
 gcc/testsuite/gcc.target/aarch64/pr88836.c | 14 +++++++
 2 files changed, 73 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr88836.c
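
The testcase checks that the vectorized loop contains no ptest; the
flags set by the SVE while instruction should be used directly.  For
reference, this test alone can be run with something like:

  make check-gcc RUNTESTFLAGS="aarch64.exp=pr88836.c"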

diff --git a/gcc/cse.c b/gcc/cse.c
index 6c9cda1..9dc31f5 100644
--- a/gcc/cse.c
+++ b/gcc/cse.c
@@ -570,7 +570,7 @@ static void invalidate_for_call (void);
 static rtx use_related_value (rtx, struct table_elt *);
 
 static inline unsigned canon_hash (rtx, machine_mode);
-static inline unsigned safe_hash (rtx, machine_mode);
+static inline unsigned safe_hash (const_rtx, machine_mode);
 static inline unsigned hash_rtx_string (const char *);
 
 static rtx canon_reg (rtx, rtx_insn *);
@@ -2369,6 +2369,11 @@ hash_rtx_cb (const_rtx x, machine_mode mode,
       hash += fixed_hash (CONST_FIXED_VALUE (x));
       return hash;
 
+    case VEC_DUPLICATE:
+      return hash_rtx_cb (XEXP (x, 0), VOIDmode,
+                         do_not_record_p, hash_arg_in_memory_p,
+                         have_reg_qty, cb);
+
     case CONST_VECTOR:
       {
        int units;
@@ -2599,7 +2604,7 @@ canon_hash (rtx x, machine_mode mode)
    and hash_arg_in_memory are not changed.  */
 
 static inline unsigned
-safe_hash (rtx x, machine_mode mode)
+safe_hash (const_rtx x, machine_mode mode)
 {
   int dummy_do_not_record;
   return hash_rtx (x, mode, &dummy_do_not_record, NULL, true);
@@ -2630,6 +2635,16 @@ exp_equiv_p (const_rtx x, const_rtx y, int validate, bool for_gcse)
     return x == y;
 
   code = GET_CODE (x);
+  if ((code == CONST_VECTOR && GET_CODE (y) == VEC_DUPLICATE)
+      || (code == VEC_DUPLICATE && GET_CODE (y) == CONST_VECTOR))
+    {
+      if (code == VEC_DUPLICATE)
+       std::swap (x, y);
+      if (const_vector_encoded_nelts (x) != 1)
+       return 0;
+      return exp_equiv_p (CONST_VECTOR_ENCODED_ELT (x, 0), XEXP (y, 0),
+                         validate, for_gcse);
+    }
   if (code != GET_CODE (y))
     return 0;
 
@@ -4192,7 +4207,8 @@ struct set
   char src_in_memory;
   /* Nonzero if the SET_SRC contains something
      whose value cannot be predicted and understood.  */
-  char src_volatile;
+  char src_volatile : 1;
+  char invalidate_dest_p : 1; /* Nonzero if DEST still needs invalidating.  */
   /* Original machine mode, in case it becomes a CONST_INT.
      The size of this field should match the size of the mode
      field of struct rtx_def (see rtl.h).  */
@@ -4639,7 +4655,7 @@ cse_insn (rtx_insn *insn)
   for (i = 0; i < n_sets; i++)
     {
       bool repeat = false;
-      bool mem_noop_insn = false;
+      bool noop_insn = false;
       rtx src, dest;
       rtx src_folded;
       struct table_elt *elt = 0, *p;
@@ -4736,6 +4752,7 @@ cse_insn (rtx_insn *insn)
       sets[i].src = src;
       sets[i].src_hash = HASH (src, mode);
       sets[i].src_volatile = do_not_record;
+      sets[i].invalidate_dest_p = 1;
       sets[i].src_in_memory = hash_arg_in_memory;
 
       /* If SRC is a MEM, there is a REG_EQUIV note for SRC, and DEST is
@@ -5365,7 +5382,7 @@ cse_insn (rtx_insn *insn)
                       || insn_nothrow_p (insn)))
            {
              SET_SRC (sets[i].rtl) = trial;
-             mem_noop_insn = true;
+             noop_insn = true;
              break;
            }
 
@@ -5418,6 +5435,19 @@ cse_insn (rtx_insn *insn)
              src_folded_cost = constant_pool_entries_cost;
              src_folded_regcost = constant_pool_entries_regcost;
            }
+         else if (n_sets == 1
+                  && REG_P (trial)
+                  && REG_P (SET_DEST (sets[i].rtl))
+                  && GET_MODE_CLASS (mode) == MODE_CC
+                  && REGNO (trial) == REGNO (SET_DEST (sets[i].rtl))
+                  && !side_effects_p (dest)
+                  && (cfun->can_delete_dead_exceptions
+                      || insn_nothrow_p (insn)))
+           {
+             SET_SRC (sets[i].rtl) = trial;
+             noop_insn = true;
+             break;
+           }
        }
 
       /* If we changed the insn too much, handle this set from scratch.  */
@@ -5588,7 +5618,7 @@ cse_insn (rtx_insn *insn)
        }
 
       /* Similarly for no-op MEM moves.  */
-      else if (mem_noop_insn)
+      else if (noop_insn)
        {
          if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
            cse_cfg_altered = true;
@@ -5760,6 +5790,26 @@ cse_insn (rtx_insn *insn)
                  }
                elt = insert (src, classp, sets[i].src_hash, mode);
                elt->in_memory = sets[i].src_in_memory;
+
+               if (REG_P (dest)
+                   && ! reg_mentioned_p (dest, src))
+                 {
+                   sets[i].invalidate_dest_p = 0;
+                   unsigned int regno = REGNO (dest);
+                   unsigned int endregno = END_REGNO (dest);
+                   unsigned int j;
+
+                   for (j = regno; j < endregno; j++)
+                     {
+                       if (REG_IN_TABLE (j) >= 0)
+                         {
+                           remove_invalid_refs (j);
+                           REG_IN_TABLE (j) = -1;
+                         }
+                     }
+                   invalidate (dest, VOIDmode);
+                 }
+
                /* If inline asm has any clobbers, ensure we only reuse
                   existing inline asms and never try to put the ASM_OPERANDS
                   into an insn that isn't inline asm.  */
@@ -5853,7 +5903,8 @@ cse_insn (rtx_insn *insn)
           previous quantity's chain.
           Needed for memory if this is a nonvarying address, unless
           we have just done an invalidate_memory that covers even those.  */
-       if (REG_P (dest) || GET_CODE (dest) == SUBREG)
+       if ((REG_P (dest) || GET_CODE (dest) == SUBREG)
+           && sets[i].invalidate_dest_p)
          invalidate (dest, VOIDmode);
        else if (MEM_P (dest))
          invalidate (dest, VOIDmode);
@@ -5887,7 +5938,7 @@ cse_insn (rtx_insn *insn)
 
          if (!REG_P (x))
            mention_regs (x);
-         else
+         else if (sets[i].invalidate_dest_p)
            {
              /* We used to rely on all references to a register becoming
                 inaccessible when a register changes to a new quantity,
diff --git a/gcc/testsuite/gcc.target/aarch64/pr88836.c b/gcc/testsuite/gcc.target/aarch64/pr88836.c
new file mode 100644
index 0000000..442e8a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr88836.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+sve" } */
+
+void
+f (int *restrict x, int *restrict y, int *restrict z, int n)
+{
+  for (int i = 0; i < n; i += 2)
+    {
+      x[i] = y[i] + z[i];
+      x[i + 1] = y[i + 1] - z[i + 1];
+    }
+}
+
+/* { dg-final { scan-assembler-not {\tptest\t} } } */
-- 
2.7.4
