From: Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org> This patch changes cse_insn to process parallel rtx one by one such that any destination rtx in cse list is invalidated before processing the next.
gcc/ChangeLog: 2019-05-16 Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org> PR target/88834 * cse.c (safe_hash): Handle VEC_DUPLICATE. (exp_equiv_p): Likewise. (hash_rtx_cb): Change to accept const_rtx. (struct set): Add field to record if uses of dest is invalidated. (cse_insn): For parallel rtx, invalidate register set by first rtx before processing the next. gcc/testsuite/ChangeLog: 2019-05-16 Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org> PR target/88834 * gcc.target/aarch64/pr88834.c: New test. Change-Id: I7c3a61f034128f38abe0c2b7dab5d81dec28146c --- gcc/cse.c | 67 ++++++++++++++++++++++++++---- gcc/testsuite/gcc.target/aarch64/pr88836.c | 14 +++++++ 2 files changed, 73 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/pr88836.c diff --git a/gcc/cse.c b/gcc/cse.c index 6c9cda1..9dc31f5 100644 --- a/gcc/cse.c +++ b/gcc/cse.c @@ -570,7 +570,7 @@ static void invalidate_for_call (void); static rtx use_related_value (rtx, struct table_elt *); static inline unsigned canon_hash (rtx, machine_mode); -static inline unsigned safe_hash (rtx, machine_mode); +static inline unsigned safe_hash (const_rtx, machine_mode); static inline unsigned hash_rtx_string (const char *); static rtx canon_reg (rtx, rtx_insn *); @@ -2369,6 +2369,11 @@ hash_rtx_cb (const_rtx x, machine_mode mode, hash += fixed_hash (CONST_FIXED_VALUE (x)); return hash; + case VEC_DUPLICATE: + return hash_rtx_cb (XEXP (x, 0), VOIDmode, + do_not_record_p, hash_arg_in_memory_p, + have_reg_qty, cb); + case CONST_VECTOR: { int units; @@ -2599,7 +2604,7 @@ canon_hash (rtx x, machine_mode mode) and hash_arg_in_memory are not changed. */ static inline unsigned -safe_hash (rtx x, machine_mode mode) +safe_hash (const_rtx x, machine_mode mode) { int dummy_do_not_record; return hash_rtx (x, mode, &dummy_do_not_record, NULL, true); @@ -2630,6 +2635,16 @@ exp_equiv_p (const_rtx x, const_rtx y, int validate, bool for_gcse) return x == y; code = GET_CODE (x); + if ((code == CONST_VECTOR && GET_CODE (y) == VEC_DUPLICATE) + || (code == VEC_DUPLICATE && GET_CODE (y) == CONST_VECTOR)) + { + if (code == VEC_DUPLICATE) + std::swap (x, y); + if (const_vector_encoded_nelts (x) != 1) + return 0; + return exp_equiv_p (CONST_VECTOR_ENCODED_ELT (x, 0), XEXP (y, 0), + validate, for_gcse); + } if (code != GET_CODE (y)) return 0; @@ -4192,7 +4207,8 @@ struct set char src_in_memory; /* Nonzero if the SET_SRC contains something whose value cannot be predicted and understood. */ - char src_volatile; + char src_volatile : 1; + char invalidate_dest_p : 1; /* Original machine mode, in case it becomes a CONST_INT. The size of this field should match the size of the mode field of struct rtx_def (see rtl.h). */ @@ -4639,7 +4655,7 @@ cse_insn (rtx_insn *insn) for (i = 0; i < n_sets; i++) { bool repeat = false; - bool mem_noop_insn = false; + bool noop_insn = false; rtx src, dest; rtx src_folded; struct table_elt *elt = 0, *p; @@ -4736,6 +4752,7 @@ cse_insn (rtx_insn *insn) sets[i].src = src; sets[i].src_hash = HASH (src, mode); sets[i].src_volatile = do_not_record; + sets[i].invalidate_dest_p = 1; sets[i].src_in_memory = hash_arg_in_memory; /* If SRC is a MEM, there is a REG_EQUIV note for SRC, and DEST is @@ -5365,7 +5382,7 @@ cse_insn (rtx_insn *insn) || insn_nothrow_p (insn))) { SET_SRC (sets[i].rtl) = trial; - mem_noop_insn = true; + noop_insn = true; break; } @@ -5418,6 +5435,19 @@ cse_insn (rtx_insn *insn) src_folded_cost = constant_pool_entries_cost; src_folded_regcost = constant_pool_entries_regcost; } + else if (n_sets == 1 + && REG_P (trial) + && REG_P (SET_DEST (sets[i].rtl)) + && GET_MODE_CLASS (mode) == MODE_CC + && REGNO (trial) == REGNO (SET_DEST (sets[i].rtl)) + && !side_effects_p (dest) + && (cfun->can_delete_dead_exceptions + || insn_nothrow_p (insn))) + { + SET_SRC (sets[i].rtl) = trial; + noop_insn = true; + break; + } } /* If we changed the insn too much, handle this set from scratch. */ @@ -5588,7 +5618,7 @@ cse_insn (rtx_insn *insn) } /* Similarly for no-op MEM moves. */ - else if (mem_noop_insn) + else if (noop_insn) { if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn)) cse_cfg_altered = true; @@ -5760,6 +5790,26 @@ cse_insn (rtx_insn *insn) } elt = insert (src, classp, sets[i].src_hash, mode); elt->in_memory = sets[i].src_in_memory; + + if (REG_P (dest) + && ! reg_mentioned_p (dest, src)) + { + sets[i].invalidate_dest_p = 0; + unsigned int regno = REGNO (dest); + unsigned int endregno = END_REGNO (dest); + unsigned int j; + + for (j = regno; j < endregno; j++) + { + if (REG_IN_TABLE (j) >= 0) + { + remove_invalid_refs (j); + REG_IN_TABLE (j) = -1; + } + } + invalidate (dest, VOIDmode); + } + /* If inline asm has any clobbers, ensure we only reuse existing inline asms and never try to put the ASM_OPERANDS into an insn that isn't inline asm. */ @@ -5853,7 +5903,8 @@ cse_insn (rtx_insn *insn) previous quantity's chain. Needed for memory if this is a nonvarying address, unless we have just done an invalidate_memory that covers even those. */ - if (REG_P (dest) || GET_CODE (dest) == SUBREG) + if ((REG_P (dest) || GET_CODE (dest) == SUBREG) + && sets[i].invalidate_dest_p) invalidate (dest, VOIDmode); else if (MEM_P (dest)) invalidate (dest, VOIDmode); @@ -5887,7 +5938,7 @@ cse_insn (rtx_insn *insn) if (!REG_P (x)) mention_regs (x); - else + else if (sets[i].invalidate_dest_p) { /* We used to rely on all references to a register becoming inaccessible when a register changes to a new quantity, diff --git a/gcc/testsuite/gcc.target/aarch64/pr88836.c b/gcc/testsuite/gcc.target/aarch64/pr88836.c new file mode 100644 index 0000000..442e8a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr88836.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-S -O3 -march=armv8.2-a+sve" } */ + +void +f (int *restrict x, int *restrict y, int *restrict z, int n) +{ + for (int i = 0; i < n; i += 2) + { + x[i] = y[i] + z[i]; + x[i + 1] = y[i + 1] - z[i + 1]; + } +} + +/* { dg-final { scan-assembler-times {ptest} 0 } } */ -- 2.7.4