Hi, This patch extends the existing function find_alignment_op to check that all definitions of base_reg are AND operations with mask -16B to force the alignment. If all are satisfied, it passes all the AND operations and their instructions in one vector to recombine_lvx_pattern and recombine_stvx_pattern, which will then remove all the useless ANDs.
Bootstrapped/regtested on powerpc64le-linux-gnu P8. Is it OK for trunk? BR, Kewen ----- gcc/ChangeLog: * config/rs6000/rs6000-p8swap.c (insn_rtx_pair_t): New type. (find_alignment_op): Adjust to support multiple definitions which are all AND operations with the mask -16B. (recombine_lvx_pattern): Adjust to handle multiple AND operations from find_alignment_op. (recombine_stvx_pattern): Likewise. gcc/testsuite/ChangeLog: * gcc.target/powerpc/pr97019.c: New test.
diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c index 3d5dc7d8aae..2de2edeab67 100644 --- a/gcc/config/rs6000/rs6000-p8swap.c +++ b/gcc/config/rs6000/rs6000-p8swap.c @@ -183,6 +183,8 @@ class swap_web_entry : public web_entry_base unsigned int will_delete : 1; }; +typedef std::pair<rtx_insn *, rtx> insn_rtx_pair_t; + enum special_handling_values { SH_NONE = 0, SH_CONST_VECTOR, @@ -2095,15 +2097,19 @@ alignment_mask (rtx_insn *insn) return alignment_with_canonical_addr (SET_SRC (body)); } -/* Given INSN that's a load or store based at BASE_REG, look for a - feeding computation that aligns its address on a 16-byte boundary. - Return the rtx and its containing AND_INSN. */ -static rtx -find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn) +/* Given INSN that's a load or store based at BASE_REG, check if + all of its feeding computations align its address on a 16-byte + boundary. If so, return true and put all the computations + information with the form of pair {and_operation rtx, and_insn} + into AND_INSN_VEC. Otherwise, return false. */ + +static bool +find_alignment_op (rtx_insn *insn, rtx base_reg, + vec<insn_rtx_pair_t> *and_insn_vec) { df_ref base_use; struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); - rtx and_operation = 0; + rtx and_operation = NULL_RTX; FOR_EACH_INSN_INFO_USE (base_use, insn_info) { @@ -2111,22 +2117,30 @@ find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn) continue; struct df_link *base_def_link = DF_REF_CHAIN (base_use); - if (!base_def_link || base_def_link->next) - break; + if (!base_def_link) + return false; - /* With stack-protector code enabled, and possibly in other - circumstances, there may not be an associated insn for - the def. */ - if (DF_REF_IS_ARTIFICIAL (base_def_link->ref)) - break; + while (base_def_link) + { + /* With stack-protector code enabled, and possibly in other + circumstances, there may not be an associated insn for + the def. 
*/ + if (DF_REF_IS_ARTIFICIAL (base_def_link->ref)) + return false; - *and_insn = DF_REF_INSN (base_def_link->ref); - and_operation = alignment_mask (*and_insn); - if (and_operation != 0) - break; + rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref); + and_operation = alignment_mask (and_insn); + + /* Stop if we find any one which doesn't align. */ + if (and_operation == NULL_RTX) + return false; + + and_insn_vec->safe_push (std::make_pair (and_insn, and_operation)); + base_def_link = base_def_link->next; + } } - return and_operation; + return and_operation != NULL_RTX; } struct del_info { bool replace; rtx_insn *replace_insn; }; @@ -2143,10 +2157,10 @@ recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete) rtx mem = XEXP (SET_SRC (body), 0); rtx base_reg = XEXP (mem, 0); - rtx_insn *and_insn; - rtx and_operation = find_alignment_op (insn, base_reg, &and_insn); + auto_vec<insn_rtx_pair_t> and_insn_vec; + bool all_and_p = find_alignment_op (insn, base_reg, &and_insn_vec); - if (and_operation != 0) + if (all_and_p) { df_ref def; struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); @@ -2168,25 +2182,34 @@ recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete) to_delete[INSN_UID (swap_insn)].replace = true; to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn; - /* However, first we must be sure that we make the - base register from the AND operation available - in case the register has been overwritten. Copy - the base register to a new pseudo and use that - as the base register of the AND operation in - the new LVX instruction. 
*/ - rtx and_base = XEXP (and_operation, 0); - rtx new_reg = gen_reg_rtx (GET_MODE (and_base)); - rtx copy = gen_rtx_SET (new_reg, and_base); - rtx_insn *new_insn = emit_insn_after (copy, and_insn); - set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn)); - df_insn_rescan (new_insn); - - XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg, - XEXP (and_operation, 1)); + rtx new_reg = NULL_RTX; + rtx and_op = NULL_RTX; + for (unsigned i = 0; i < and_insn_vec.length (); ++i) + { + /* However, first we must be sure that we make the + base register from the AND operation available + in case the register has been overwritten. Copy + the base register to a new pseudo and use that + as the base register of the AND operation in + the new LVX instruction. */ + insn_rtx_pair_t and_pair = and_insn_vec[i]; + rtx_insn *and_insn = and_pair.first; + and_op = and_pair.second; + rtx and_base = XEXP (and_op, 0); + if (!new_reg) + new_reg = gen_reg_rtx (GET_MODE (and_base)); + rtx copy = gen_rtx_SET (new_reg, and_base); + rtx_insn *new_insn = emit_insn_after (copy, and_insn); + set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn)); + df_insn_rescan (new_insn); + } + + XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, + XEXP (and_op, 1)); SET_SRC (body) = mem; INSN_CODE (insn) = -1; /* Force re-recognition. 
*/ df_insn_rescan (insn); - + if (dump_file) fprintf (dump_file, "lvx opportunity found at %d\n", INSN_UID (insn)); @@ -2205,10 +2228,10 @@ recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete) rtx mem = SET_DEST (body); rtx base_reg = XEXP (mem, 0); - rtx_insn *and_insn; - rtx and_operation = find_alignment_op (insn, base_reg, &and_insn); + auto_vec<insn_rtx_pair_t> and_insn_vec; + bool all_and_p = find_alignment_op (insn, base_reg, &and_insn_vec); - if (and_operation != 0) + if (all_and_p) { rtx src_reg = XEXP (SET_SRC (body), 0); df_ref src_use; @@ -2234,25 +2257,34 @@ recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete) to_delete[INSN_UID (swap_insn)].replace = true; to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn; - /* However, first we must be sure that we make the - base register from the AND operation available - in case the register has been overwritten. Copy - the base register to a new pseudo and use that - as the base register of the AND operation in - the new STVX instruction. */ - rtx and_base = XEXP (and_operation, 0); - rtx new_reg = gen_reg_rtx (GET_MODE (and_base)); - rtx copy = gen_rtx_SET (new_reg, and_base); - rtx_insn *new_insn = emit_insn_after (copy, and_insn); - set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn)); - df_insn_rescan (new_insn); - - XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg, - XEXP (and_operation, 1)); + rtx new_reg = NULL_RTX; + rtx and_op = NULL_RTX; + for (unsigned i = 0; i < and_insn_vec.length (); ++i) + { + /* However, first we must be sure that we make the + base register from the AND operation available + in case the register has been overwritten. Copy + the base register to a new pseudo and use that + as the base register of the AND operation in + the new STVX instruction. 
*/ + insn_rtx_pair_t and_pair = and_insn_vec[i]; + rtx_insn *and_insn = and_pair.first; + and_op = and_pair.second; + rtx and_base = XEXP (and_op, 0); + if (!new_reg) + new_reg = gen_reg_rtx (GET_MODE (and_base)); + rtx copy = gen_rtx_SET (new_reg, and_base); + rtx_insn *new_insn = emit_insn_after (copy, and_insn); + set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn)); + df_insn_rescan (new_insn); + } + + XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg, + XEXP (and_op, 1)); SET_SRC (body) = src_reg; INSN_CODE (insn) = -1; /* Force re-recognition. */ df_insn_rescan (insn); - + if (dump_file) fprintf (dump_file, "stvx opportunity found at %d\n", INSN_UID (insn)); diff --git a/gcc/testsuite/gcc.target/powerpc/pr97019.c b/gcc/testsuite/gcc.target/powerpc/pr97019.c new file mode 100644 index 00000000000..d6bb08ad260 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr97019.c @@ -0,0 +1,79 @@ +/* { dg-do compile { target { powerpc_p8vector_ok && le } } } */ +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ + +/* Test there are no useless instructions "rldicr x,y,0,59" + to align the addresses for lvx/stvx. 
*/ + +extern int a, b, c; +extern vector unsigned long long ev5, ev6, ev7, ev8; +extern int dummy (vector unsigned long long); + +int test_vec_ld(unsigned char *pe) { + + vector unsigned long long v1, v2, v3, v4, v9; + vector unsigned long long v5 = ev5; + vector unsigned long long v6 = ev6; + vector unsigned long long v7 = ev7; + vector unsigned long long v8 = ev8; + + unsigned char *e = pe; + + do { + if (a) { + v1 = __builtin_vec_ld(16, (unsigned long long *)e); + v2 = __builtin_vec_ld(32, (unsigned long long *)e); + v3 = __builtin_vec_ld(48, (unsigned long long *)e); + e = e + 8; + for (int i = 0; i < a; i++) { + v4 = v5; + v5 = __builtin_crypto_vpmsumd(v1, v6); + v6 = __builtin_crypto_vpmsumd(v2, v7); + v7 = __builtin_crypto_vpmsumd(v3, v8); + e = e + 8; + } + } + v5 = __builtin_vec_ld(16, (unsigned long long *)e); + v6 = __builtin_vec_ld(32, (unsigned long long *)e); + v7 = __builtin_vec_ld(48, (unsigned long long *)e); + if (c) + b = 1; + } while (b); + + return dummy(v4); +} + +int test_vec_st(unsigned char *pe) { + + vector unsigned long long v1, v2, v3, v4; + vector unsigned long long v5 = ev5; + vector unsigned long long v6 = ev6; + vector unsigned long long v7 = ev7; + vector unsigned long long v8 = ev8; + + unsigned char *e = pe; + + do { + if (a) { + __builtin_vec_st(v1, 16, (unsigned long long *)e); + __builtin_vec_st(v2, 32, (unsigned long long *)e); + __builtin_vec_st(v3, 48, (unsigned long long *)e); + e = e + 8; + for (int i = 0; i < a; i++) { + v4 = v5; + v5 = __builtin_crypto_vpmsumd(v1, v6); + v6 = __builtin_crypto_vpmsumd(v2, v7); + v7 = __builtin_crypto_vpmsumd(v3, v8); + e = e + 8; + } + } + __builtin_vec_st(v5, 16, (unsigned long long *)e); + __builtin_vec_st(v6, 32, (unsigned long long *)e); + __builtin_vec_st(v7, 48, (unsigned long long *)e); + if (c) + b = 1; + } while (b); + + return dummy(v4); +} + +/* { dg-final { scan-assembler-not "rldicr\[ \t\]+\[0-9\]+,\[0-9\]+,0,59" } } */