Hi,

This patch extends the existing function find_alignment_op to
check that all definitions of base_reg are AND operations with mask -16B
that force the alignment.  If they all are, it passes all the AND
operations and their instructions in one vector to recombine_lvx_pattern
and recombine_stvx_pattern, which then remove all the useless ANDs.

Bootstrapped/regtested on powerpc64le-linux-gnu P8.

Is it OK for trunk?

BR,
Kewen
-----
gcc/ChangeLog:

        * config/rs6000/rs6000-p8swap.c (insn_rtx_pair_t): New type.
        (find_alignment_op): Adjust to support multiple definitions which
        are all AND operations with the mask -16B.
        (recombine_lvx_pattern): Adjust to handle multiple AND operations
        from find_alignment_op.
        (recombine_stvx_pattern): Likewise.

gcc/testsuite/ChangeLog:

        * gcc.target/powerpc/pr97019.c: New test.
diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c
index 3d5dc7d8aae..2de2edeab67 100644
--- a/gcc/config/rs6000/rs6000-p8swap.c
+++ b/gcc/config/rs6000/rs6000-p8swap.c
@@ -183,6 +183,8 @@ class swap_web_entry : public web_entry_base
   unsigned int will_delete : 1;
 };
 
+typedef std::pair<rtx_insn *, rtx> insn_rtx_pair_t;
+
 enum special_handling_values {
   SH_NONE = 0,
   SH_CONST_VECTOR,
@@ -2095,15 +2097,19 @@ alignment_mask (rtx_insn *insn)
   return alignment_with_canonical_addr (SET_SRC (body));
 }
 
-/* Given INSN that's a load or store based at BASE_REG, look for a
-   feeding computation that aligns its address on a 16-byte boundary.
-   Return the rtx and its containing AND_INSN.  */
-static rtx
-find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
+/* Given INSN that's a load or store based at BASE_REG, check if
+   all of its feeding computations align its address on a 16-byte
+   boundary.  If so, return true and push each computation into
+   AND_INSN_VEC as a pair {and_insn, and_operation rtx}.
+   Otherwise, return false.  */
+
+static bool
+find_alignment_op (rtx_insn *insn, rtx base_reg,
+                  vec<insn_rtx_pair_t> *and_insn_vec)
 {
   df_ref base_use;
   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
-  rtx and_operation = 0;
+  rtx and_operation = NULL_RTX;
 
   FOR_EACH_INSN_INFO_USE (base_use, insn_info)
     {
@@ -2111,22 +2117,30 @@ find_alignment_op (rtx_insn *insn, rtx base_reg, rtx_insn **and_insn)
        continue;
 
       struct df_link *base_def_link = DF_REF_CHAIN (base_use);
-      if (!base_def_link || base_def_link->next)
-       break;
+      if (!base_def_link)
+       return false;
 
-      /* With stack-protector code enabled, and possibly in other
-        circumstances, there may not be an associated insn for 
-        the def.  */
-      if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
-       break;
+      while (base_def_link)
+       {
+         /* With stack-protector code enabled, and possibly in other
+            circumstances, there may not be an associated insn for
+            the def.  */
+         if (DF_REF_IS_ARTIFICIAL (base_def_link->ref))
+           return false;
 
-      *and_insn = DF_REF_INSN (base_def_link->ref);
-      and_operation = alignment_mask (*and_insn);
-      if (and_operation != 0)
-       break;
+         rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref);
+         and_operation = alignment_mask (and_insn);
+
+         /* Stop if we find any one which doesn't align.  */
+         if (and_operation == NULL_RTX)
+           return false;
+
+         and_insn_vec->safe_push (std::make_pair (and_insn, and_operation));
+         base_def_link = base_def_link->next;
+       }
     }
 
-  return and_operation;
+  return and_operation != NULL_RTX;
 }
 
 struct del_info { bool replace; rtx_insn *replace_insn; };
@@ -2143,10 +2157,10 @@ recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
   rtx mem = XEXP (SET_SRC (body), 0);
   rtx base_reg = XEXP (mem, 0);
 
-  rtx_insn *and_insn;
-  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
+  auto_vec<insn_rtx_pair_t> and_insn_vec;
+  bool all_and_p = find_alignment_op (insn, base_reg, &and_insn_vec);
 
-  if (and_operation != 0)
+  if (all_and_p)
     {
       df_ref def;
       struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
@@ -2168,25 +2182,34 @@ recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete)
          to_delete[INSN_UID (swap_insn)].replace = true;
          to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
 
-         /* However, first we must be sure that we make the
-            base register from the AND operation available
-            in case the register has been overwritten.  Copy
-            the base register to a new pseudo and use that
-            as the base register of the AND operation in
-            the new LVX instruction.  */
-         rtx and_base = XEXP (and_operation, 0);
-         rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
-         rtx copy = gen_rtx_SET (new_reg, and_base);
-         rtx_insn *new_insn = emit_insn_after (copy, and_insn);
-         set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
-         df_insn_rescan (new_insn);
-
-         XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
-                                      XEXP (and_operation, 1));
+         rtx new_reg = NULL_RTX;
+         rtx and_op = NULL_RTX;
+         for (unsigned i = 0; i < and_insn_vec.length (); ++i)
+           {
+             /* However, first we must be sure that we make the
+                base register from the AND operation available
+                in case the register has been overwritten.  Copy
+                the base register to a new pseudo and use that
+                as the base register of the AND operation in
+                the new LVX instruction.  */
+             insn_rtx_pair_t and_pair = and_insn_vec[i];
+             rtx_insn *and_insn = and_pair.first;
+             and_op = and_pair.second;
+             rtx and_base = XEXP (and_op, 0);
+             if (!new_reg)
+               new_reg = gen_reg_rtx (GET_MODE (and_base));
+             rtx copy = gen_rtx_SET (new_reg, and_base);
+             rtx_insn *new_insn = emit_insn_after (copy, and_insn);
+             set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
+             df_insn_rescan (new_insn);
+           }
+
+         XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg,
+                                      XEXP (and_op, 1));
          SET_SRC (body) = mem;
          INSN_CODE (insn) = -1; /* Force re-recognition.  */
          df_insn_rescan (insn);
-                 
+
          if (dump_file)
            fprintf (dump_file, "lvx opportunity found at %d\n",
                     INSN_UID (insn));
@@ -2205,10 +2228,10 @@ recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
   rtx mem = SET_DEST (body);
   rtx base_reg = XEXP (mem, 0);
 
-  rtx_insn *and_insn;
-  rtx and_operation = find_alignment_op (insn, base_reg, &and_insn);
+  auto_vec<insn_rtx_pair_t> and_insn_vec;
+  bool all_and_p = find_alignment_op (insn, base_reg, &and_insn_vec);
 
-  if (and_operation != 0)
+  if (all_and_p)
     {
       rtx src_reg = XEXP (SET_SRC (body), 0);
       df_ref src_use;
@@ -2234,25 +2257,34 @@ recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete)
          to_delete[INSN_UID (swap_insn)].replace = true;
          to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn;
 
-         /* However, first we must be sure that we make the
-            base register from the AND operation available
-            in case the register has been overwritten.  Copy
-            the base register to a new pseudo and use that
-            as the base register of the AND operation in
-            the new STVX instruction.  */
-         rtx and_base = XEXP (and_operation, 0);
-         rtx new_reg = gen_reg_rtx (GET_MODE (and_base));
-         rtx copy = gen_rtx_SET (new_reg, and_base);
-         rtx_insn *new_insn = emit_insn_after (copy, and_insn);
-         set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
-         df_insn_rescan (new_insn);
-
-         XEXP (mem, 0) = gen_rtx_AND (GET_MODE (and_base), new_reg,
-                                      XEXP (and_operation, 1));
+         rtx new_reg = NULL_RTX;
+         rtx and_op = NULL_RTX;
+         for (unsigned i = 0; i < and_insn_vec.length (); ++i)
+           {
+             /* However, first we must be sure that we make the
+                base register from the AND operation available
+                in case the register has been overwritten.  Copy
+                the base register to a new pseudo and use that
+                as the base register of the AND operation in
+                the new STVX instruction.  */
+             insn_rtx_pair_t and_pair = and_insn_vec[i];
+             rtx_insn *and_insn = and_pair.first;
+             and_op = and_pair.second;
+             rtx and_base = XEXP (and_op, 0);
+             if (!new_reg)
+               new_reg = gen_reg_rtx (GET_MODE (and_base));
+             rtx copy = gen_rtx_SET (new_reg, and_base);
+             rtx_insn *new_insn = emit_insn_after (copy, and_insn);
+             set_block_for_insn (new_insn, BLOCK_FOR_INSN (and_insn));
+             df_insn_rescan (new_insn);
+           }
+
+         XEXP (mem, 0) = gen_rtx_AND (GET_MODE (new_reg), new_reg,
+                                      XEXP (and_op, 1));
          SET_SRC (body) = src_reg;
          INSN_CODE (insn) = -1; /* Force re-recognition.  */
          df_insn_rescan (insn);
-                 
+
          if (dump_file)
            fprintf (dump_file, "stvx opportunity found at %d\n",
                     INSN_UID (insn));
diff --git a/gcc/testsuite/gcc.target/powerpc/pr97019.c b/gcc/testsuite/gcc.target/powerpc/pr97019.c
new file mode 100644
index 00000000000..d6bb08ad260
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr97019.c
@@ -0,0 +1,79 @@
+/* { dg-do compile { target { powerpc_p8vector_ok && le } } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8" } */
+
+/* Test there are no useless instructions "rldicr x,y,0,59"
+   to align the addresses for lvx/stvx.  */
+
+extern int a, b, c;
+extern vector unsigned long long ev5, ev6, ev7, ev8;
+extern int dummy (vector unsigned long long);
+
+int test_vec_ld(unsigned char *pe) {
+
+  vector unsigned long long v1, v2, v3, v4, v9;
+  vector unsigned long long v5 = ev5;
+  vector unsigned long long v6 = ev6;
+  vector unsigned long long v7 = ev7;
+  vector unsigned long long v8 = ev8;
+
+  unsigned char *e = pe;
+
+  do {
+    if (a) {
+      v1 = __builtin_vec_ld(16, (unsigned long long *)e);
+      v2 = __builtin_vec_ld(32, (unsigned long long *)e);
+      v3 = __builtin_vec_ld(48, (unsigned long long *)e);
+      e = e + 8;
+      for (int i = 0; i < a; i++) {
+        v4 = v5;
+        v5 = __builtin_crypto_vpmsumd(v1, v6);
+        v6 = __builtin_crypto_vpmsumd(v2, v7);
+        v7 = __builtin_crypto_vpmsumd(v3, v8);
+        e = e + 8;
+      }
+    }
+    v5 = __builtin_vec_ld(16, (unsigned long long *)e);
+    v6 = __builtin_vec_ld(32, (unsigned long long *)e);
+    v7 = __builtin_vec_ld(48, (unsigned long long *)e);
+    if (c)
+      b = 1;
+  } while (b);
+
+  return dummy(v4);
+}
+
+int test_vec_st(unsigned char *pe) {
+
+  vector unsigned long long v1, v2, v3, v4;
+  vector unsigned long long v5 = ev5;
+  vector unsigned long long v6 = ev6;
+  vector unsigned long long v7 = ev7;
+  vector unsigned long long v8 = ev8;
+
+  unsigned char *e = pe;
+
+  do {
+    if (a) {
+      __builtin_vec_st(v1, 16, (unsigned long long *)e);
+      __builtin_vec_st(v2, 32, (unsigned long long *)e);
+      __builtin_vec_st(v3, 48, (unsigned long long *)e);
+      e = e + 8;
+      for (int i = 0; i < a; i++) {
+        v4 = v5;
+        v5 = __builtin_crypto_vpmsumd(v1, v6);
+        v6 = __builtin_crypto_vpmsumd(v2, v7);
+        v7 = __builtin_crypto_vpmsumd(v3, v8);
+        e = e + 8;
+      }
+    }
+    __builtin_vec_st(v5, 16, (unsigned long long *)e);
+    __builtin_vec_st(v6, 32, (unsigned long long *)e);
+    __builtin_vec_st(v7, 48, (unsigned long long *)e);
+    if (c)
+      b = 1;
+  } while (b);
+
+  return dummy(v4);
+}
+
+/* { dg-final { scan-assembler-not "rldicr\[ \t\]+\[0-9\]+,\[0-9\]+,0,59" } } */

Reply via email to