Hi,
  This patch creates an insn_and_split pattern which helps the duplicated
constant vector replace the source pseudo of store insn in fwprop pass.
Thus the store can be implemented by a single stxvd2x and it eliminates the
unnecessary byte swap insn on P8 LE. The test case shows the optimization.

  The patch depends on the first generic patch which uses insn cost in fwprop.

  Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
regressions.

Thanks
Gui Haochen


ChangeLog
rs6000: Eliminate unnecessary byte swaps for duplicated constant vector store

gcc/
        PR target/113325
        * config/rs6000/predicates.md (duplicate_easy_altivec_constant): New.
        * config/rs6000/vsx.md (vsx_stxvd2x4_le_const_<mode>): New.

gcc/testsuite/
        PR target/113325
        * gcc.target/powerpc/pr113325.c: New.


patch.diff
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index ef7d3f214c4..8ab6db630b7 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -759,6 +759,14 @@ (define_predicate "easy_vector_constant"
   return false;
 })

+;; Return 1 if it's a duplicated easy_altivec_constant.
+(define_predicate "duplicate_easy_altivec_constant"
+  (and (match_code "const_vector")
+       (match_test "easy_altivec_constant (op, mode)"))
+{
+  return const_vec_duplicate_p (op);
+})
+
 ;; Same as easy_vector_constant but only for EASY_VECTOR_15_ADD_SELF.
 (define_predicate "easy_vector_constant_add_self"
   (and (match_code "const_vector")
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 26fa32829af..98e4be26f64 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3362,6 +3362,29 @@ (define_insn "*vsx_stxvd2x4_le_<mode>"
   "stxvd2x %x1,%y0"
   [(set_attr "type" "vecstore")])

+(define_insn_and_split "vsx_stxvd2x4_le_const_<mode>"
+  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
+       (match_operand:VSX_W 1 "duplicate_easy_altivec_constant" "W"))]
+  "!BYTES_BIG_ENDIAN
+   && VECTOR_MEM_VSX_P (<MODE>mode)
+   && !TARGET_P9_VECTOR"
+  "#"
+  "&& 1"
+  [(set (match_dup 2)
+       (match_dup 1))
+   (set (match_dup 0)
+       (vec_select:VSX_W
+         (match_dup 2)
+         (parallel [(const_int 2) (const_int 3)
+                    (const_int 0) (const_int 1)])))]
+{
+  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
+                                        : operands[1];
+
+}
+  [(set_attr "type" "vecstore")
+   (set_attr "length" "8")])
+
 (define_insn "*vsx_stxvd2x8_le_V8HI"
   [(set (match_operand:V8HI 0 "memory_operand" "=Z")
         (vec_select:V8HI
diff --git a/gcc/testsuite/gcc.target/powerpc/pr113325.c 
b/gcc/testsuite/gcc.target/powerpc/pr113325.c
new file mode 100644
index 00000000000..dff68ac0a51
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr113325.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power8 -mvsx" } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-final { scan-assembler-not {\mxxpermdi\M} } } */
+
+void* foo (void* s1)
+{
+  return __builtin_memset (s1, 0, 32);
+}

Reply via email to