Hi,
  This patch replaces shift and ior insns with one rotate and mask
insn for the split patterns which are for DI byte swap on Power6 and
before. The test cases shows the optimization.

  Bootstrapped and tested on ppc64 Linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-06-06 Haochen Gui <guih...@linux.ibm.com>

gcc/
        * config/rs6000/rs6000.md (split for DI load byte swap): Merge shift
        and ior insns to one rotate and mask insn.
        (split for DI register byte swap): Likewise.

gcc/testsuite/
        * gcc.target/powerpc/pr93453-1.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bf85baa5370..2e38195aaac 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2828,8 +2828,8 @@ (define_split
       emit_insn (gen_bswapsi2 (dest_32, word2));
     }

-  emit_insn (gen_ashldi3 (op3, op3, GEN_INT (32)));
-  emit_insn (gen_iordi3 (dest, dest, op3));
+  emit_insn (gen_rotldi3_insert_3 (dest, op3, GEN_INT (32), dest,
+                                  GEN_INT ((HOST_WIDE_INT_1U << 32) - 1)));
   DONE;
 })

@@ -2914,10 +2914,10 @@ (define_split
   rtx op3_si  = simplify_gen_subreg (SImode, op3, DImode, lo_off);

   emit_insn (gen_lshrdi3 (op2, src, GEN_INT (32)));
-  emit_insn (gen_bswapsi2 (dest_si, src_si));
-  emit_insn (gen_bswapsi2 (op3_si, op2_si));
-  emit_insn (gen_ashldi3 (dest, dest, GEN_INT (32)));
-  emit_insn (gen_iordi3 (dest, dest, op3));
+  emit_insn (gen_bswapsi2 (op3_si, src_si));
+  emit_insn (gen_bswapsi2 (dest_si, op2_si));
+  emit_insn (gen_rotldi3_insert_3 (dest, op3, GEN_INT (32), dest,
+                                  GEN_INT ((HOST_WIDE_INT_1U << 32) - 1)));
   DONE;
 })

diff --git a/gcc/testsuite/gcc.target/powerpc/pr93453-1.c 
b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
new file mode 100644
index 00000000000..4271886561f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-mdejagnu-cpu=power6 -O2" } */
+
+unsigned long load_byte_reverse (unsigned long *in)
+{
+   return __builtin_bswap64 (*in);
+}
+
+unsigned long byte_reverse (unsigned long in)
+{
+   return __builtin_bswap64 (in);
+}
+
+/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */

Reply via email to