Hi, This patch replaces shift and ior insns with one rotate and mask insn for the split patterns which are for DI byte swap on Power6 and before. The test cases shows the optimization.
Bootstrapped and tested on ppc64 Linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-06-06 Haochen Gui <guih...@linux.ibm.com> gcc/ * config/rs6000/rs6000.md (split for DI load byte swap): Merge shift and ior insns to one rotate and mask insn. (split for DI register byte swap): Likewise. gcc/testsuite/ * gcc.target/powerpc/pr93453-1.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index bf85baa5370..2e38195aaac 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -2828,8 +2828,8 @@ (define_split emit_insn (gen_bswapsi2 (dest_32, word2)); } - emit_insn (gen_ashldi3 (op3, op3, GEN_INT (32))); - emit_insn (gen_iordi3 (dest, dest, op3)); + emit_insn (gen_rotldi3_insert_3 (dest, op3, GEN_INT (32), dest, + GEN_INT ((HOST_WIDE_INT_1U << 32) - 1))); DONE; }) @@ -2914,10 +2914,10 @@ (define_split rtx op3_si = simplify_gen_subreg (SImode, op3, DImode, lo_off); emit_insn (gen_lshrdi3 (op2, src, GEN_INT (32))); - emit_insn (gen_bswapsi2 (dest_si, src_si)); - emit_insn (gen_bswapsi2 (op3_si, op2_si)); - emit_insn (gen_ashldi3 (dest, dest, GEN_INT (32))); - emit_insn (gen_iordi3 (dest, dest, op3)); + emit_insn (gen_bswapsi2 (op3_si, src_si)); + emit_insn (gen_bswapsi2 (dest_si, op2_si)); + emit_insn (gen_rotldi3_insert_3 (dest, op3, GEN_INT (32), dest, + GEN_INT ((HOST_WIDE_INT_1U << 32) - 1))); DONE; }) diff --git a/gcc/testsuite/gcc.target/powerpc/pr93453-1.c b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c new file mode 100644 index 00000000000..4271886561f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-mdejagnu-cpu=power6 -O2" } */ + +unsigned long load_byte_reverse (unsigned long *in) +{ + return __builtin_bswap64 (*in); +} + +unsigned long byte_reverse (unsigned long in) +{ + return __builtin_bswap64 (in); +} + +/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */