Hi, This patch replaces the shift and ior insns with a single rotate and mask insn (rldimi) in the split patterns for DImode byte swap on Power6. The new test case demonstrates the optimization.
Bootstrapped and tested on ppc64 Linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-06-07 Haochen Gui <guih...@linux.ibm.com> gcc/ * config/rs6000/rs6000.md (define_split for bswapdi load): Merge shift and ior insns into one rotate and mask insn. (define_split for bswapdi register): Likewise. gcc/testsuite/ * gcc.target/powerpc/pr93453-1.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index bf85baa5370..83800df12aa 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -2828,8 +2828,8 @@ (define_split emit_insn (gen_bswapsi2 (dest_32, word2)); } - emit_insn (gen_ashldi3 (op3, op3, GEN_INT (32))); - emit_insn (gen_iordi3 (dest, dest, op3)); + emit_insn (gen_rotldi3_insert_3 (dest, op3, GEN_INT (32), dest, + GEN_INT (0xffffffff))); DONE; }) @@ -2914,10 +2914,10 @@ (define_split rtx op3_si = simplify_gen_subreg (SImode, op3, DImode, lo_off); emit_insn (gen_lshrdi3 (op2, src, GEN_INT (32))); - emit_insn (gen_bswapsi2 (dest_si, src_si)); - emit_insn (gen_bswapsi2 (op3_si, op2_si)); - emit_insn (gen_ashldi3 (dest, dest, GEN_INT (32))); - emit_insn (gen_iordi3 (dest, dest, op3)); + emit_insn (gen_bswapsi2 (op3_si, src_si)); + emit_insn (gen_bswapsi2 (dest_si, op2_si)); + emit_insn (gen_rotldi3_insert_3 (dest, op3, GEN_INT (32), dest, + GEN_INT (0xffffffff))); DONE; }) diff --git a/gcc/testsuite/gcc.target/powerpc/pr93453-1.c b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c new file mode 100644 index 00000000000..4271886561f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr93453-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-mdejagnu-cpu=power6 -O2" } */ + +unsigned long load_byte_reverse (unsigned long *in) +{ + return __builtin_bswap64 (*in); +} + +unsigned long byte_reverse (unsigned long in) +{ + return __builtin_bswap64 (in); +} + +/* { dg-final { scan-assembler-times {\mrldimi\M} 2 } } */