Hi, This patch is to make unsigned int vector init go with rldimi to merge two integers instead of shift and ior.
I tried to use nonzero_bits in md file to make it more general, but the testing shows it isn't doable. The reason is that some passes would replace some pseudos with other pseudos and do the recog again, but at that time the nonzero_bits could get rough information and lead the recog fails unexpectedly. btw, the test case would reply on the combine patch[1]. Bootstrapped/regtested on powerpc64le-linux-gnu P9. BR, Kewen [1] https://gcc.gnu.org/pipermail/gcc-patches/2020-December/561413.html gcc/ChangeLog: * config/rs6000/rs6000.md (*rotl<mode>3_insert_3): Renamed to... (rotl<mode>3_insert_3): ...this. * config/rs6000/vsx.md (vsx_init_v4si): Use gen_rotldi3_insert_3 for integer merging. gcc/testsuite/ChangeLog: * gcc.target/powerpc/vec-init-10.c: New test. -----
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 0e799198a50..3529b79d35d 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -4067,7 +4067,7 @@ [(set_attr "type" "insert")]) ; There are also some forms without one of the ANDs. -(define_insn "*rotl<mode>3_insert_3" +(define_insn "rotl<mode>3_insert_3" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (ior:GPR (and:GPR (match_operand:GPR 3 "gpc_reg_operand" "0") (match_operand:GPR 4 "const_int_operand" "n")) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 947631d83ee..37105a5aabf 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -3008,28 +3008,22 @@ (use (match_operand:SI 4 "gpc_reg_operand"))] "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" { - rtx a = gen_reg_rtx (DImode); - rtx b = gen_reg_rtx (DImode); - rtx c = gen_reg_rtx (DImode); - rtx d = gen_reg_rtx (DImode); - emit_insn (gen_zero_extendsidi2 (a, operands[1])); - emit_insn (gen_zero_extendsidi2 (b, operands[2])); - emit_insn (gen_zero_extendsidi2 (c, operands[3])); - emit_insn (gen_zero_extendsidi2 (d, operands[4])); + rtx a = gen_lowpart_SUBREG (DImode, operands[1]); + rtx b = gen_lowpart_SUBREG (DImode, operands[2]); + rtx c = gen_lowpart_SUBREG (DImode, operands[3]); + rtx d = gen_lowpart_SUBREG (DImode, operands[4]); if (!BYTES_BIG_ENDIAN) { std::swap (a, b); std::swap (c, d); } - rtx aa = gen_reg_rtx (DImode); rtx ab = gen_reg_rtx (DImode); - rtx cc = gen_reg_rtx (DImode); rtx cd = gen_reg_rtx (DImode); - emit_insn (gen_ashldi3 (aa, a, GEN_INT (32))); - emit_insn (gen_ashldi3 (cc, c, GEN_INT (32))); - emit_insn (gen_iordi3 (ab, aa, b)); - emit_insn (gen_iordi3 (cd, cc, d)); + emit_insn (gen_rotldi3_insert_3 (ab, a, GEN_INT (32), b, + GEN_INT (0xffffffff))); + emit_insn (gen_rotldi3_insert_3 (cd, c, GEN_INT (32), d, + GEN_INT (0xffffffff))); rtx abcd = gen_reg_rtx (V2DImode); emit_insn (gen_vsx_concat_v2di (abcd, ab, cd)); diff --git a/gcc/testsuite/gcc.target/powerpc/vec-init-10.c b/gcc/testsuite/gcc.target/powerpc/vec-init-10.c new file mode 100644 index 00000000000..680538e67f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-init-10.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ + +/* Check that we can optimize sldi + or to rldimi for vector int init. */ + +vector unsigned int +testu (unsigned int i1, unsigned int i2, unsigned int i3, unsigned int i4) +{ + vector unsigned int v = {i1, i2, i3, i4}; + return v; +} + +vector signed int +tests (signed int i1, signed int i2, signed int i3, signed int i4) +{ + vector signed int v = {i1, i2, i3, i4}; + return v; +} + +/* { dg-final { scan-assembler-not "sldi" } } */ +/* { dg-final { scan-assembler-not "or" } } */ +/* { dg-final { scan-assembler-times {\mrldimi\M} 4 } } */