This patch optimizes cases like: vector double v1, v2; /* ... */ v2 = vec_splats (vec_extract (v1, 0)); /* or */ v2 = vec_splats (vec_extract (v1, 1));
Previously: vector long long splat_dup_ll_0 (vector long long v) { return __builtin_vec_splats (__builtin_vec_extract (v, 0)); } would generate: mfvsrld 9,34 mtvsrdd 34,9,9 blr With this patch, GCC generates: xxpermdi 34,34,34,3 blr 2023-07-10 Michael Meissner <meiss...@linux.ibm.com> gcc/ PR target/99293 * config/rs6000/vsx.md (vsx_splat_extract_<mode>): New combiner insn. gcc/testsuite/ PR target/99293 * gcc.target/powerpc/pr99293.c: New test. * gcc.target/powerpc/builtins-1.c: Update insn count. --- gcc/config/rs6000/vsx.md | 18 ++++++ gcc/testsuite/gcc.target/powerpc/builtins-1.c | 2 +- gcc/testsuite/gcc.target/powerpc/pr99293.c | 55 +++++++++++++++++++ 3 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99293.c diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 0c269e4e8d9..d34c3b21abe 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -4600,6 +4600,24 @@ (define_insn "vsx_splat_<mode>_mem" "lxvdsx %x0,%y1" [(set_attr "type" "vecload")]) +;; Optimize SPLAT of an extract from a V2DF/V2DI vector with a constant element +(define_insn "*vsx_splat_extract_<mode>" + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") + (vec_duplicate:VSX_D + (vec_select:<VEC_base> + (match_operand:VSX_D 1 "vsx_register_operand" "wa") + (parallel [(match_operand 2 "const_0_to_1_operand" "n")]))))] + "VECTOR_MEM_VSX_P (<MODE>mode)" +{ + int which_word = INTVAL (operands[2]); + if (!BYTES_BIG_ENDIAN) + which_word = 1 - which_word; + + operands[3] = GEN_INT (which_word ? 
3 : 0); + return "xxpermdi %x0,%x1,%x1,%3"; +} + [(set_attr "type" "vecperm")]) + ;; V4SI splat support (define_insn "vsx_splat_v4si" [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa") diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.c b/gcc/testsuite/gcc.target/powerpc/builtins-1.c index 28cd1aa6b1a..98783668bce 100644 --- a/gcc/testsuite/gcc.target/powerpc/builtins-1.c +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.c @@ -1035,4 +1035,4 @@ foo156 (vector unsigned short usa) /* { dg-final { scan-assembler-times {\mvmrglb\M} 3 } } */ /* { dg-final { scan-assembler-times {\mvmrgew\M} 4 } } */ /* { dg-final { scan-assembler-times {\mvsplth|xxsplth\M} 4 } } */ -/* { dg-final { scan-assembler-times {\mxxpermdi\M} 44 } } */ +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 42 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c b/gcc/testsuite/gcc.target/powerpc/pr99293.c new file mode 100644 index 00000000000..e5f44bd7346 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr99293.c @@ -0,0 +1,55 @@ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-O2 -mpower8-vector" } */ + +/* Test for PR 99293, which wants to do: + __builtin_vec_splats (__builtin_vec_extract (v, n)) + + where v is a V2DF or V2DI vector and n is either 0 or 1. Previously the GCC + compiler would do a direct move to the GPR registers to select the item and a + direct move from the GPR registers to do the splat. 
+ + Before the patch, splat_dup_ll_0 or splat_dup_dbl_0 below would generate: + + mfvsrld 9,34 + mtvsrdd 34,9,9 + blr + + and now it generates: + + xxpermdi 34,34,34,3 + blr */ + +#include <altivec.h> + +vector long long +splat_dup_ll_0 (vector long long v) +{ + /* xxpermdi 34,34,34,3 */ + return __builtin_vec_splats (vec_extract (v, 0)); +} + +vector double +splat_dup_dbl_0 (vector double v) +{ + /* xxpermdi 34,34,34,3 */ + return __builtin_vec_splats (vec_extract (v, 0)); +} + +vector long long +splat_dup_ll_1 (vector long long v) +{ + /* xxpermdi 34,34,34,0 */ + return __builtin_vec_splats (vec_extract (v, 1)); +} + +vector double +splat_dup_dbl_1 (vector double v) +{ + /* xxpermdi 34,34,34,0 */ + return __builtin_vec_splats (vec_extract (v, 1)); +} + +/* { dg-final { scan-assembler-times "xxpermdi" 4 } } */ +/* { dg-final { scan-assembler-not "mfvsrd" } } */ +/* { dg-final { scan-assembler-not "mfvsrld" } } */ +/* { dg-final { scan-assembler-not "mtvsrdd" } } */ -- 2.41.0 -- Michael Meissner, IBM PO Box 98, Ayer, Massachusetts, USA, 01432 email: meiss...@linux.ibm.com