Hi Mike, on 2023/7/11 03:50, Michael Meissner wrote: > This patch optimizes cases like: > > vector double v1, v2; > /* ... */ > v2 = vec_splats (vec_extract (v1, 0); /* or */ > v2 = vec_splats (vec_extract (v1, 1); > > Previously: > > vector long long > splat_dup_l_0 (vector long long v) > { > return __builtin_vec_splats (__builtin_vec_extract (v, 0)); > } > > would generate: > > mfvsrld 9,34 > mtvsrdd 34,9,9 > blr > > With this patch, GCC generates: > > xxpermdi 34,34,34,3 > blr > > 2023-07-10 Michael Meissner <meiss...@linux.ibm.com> > > gcc/ > > PR target/99293 > * gcc/config/rs6000/vsx.md (vsx_splat_extract_<mode>): New combiner > insn. > > gcc/testsuite/ > > PR target/108958 > * gcc.target/powerpc/pr99293.c: New test. > * gcc.target/powerpc/builtins-1.c: Update insn count. > --- > gcc/config/rs6000/vsx.md | 18 ++++++ > gcc/testsuite/gcc.target/powerpc/builtins-1.c | 2 +- > gcc/testsuite/gcc.target/powerpc/pr99293.c | 55 +++++++++++++++++++ > 3 files changed, 74 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99293.c > > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > index 0c269e4e8d9..d34c3b21abe 100644 > --- a/gcc/config/rs6000/vsx.md > +++ b/gcc/config/rs6000/vsx.md > @@ -4600,6 +4600,24 @@ (define_insn "vsx_splat_<mode>_mem" > "lxvdsx %x0,%y1" > [(set_attr "type" "vecload")]) > > +;; Optimize SPLAT of an extract from a V2DF/V2DI vector with a constant > element > +(define_insn "*vsx_splat_extract_<mode>" > + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") > + (vec_duplicate:VSX_D > + (vec_select:<VEC_base> > + (match_operand:VSX_D 1 "vsx_register_operand" "wa") > + (parallel [(match_operand 2 "const_0_to_1_operand" "n")]))))] > + "VECTOR_MEM_VSX_P (<MODE>mode)" > +{ > + int which_word = INTVAL (operands[2]); > + if (!BYTES_BIG_ENDIAN) > + which_word = 1 - which_word; > + > + operands[3] = GEN_INT (which_word ? 
3 : 0); > + return "xxpermdi %x0,%x1,%x1,%3"; > +} > + [(set_attr "type" "vecperm")]) > + > ;; V4SI splat support > (define_insn "vsx_splat_v4si" > [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa") > diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.c > b/gcc/testsuite/gcc.target/powerpc/builtins-1.c > index 28cd1aa6b1a..98783668bce 100644 > --- a/gcc/testsuite/gcc.target/powerpc/builtins-1.c > +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.c > @@ -1035,4 +1035,4 @@ foo156 (vector unsigned short usa) > /* { dg-final { scan-assembler-times {\mvmrglb\M} 3 } } */ > /* { dg-final { scan-assembler-times {\mvmrgew\M} 4 } } */ > /* { dg-final { scan-assembler-times {\mvsplth|xxsplth\M} 4 } } */ > -/* { dg-final { scan-assembler-times {\mxxpermdi\M} 44 } } */ > +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 42 } } */ > diff --git a/gcc/testsuite/gcc.target/powerpc/pr99293.c > b/gcc/testsuite/gcc.target/powerpc/pr99293.c > new file mode 100644 > index 00000000000..e5f44bd7346 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr99293.c > @@ -0,0 +1,55 @@ > +/* { dg-require-effective-target powerpc_p8vector_ok } */ > +/* { dg-options "-O2 -mpower8-vector" } */
Nit: IMHO -mdejagnu-cpu=power8 is preferred over -mpower8-vector, which is considered a workaround option, and we plan to make it go away. > + > +/* Test for PR 99263, which wants to do: > + __builtin_vec_splats (__builtin_vec_extract (v, n)) Nit: Maybe remove all "__builtin_" prefixes since vec_splats and vec_extract are defined in PVIPR without __builtin_. This also applies to the others below. > + > + where v is a V2DF or V2DI vector and n is either 0 or 1. Previously the > GCC > + compiler would do a direct move to the GPR registers to select the item > and a > + direct move from the GPR registers to do the splat. > + > + Before the patch, splat_dup_ll_0 or splat_dup_dbl_0 below would generate: > + > + mfvsrld 9,34 > + mtvsrdd 34,9,9 > + blr > + > + and now it generates: > + > + xxpermdi 34,34,34,3 > + blr */ > + > +#include <altivec.h> > + > +vector long long > +splat_dup_ll_0 (vector long long v) > +{ > + /* xxpermdi 34,34,34,3 */ > + return __builtin_vec_splats (vec_extract (v, 0)); > +} > + > +vector double > +splat_dup_dbl_0 (vector double v) > +{ > + /* xxpermdi 34,34,34,3 */ > + return __builtin_vec_splats (vec_extract (v, 0)); > +} > + > +vector long long > +splat_dup_ll_1 (vector long long v) > +{ > + /* xxpermdi 34,34,34,0 */ > + return __builtin_vec_splats (vec_extract (v, 1)); > +} > + > +vector double > +splat_dup_dbl_1 (vector double v) > +{ > + /* xxpermdi 34,34,34,0 */ > + return __builtin_vec_splats (vec_extract (v, 1)); > +} > + > +/* { dg-final { scan-assembler-times "xxpermdi" 4 } } */ Nit: It would be good to add \m...\M like the others, i.e. /* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */ ..., and the same for the ones below. > +/* { dg-final { scan-assembler-not "mfvsrd" } } */ > +/* { dg-final { scan-assembler-not "mfvsrld" } } */ > +/* { dg-final { scan-assembler-not "mtvsrdd" } } */ This patch is okay for trunk with these nits tweaked, thanks! BR, Kewen