On Tue, Jun 24, 2025 at 09:49:01AM +0200, Juergen Christ wrote: > Some patterns that are detected by the autovectorizer can be supported by > s390. Add expanders such that autovectorization of these patterns works. > > Bootstrapped and regtested on s390. Ok for trunk? > > gcc/ChangeLog: > > * config/s390/vector.md (avg<mode>3_ceil): New pattern. > (uavg<mode>3_ceil): New pattern. > (smul<mode>3_highpart): New pattern. > (umul<mode>3_highpart): New pattern. > > gcc/testsuite/ChangeLog: > > * gcc.target/s390/vector/pattern-avg-1.c: New test. > * gcc.target/s390/vector/pattern-mulh-1.c: New test. > > Signed-off-by: Juergen Christ <jchr...@linux.ibm.com> > --- > gcc/config/s390/vector.md | 28 ++++++++++++++++++ > .../gcc.target/s390/vector/pattern-avg-1.c | 26 +++++++++++++++++ > .../gcc.target/s390/vector/pattern-mulh-1.c | 29 +++++++++++++++++++ > 3 files changed, 83 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c > create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c > > diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md > index 6f4e1929eb80..16f4b8116432 100644 > --- a/gcc/config/s390/vector.md > +++ b/gcc/config/s390/vector.md > @@ -3576,3 +3576,31 @@ > ; vec_unpacks_float_lo > ; vec_unpacku_float_hi > ; vec_unpacku_float_lo > + > +(define_expand "avg<mode>3_ceil" > + [(set (match_operand:VIT_HW_VXE3_T 0 > "register_operand" "=v") > + (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 > "register_operand" "v") > + (match_operand:VIT_HW_VXE3_T 2 > "register_operand" "v")] > + UNSPEC_VEC_AVG))] > + "TARGET_VX") > + > +(define_expand "uavg<mode>3_ceil" > + [(set (match_operand:VIT_HW_VXE3_T 0 > "register_operand" "=v") > + (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 > "register_operand" "v") > + (match_operand:VIT_HW_VXE3_T 2 > "register_operand" "v")] > + UNSPEC_VEC_AVGU))] > + "TARGET_VX") > + > +(define_expand "smul<mode>3_highpart" > + [(set (match_operand:VIT_HW_VXE3_DT 0 
"register_operand" > "=v") > + (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 > "register_operand" "v") > + (match_operand:VIT_HW_VXE3_DT 2 > "register_operand" "v")] > + UNSPEC_VEC_SMULT_HI))] > + "TARGET_VX") > + > +(define_expand "umul<mode>3_highpart" > + [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" > "=v") > + (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 > "register_operand" "v") > + (match_operand:VIT_HW_VXE3_DT 2 > "register_operand" "v")] > + UNSPEC_VEC_UMULT_HI))] > + "TARGET_VX")
In commit r12-4231-g555fa3545efe23 the RTX codes smul_highpart and umul_highpart were introduced, which we could now use instead of the unspecs. So one solution would be to move vec_smulh<mode>/vec_umulh<mode> from vx-builtins.md to vector.md, rename them to smul<mode>3_highpart/umul<mode>3_highpart, and then make sure that those are used in s390-builtins.def. Of course, the unspecs should then be replaced by the corresponding RTX codes, too. Sorry for bothering you with this, but I think it is worthwhile to replace those unspecs. Thanks, Stefan > diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c > b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c > new file mode 100644 > index 000000000000..a15301aabe54 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c > @@ -0,0 +1,26 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize > -fdump-tree-optimized" } */ > + > +#define TEST(T1,T2,N) \ > + void \ > + avg##T1 (signed T1 *__restrict res, signed T1 *__restrict a, \ > + signed T1 *__restrict b) \ > + { \ > + for (int i = 0; i < N; ++i) \ > + res[i] = ((signed T2)a[i] + b[i] + 1) >> 1; \ > + } \ > + \ > + void \ > + uavg##T1 (unsigned T1 *__restrict res, unsigned T1 *__restrict a, \ > + unsigned T1 *__restrict b) \ > + { \ > + for (int i = 0; i < N; ++i) \ > + res[i] = ((unsigned T2)a[i] + b[i] + 1) >> 1; \ > + } > + > +TEST(char,short,16) > +TEST(short,int,8) > +TEST(int,long,4) > +TEST(long,__int128,2) > + > +/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 8 "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c > b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c > new file mode 100644 > index 000000000000..cd8e4e7d7a09 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c > @@ -0,0 +1,29 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -mzarch -march=arch15 -ftree-vectorize > -fdump-tree-optimized" } */ > + > +#define TEST(T1,T2,N,S) \ > + void \ >
+ mulh##T1 (signed T1 *__restrict res, \ > + signed T1 *__restrict l, \ > + signed T1 *__restrict r) \ > + { \ > + for (int i = 0; i < N; ++i) \ > + res[i] = (signed T1) (((signed T2)l[i] * (signed T2)r[i]) >> S); \ > + } \ > + \ > + void \ > + umulh##T1 (unsigned T1 *__restrict res, \ > + unsigned T1 *__restrict l, \ > + unsigned T1 *__restrict r) \ > + { \ > + for (int i = 0; i < N; ++i) \ > + res[i] = (unsigned T1) \ > + (((unsigned T2)l[i] * (unsigned T2)r[i]) >> S); \ > + } > + > +TEST(char,short,16,8) > +TEST(short,int,8,16) > +TEST(int,long,4,32) > +TEST(long,__int128,2,64) > + > +/* { dg-final { scan-tree-dump-times "\.MULH" 8 "optimized" } } */ > -- > 2.43.5 >