On Tue, Jun 24, 2025 at 09:49:01AM +0200, Juergen Christ wrote:
> Some patterns that are detected by the autovectorizer can be supported by
> s390.  Add expanders such that autovectorization of these patterns works.
> 
> Bootstrapped and regtested on s390.  Ok for trunk?
> 
> gcc/ChangeLog:
> 
>       * config/s390/vector.md (avg<mode>3_ceil): New pattern.
>       (uavg<mode>3_ceil): New pattern.
>       (smul<mode>3_highpart): New pattern.
>       (umul<mode>3_highpart): New pattern.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/s390/vector/pattern-avg-1.c: New test.
>       * gcc.target/s390/vector/pattern-mulh-1.c: New test.
> 
> Signed-off-by: Juergen Christ <jchr...@linux.ibm.com>
> ---
>  gcc/config/s390/vector.md                     | 28 ++++++++++++++++++
>  .../gcc.target/s390/vector/pattern-avg-1.c    | 26 +++++++++++++++++
>  .../gcc.target/s390/vector/pattern-mulh-1.c   | 29 +++++++++++++++++++
>  3 files changed, 83 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
>  create mode 100644 gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
> 
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 6f4e1929eb80..16f4b8116432 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -3576,3 +3576,31 @@
>  ; vec_unpacks_float_lo
>  ; vec_unpacku_float_hi
>  ; vec_unpacku_float_lo
> +
> +(define_expand "avg<mode>3_ceil"
> +  [(set (match_operand:VIT_HW_VXE3_T                        0 
> "register_operand" "=v")
> +     (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 
> "register_operand"  "v")
> +                            (match_operand:VIT_HW_VXE3_T 2 
> "register_operand"  "v")]
> +                           UNSPEC_VEC_AVG))]
> +  "TARGET_VX")
> +
> +(define_expand "uavg<mode>3_ceil"
> +  [(set (match_operand:VIT_HW_VXE3_T                        0 
> "register_operand" "=v")
> +     (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 
> "register_operand"  "v")
> +                            (match_operand:VIT_HW_VXE3_T 2 
> "register_operand"  "v")]
> +                           UNSPEC_VEC_AVGU))]
> +  "TARGET_VX")
> +
> +(define_expand "smul<mode>3_highpart"
> +  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                   
>     "=v")
> +     (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 
> "register_operand" "v")
> +                             (match_operand:VIT_HW_VXE3_DT 2 
> "register_operand" "v")]
> +                            UNSPEC_VEC_SMULT_HI))]
> +  "TARGET_VX")
> +
> +(define_expand "umul<mode>3_highpart"
> +  [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand"                   
>     "=v")
> +     (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 
> "register_operand" "v")
> +                             (match_operand:VIT_HW_VXE3_DT 2 
> "register_operand" "v")]
> +                            UNSPEC_VEC_UMULT_HI))]
> +  "TARGET_VX")

Commit r12-4231-g555fa3545efe23 introduced the RTX codes smul_highpart
and umul_highpart, which we could now use instead of the unspecs.  So
one solution would be to move vec_smulh<mode>/vec_umulh<mode> from
vx-builtins.md to vector.md, rename them to
smul<mode>3_highpart/umul<mode>3_highpart, and then make sure that
those are used in s390-builtins.def.  Of course, the unspecs should be
replaced by the corresponding RTX codes, too.

Sorry for bothering you with this, but I think it is worthwhile to
replace those unspecs.

Thanks,
Stefan

> diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c 
> b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
> new file mode 100644
> index 000000000000..a15301aabe54
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-avg-1.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mzarch -march=z16 -ftree-vectorize 
> -fdump-tree-optimized" } */
> +
> +#define TEST(T1,T2,N)                                                   \
> +  void                                                                  \
> +  avg##T1 (signed T1 *__restrict res, signed T1 *__restrict a,          \
> +           signed T1 *__restrict b)                                     \
> +  {                                                                     \
> +    for (int i = 0; i < N; ++i)                                         \
> +      res[i] = ((signed T2)a[i] + b[i] + 1) >> 1;                       \
> +  }                                                                     \
> +                                                                        \
> +  void                                                                  \
> +  uavg##T1 (unsigned T1 *__restrict res, unsigned T1 *__restrict a,     \
> +            unsigned T1 *__restrict b)                                  \
> +  {                                                                     \
> +    for (int i = 0; i < N; ++i)                                         \
> +      res[i] = ((unsigned T2)a[i] + b[i] + 1) >> 1;                     \
> +  }
> +
> +TEST(char,short,16)
> +TEST(short,int,8)
> +TEST(int,long,4)
> +TEST(long,__int128,2)
> +
> +/* { dg-final { scan-tree-dump-times "\.AVG_CEIL" 8 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c 
> b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
> new file mode 100644
> index 000000000000..cd8e4e7d7a09
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/s390/vector/pattern-mulh-1.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mzarch -march=arch15 -ftree-vectorize 
> -fdump-tree-optimized" } */
> +
> +#define TEST(T1,T2,N,S)                                                 \
> +  void                                                                  \
> +  mulh##T1 (signed T1 *__restrict res,                                  \
> +            signed T1 *__restrict l,                                    \
> +            signed T1 *__restrict r)                                    \
> +  {                                                                     \
> +    for (int i = 0; i < N; ++i)                                         \
> +      res[i] = (signed T1) (((signed T2)l[i] * (signed T2)r[i]) >> S);  \
> +  }                                                                     \
> +                                                                        \
> +  void                                                                  \
> +  umulh##T1 (unsigned T1 *__restrict res,                               \
> +             unsigned T1 *__restrict l,                                 \
> +             unsigned T1 *__restrict r)                                 \
> +  {                                                                     \
> +    for (int i = 0; i < N; ++i)                                         \
> +      res[i] = (unsigned T1)                                            \
> +        (((unsigned T2)l[i] * (unsigned T2)r[i]) >> S);                 \
> +  }
> +
> +TEST(char,short,16,8)
> +TEST(short,int,8,16)
> +TEST(int,long,4,32)
> +TEST(long,__int128,2,64)
> +
> +/* { dg-final { scan-tree-dump-times "\.MULH" 8 "optimized" } } */
> -- 
> 2.43.5
> 

Reply via email to