Re: [PATCH 2/6] rs6000: Support SSE4.1 "min" and "max" intrinsics
Hi Paul, On 8/9/21 3:23 PM, Paul A. Clarke via Gcc-patches wrote: Also, copy tests for _mm_min_epi8, _mm_min_epu16, _mm_min_epi32, _mm_min_epu32, _mm_max_epi8, _mm_max_epu16, _mm_max_epi32, _mm_max_epu32 from gcc/testsuite/gcc.target/i386. sse4_1-pmaxsb.c and sse4_1-pminsb.c were modified from using "char" types to "signed char" types, because the default is unsigned on powerpc. Where tested, do you want backports, etc... 2021-08-09 Paul A. Clarke gcc * config/rs6000/smmintrin.h (_mm_min_epi8, _mm_min_epu16, _mm_min_epi32, _mm_min_epu32, _mm_max_epi8, _mm_max_epu16, _mm_max_epi32, _mm_max_epu32): New. gcc/testsuite * gcc.target/powerpc/sse4_1-pmaxsb.c: Copy from gcc.target/i386. * gcc.target/powerpc/sse4_1-pmaxsd.c: Same. * gcc.target/powerpc/sse4_1-pmaxud.c: Same. * gcc.target/powerpc/sse4_1-pmaxuw.c: Same. * gcc.target/powerpc/sse4_1-pminsb.c: Same. * gcc.target/powerpc/sse4_1-pminsd.c: Same. * gcc.target/powerpc/sse4_1-pminud.c: Same. * gcc.target/powerpc/sse4_1-pminuw.c: Same. --- gcc/config/rs6000/smmintrin.h | 56 +++ .../gcc.target/powerpc/sse4_1-pmaxsb.c| 46 +++ .../gcc.target/powerpc/sse4_1-pmaxsd.c| 46 +++ .../gcc.target/powerpc/sse4_1-pmaxud.c| 47 .../gcc.target/powerpc/sse4_1-pmaxuw.c| 47 .../gcc.target/powerpc/sse4_1-pminsb.c| 46 +++ .../gcc.target/powerpc/sse4_1-pminsd.c| 46 +++ .../gcc.target/powerpc/sse4_1-pminud.c| 47 .../gcc.target/powerpc/sse4_1-pminuw.c| 47 9 files changed, 428 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsd.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxud.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxuw.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminsb.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminsd.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminud.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminuw.c diff --git 
a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index 862e78ac7d60..f7f03d8d7782 100644 --- a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -414,6 +414,62 @@ _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask) return any_ones * any_zeros; } +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_min ((__v16qi)__X, (__v16qi)__Y); +} + +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_epu16 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_min ((__v8hu)__X, (__v8hu)__Y); +} + +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_min ((__v4si)__X, (__v4si)__Y); +} + +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_epu32 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_min ((__v4su)__X, (__v4su)__Y); +} + +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_max ((__v16qi)__X, (__v16qi)__Y); +} + +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_epu16 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_max ((__v8hu)__X, (__v8hu)__Y); +} + +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_max ((__v4si)__X, (__v4si)__Y); +} + +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_epu32 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_max ((__v4su)__X, (__v4su)__Y); +} + I guess these are all integers, so we don't have to worry about fast-math semantics. OK. 
/* Return horizontal packed word minimum and its index in bits [15:0] and bits [18:16] respectively. */ __inline __m128i diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c new file mode 100644 index 000000000000..24a74da309b0 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c @@ -0,0 +1,46 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ Please check/document -Wno-psabi. Otherwise the patch looks fine to me. I won't hold you responsible for style issues in the x86 tests. :-) Thanks! Bill + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif
[PATCH 2/6] rs6000: Support SSE4.1 "min" and "max" intrinsics
Also, copy tests for _mm_min_epi8, _mm_min_epu16, _mm_min_epi32, _mm_min_epu32, _mm_max_epi8, _mm_max_epu16, _mm_max_epi32, _mm_max_epu32 from gcc/testsuite/gcc.target/i386. sse4_1-pmaxsb.c and sse4_1-pminsb.c were modified from using "char" types to "signed char" types, because the default is unsigned on powerpc. 2021-08-09 Paul A. Clarke gcc * config/rs6000/smmintrin.h (_mm_min_epi8, _mm_min_epu16, _mm_min_epi32, _mm_min_epu32, _mm_max_epi8, _mm_max_epu16, _mm_max_epi32, _mm_max_epu32): New. gcc/testsuite * gcc.target/powerpc/sse4_1-pmaxsb.c: Copy from gcc.target/i386. * gcc.target/powerpc/sse4_1-pmaxsd.c: Same. * gcc.target/powerpc/sse4_1-pmaxud.c: Same. * gcc.target/powerpc/sse4_1-pmaxuw.c: Same. * gcc.target/powerpc/sse4_1-pminsb.c: Same. * gcc.target/powerpc/sse4_1-pminsd.c: Same. * gcc.target/powerpc/sse4_1-pminud.c: Same. * gcc.target/powerpc/sse4_1-pminuw.c: Same. --- gcc/config/rs6000/smmintrin.h | 56 +++ .../gcc.target/powerpc/sse4_1-pmaxsb.c| 46 +++ .../gcc.target/powerpc/sse4_1-pmaxsd.c| 46 +++ .../gcc.target/powerpc/sse4_1-pmaxud.c| 47 .../gcc.target/powerpc/sse4_1-pmaxuw.c| 47 .../gcc.target/powerpc/sse4_1-pminsb.c| 46 +++ .../gcc.target/powerpc/sse4_1-pminsd.c| 46 +++ .../gcc.target/powerpc/sse4_1-pminud.c| 47 .../gcc.target/powerpc/sse4_1-pminuw.c| 47 9 files changed, 428 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsd.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxud.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxuw.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminsb.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminsd.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminud.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminuw.c diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index 862e78ac7d60..f7f03d8d7782 100644 --- 
a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -414,6 +414,62 @@ _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask) return any_ones * any_zeros; } +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_min ((__v16qi)__X, (__v16qi)__Y); +} + +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_epu16 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_min ((__v8hu)__X, (__v8hu)__Y); +} + +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_min ((__v4si)__X, (__v4si)__Y); +} + +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_epu32 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_min ((__v4su)__X, (__v4su)__Y); +} + +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_epi8 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_max ((__v16qi)__X, (__v16qi)__Y); +} + +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_epu16 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_max ((__v8hu)__X, (__v8hu)__Y); +} + +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_epi32 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_max ((__v4si)__X, (__v4si)__Y); +} + +__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_epu32 (__m128i __X, __m128i __Y) +{ + return (__m128i) vec_max ((__v4su)__X, (__v4su)__Y); +} + /* Return horizontal packed word minimum and its index in bits [15:0] and bits [18:16] respectively. 
*/ __inline __m128i diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c new file mode 100644 index 000000000000..24a74da309b0 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c @@ -0,0 +1,46 @@ +/* { dg-do run } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-O2 -mvsx -Wno-psabi" } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <smmintrin.h> + +#define NUM 1024 + +static void +TEST (void) +{ + union +{ + __m128i x[NUM / 16]; + signed char i[NUM]; +} dst, src1, src2; + int i, sign = 1; + signed char max; + + for (i = 0; i < NUM; i++) +{ + src1.i[i] = i * i * sign; + src2.i[i] = (i + 20) * sign; + sign = -sign;