Re: [PATCH 2/6] rs6000: Support SSE4.1 "min" and "max" intrinsics

2021-08-18 Thread Bill Schmidt via Gcc-patches

Hi Paul,

On 8/9/21 3:23 PM, Paul A. Clarke via Gcc-patches wrote:

Also, copy tests for _mm_min_epi8, _mm_min_epu16, _mm_min_epi32,
_mm_min_epu32, _mm_max_epi8, _mm_max_epu16, _mm_max_epi32, _mm_max_epu32
from gcc/testsuite/gcc.target/i386.

sse4_1-pmaxsb.c and sse4_1-pminsb.c were modified from using
"char" types to "signed char" types, because the default is unsigned on
powerpc.

Where tested, do you want backports, etc...


2021-08-09  Paul A. Clarke  

gcc
* config/rs6000/smmintrin.h (_mm_min_epi8, _mm_min_epu16,
_mm_min_epi32, _mm_min_epu32, _mm_max_epi8, _mm_max_epu16,
_mm_max_epi32, _mm_max_epu32): New.

gcc/testsuite
* gcc.target/powerpc/sse4_1-pmaxsb.c: Copy from gcc.target/i386.
* gcc.target/powerpc/sse4_1-pmaxsd.c: Same.
* gcc.target/powerpc/sse4_1-pmaxud.c: Same.
* gcc.target/powerpc/sse4_1-pmaxuw.c: Same.
* gcc.target/powerpc/sse4_1-pminsb.c: Same.
* gcc.target/powerpc/sse4_1-pminsd.c: Same.
* gcc.target/powerpc/sse4_1-pminud.c: Same.
* gcc.target/powerpc/sse4_1-pminuw.c: Same.
---
  gcc/config/rs6000/smmintrin.h | 56 +++
  .../gcc.target/powerpc/sse4_1-pmaxsb.c| 46 +++
  .../gcc.target/powerpc/sse4_1-pmaxsd.c| 46 +++
  .../gcc.target/powerpc/sse4_1-pmaxud.c| 47 
  .../gcc.target/powerpc/sse4_1-pmaxuw.c| 47 
  .../gcc.target/powerpc/sse4_1-pminsb.c| 46 +++
  .../gcc.target/powerpc/sse4_1-pminsd.c| 46 +++
  .../gcc.target/powerpc/sse4_1-pminud.c| 47 
  .../gcc.target/powerpc/sse4_1-pminuw.c| 47 
  9 files changed, 428 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsd.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxud.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxuw.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminsb.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminsd.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminud.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminuw.c

diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index 862e78ac7d60..f7f03d8d7782 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -414,6 +414,62 @@ _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask)
return any_ones * any_zeros;
  }

+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epi8 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_min ((__v16qi)__X, (__v16qi)__Y);
+}
+
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epu16 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_min ((__v8hu)__X, (__v8hu)__Y);
+}
+
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epi32 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_min ((__v4si)__X, (__v4si)__Y);
+}
+
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epu32 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_min ((__v4su)__X, (__v4su)__Y);
+}
+
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epi8 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_max ((__v16qi)__X, (__v16qi)__Y);
+}
+
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epu16 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_max ((__v8hu)__X, (__v8hu)__Y);
+}
+
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epi32 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_max ((__v4si)__X, (__v4si)__Y);
+}
+
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epu32 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_max ((__v4su)__X, (__v4su)__Y);
+}
+



I guess these are all integers, so we don't have to worry about 
fast-math semantics.  OK.



  /* Return horizontal packed word minimum and its index in bits [15:0]
 and bits [18:16] respectively.  */
  __inline __m128i
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c 
b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c
new file mode 100644
index ..24a74da309b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx -Wno-psabi" } */



Please check/document -Wno-psabi.  Otherwise the patch looks fine to 
me.  I won't hold you responsible for style issues in the x86 tests. :-)


Thanks!
Bill


+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif

[PATCH 2/6] rs6000: Support SSE4.1 "min" and "max" intrinsics

2021-08-09 Thread Paul A. Clarke via Gcc-patches
Also, copy tests for _mm_min_epi8, _mm_min_epu16, _mm_min_epi32,
_mm_min_epu32, _mm_max_epi8, _mm_max_epu16, _mm_max_epi32, _mm_max_epu32
from gcc/testsuite/gcc.target/i386.

sse4_1-pmaxsb.c and sse4_1-pminsb.c were modified from using
"char" types to "signed char" types, because the default is unsigned on
powerpc.

2021-08-09  Paul A. Clarke  

gcc
* config/rs6000/smmintrin.h (_mm_min_epi8, _mm_min_epu16,
_mm_min_epi32, _mm_min_epu32, _mm_max_epi8, _mm_max_epu16,
_mm_max_epi32, _mm_max_epu32): New.

gcc/testsuite
* gcc.target/powerpc/sse4_1-pmaxsb.c: Copy from gcc.target/i386.
* gcc.target/powerpc/sse4_1-pmaxsd.c: Same.
* gcc.target/powerpc/sse4_1-pmaxud.c: Same.
* gcc.target/powerpc/sse4_1-pmaxuw.c: Same.
* gcc.target/powerpc/sse4_1-pminsb.c: Same.
* gcc.target/powerpc/sse4_1-pminsd.c: Same.
* gcc.target/powerpc/sse4_1-pminud.c: Same.
* gcc.target/powerpc/sse4_1-pminuw.c: Same.
---
 gcc/config/rs6000/smmintrin.h | 56 +++
 .../gcc.target/powerpc/sse4_1-pmaxsb.c| 46 +++
 .../gcc.target/powerpc/sse4_1-pmaxsd.c| 46 +++
 .../gcc.target/powerpc/sse4_1-pmaxud.c| 47 
 .../gcc.target/powerpc/sse4_1-pmaxuw.c| 47 
 .../gcc.target/powerpc/sse4_1-pminsb.c| 46 +++
 .../gcc.target/powerpc/sse4_1-pminsd.c| 46 +++
 .../gcc.target/powerpc/sse4_1-pminud.c| 47 
 .../gcc.target/powerpc/sse4_1-pminuw.c| 47 
 9 files changed, 428 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsd.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxud.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxuw.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminsb.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminsd.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminud.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pminuw.c

diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index 862e78ac7d60..f7f03d8d7782 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -414,6 +414,62 @@ _mm_test_mix_ones_zeros (__m128i __A, __m128i __mask)
   return any_ones * any_zeros;
 }
 
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epi8 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_min ((__v16qi)__X, (__v16qi)__Y);
+}
+
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epu16 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_min ((__v8hu)__X, (__v8hu)__Y);
+}
+
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epi32 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_min ((__v4si)__X, (__v4si)__Y);
+}
+
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epu32 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_min ((__v4su)__X, (__v4su)__Y);
+}
+
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epi8 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_max ((__v16qi)__X, (__v16qi)__Y);
+}
+
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epu16 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_max ((__v8hu)__X, (__v8hu)__Y);
+}
+
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epi32 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_max ((__v4si)__X, (__v4si)__Y);
+}
+
+__inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epu32 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) vec_max ((__v4su)__X, (__v4su)__Y);
+}
+
 /* Return horizontal packed word minimum and its index in bits [15:0]
and bits [18:16] respectively.  */
 __inline __m128i
diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c 
b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c
new file mode 100644
index ..24a74da309b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pmaxsb.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx -Wno-psabi" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+#include 
+
+#define NUM 1024
+
+static void
+TEST (void)
+{
+  union
+{
+  __m128i x[NUM / 16];
+  signed char i[NUM];
+} dst, src1, src2;
+  int i, sign = 1;
+  signed char max;
+
+  for (i = 0; i < NUM; i++)
+{
+  src1.i[i] = i * i * sign;
+  src2.i[i] = (i + 20) * sign;
+  sign = -sign;