[PATCH v3 4/6] rs6000: Support SSE4.1 "cvt" intrinsics

2021-08-23 Thread Paul A. Clarke via Gcc-patches
Function signatures and decorations match gcc/config/i386/smmintrin.h.

Also, copy tests for:
- _mm_cvtepi8_epi16, _mm_cvtepi8_epi32, _mm_cvtepi8_epi64
- _mm_cvtepi16_epi32, _mm_cvtepi16_epi64
- _mm_cvtepi32_epi64
- _mm_cvtepu8_epi16, _mm_cvtepu8_epi32, _mm_cvtepu8_epi64
- _mm_cvtepu16_epi32, _mm_cvtepu16_epi64
- _mm_cvtepu32_epi64

from gcc/testsuite/gcc.target/i386.

sse4_1-pmovsxbd.c, sse4_1-pmovsxbq.c, and sse4_1-pmovsxbw.c were
modified from using "char" types to "signed char" types, because
the default is unsigned on powerpc.

2021-08-20  Paul A. Clarke  

gcc
* config/rs6000/smmintrin.h (_mm_cvtepi8_epi16, _mm_cvtepi8_epi32,
_mm_cvtepi8_epi64, _mm_cvtepi16_epi32, _mm_cvtepi16_epi64,
_mm_cvtepi32_epi64, _mm_cvtepu8_epi16, _mm_cvtepu8_epi32,
_mm_cvtepu8_epi64, _mm_cvtepu16_epi32, _mm_cvtepu16_epi64,
_mm_cvtepu32_epi64): New.

gcc/testsuite
* gcc.target/powerpc/sse4_1-pmovsxbd.c: Copy from gcc.target/i386,
adjust dg directives to suit.
* gcc.target/powerpc/sse4_1-pmovsxbq.c: Same.
* gcc.target/powerpc/sse4_1-pmovsxbw.c: Same.
* gcc.target/powerpc/sse4_1-pmovsxdq.c: Same.
* gcc.target/powerpc/sse4_1-pmovsxwd.c: Same.
* gcc.target/powerpc/sse4_1-pmovsxwq.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxbd.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxbq.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxbw.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxdq.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxwd.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxwq.c: Same.
---
v3: No change.
v2:
- Added "extern" to functions to maintain compatible decorations with
  like implementations in gcc/config/i386.
- Removed "-Wno-psabi" from tests as unnecessary, per v1 review.
- Noted testing in patch series cover letter.

 gcc/config/rs6000/smmintrin.h | 138 ++
 .../gcc.target/powerpc/sse4_1-pmovsxbd.c  |  42 ++
 .../gcc.target/powerpc/sse4_1-pmovsxbq.c  |  42 ++
 .../gcc.target/powerpc/sse4_1-pmovsxbw.c  |  42 ++
 .../gcc.target/powerpc/sse4_1-pmovsxdq.c  |  42 ++
 .../gcc.target/powerpc/sse4_1-pmovsxwd.c  |  42 ++
 .../gcc.target/powerpc/sse4_1-pmovsxwq.c  |  42 ++
 .../gcc.target/powerpc/sse4_1-pmovzxbd.c  |  43 ++
 .../gcc.target/powerpc/sse4_1-pmovzxbq.c  |  43 ++
 .../gcc.target/powerpc/sse4_1-pmovzxbw.c  |  43 ++
 .../gcc.target/powerpc/sse4_1-pmovzxdq.c  |  43 ++
 .../gcc.target/powerpc/sse4_1-pmovzxwd.c  |  43 ++
 .../gcc.target/powerpc/sse4_1-pmovzxwq.c  |  43 ++
 13 files changed, 648 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c

diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index 363534cb06a2..fdef6674d16c 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -442,6 +442,144 @@ _mm_max_epu32 (__m128i __X, __m128i __Y)
   return (__m128i) vec_max ((__v4su)__X, (__v4su)__Y);
 }
 
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi16 (__m128i __A)
+{
+  return (__m128i) vec_unpackh ((__v16qi)__A);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi32 (__m128i __A)
+{
+  __A = (__m128i) vec_unpackh ((__v16qi)__A);
+  return (__m128i) vec_unpackh ((__v8hi)__A);
+}
+
+#ifdef _ARCH_PWR8
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi64 (__m128i __A)
+{
+  __A = (__m128i) vec_unpackh ((__v16qi)__A);
+  __A = (__m128i) vec_unpackh ((__v8hi)__A);
+  return (__m128i) vec_unpackh ((__v4si)__A);
+}
+#endif
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_epi32 (__m128i __A)
+{
+  return (__m128i) vec_unpackh ((__v8hi)__A);
+}
+
+#ifdef _ARCH_PWR8
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_epi64 (__m128i __A)
+{
+  __A = (__m128i) vec_unpackh ((__v8hi)__A);
+  return (__m128i) vec_unpackh ((__v4si)__A);
+}
+#endif
+

Re: [PATCH v3 4/6] rs6000: Support SSE4.1 "cvt" intrinsics

2021-08-27 Thread Bill Schmidt via Gcc-patches

This looks fine, recommend approval.

Thanks!
Bill

On 8/23/21 2:03 PM, Paul A. Clarke wrote:

Function signatures and decorations match gcc/config/i386/smmintrin.h.

Also, copy tests for:
- _mm_cvtepi8_epi16, _mm_cvtepi8_epi32, _mm_cvtepi8_epi64
- _mm_cvtepi16_epi32, _mm_cvtepi16_epi64
- _mm_cvtepi32_epi64
- _mm_cvtepu8_epi16, _mm_cvtepu8_epi32, _mm_cvtepu8_epi64
- _mm_cvtepu16_epi32, _mm_cvtepu16_epi64
- _mm_cvtepu32_epi64

from gcc/testsuite/gcc.target/i386.

sse4_1-pmovsxbd.c, sse4_1-pmovsxbq.c, and sse4_1-pmovsxbw.c were
modified from using "char" types to "signed char" types, because
the default is unsigned on powerpc.

2021-08-20  Paul A. Clarke  

gcc
* config/rs6000/smmintrin.h (_mm_cvtepi8_epi16, _mm_cvtepi8_epi32,
_mm_cvtepi8_epi64, _mm_cvtepi16_epi32, _mm_cvtepi16_epi64,
_mm_cvtepi32_epi64, _mm_cvtepu8_epi16, _mm_cvtepu8_epi32,
_mm_cvtepu8_epi64, _mm_cvtepu16_epi32, _mm_cvtepu16_epi64,
_mm_cvtepu32_epi64): New.

gcc/testsuite
* gcc.target/powerpc/sse4_1-pmovsxbd.c: Copy from gcc.target/i386,
adjust dg directives to suit.
* gcc.target/powerpc/sse4_1-pmovsxbq.c: Same.
* gcc.target/powerpc/sse4_1-pmovsxbw.c: Same.
* gcc.target/powerpc/sse4_1-pmovsxdq.c: Same.
* gcc.target/powerpc/sse4_1-pmovsxwd.c: Same.
* gcc.target/powerpc/sse4_1-pmovsxwq.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxbd.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxbq.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxbw.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxdq.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxwd.c: Same.
* gcc.target/powerpc/sse4_1-pmovzxwq.c: Same.
---
v3: No change.
v2:
- Added "extern" to functions to maintain compatible decorations with
   like implementations in gcc/config/i386.
- Removed "-Wno-psabi" from tests as unnecessary, per v1 review.
- Noted testing in patch series cover letter.

  gcc/config/rs6000/smmintrin.h | 138 ++
  .../gcc.target/powerpc/sse4_1-pmovsxbd.c  |  42 ++
  .../gcc.target/powerpc/sse4_1-pmovsxbq.c  |  42 ++
  .../gcc.target/powerpc/sse4_1-pmovsxbw.c  |  42 ++
  .../gcc.target/powerpc/sse4_1-pmovsxdq.c  |  42 ++
  .../gcc.target/powerpc/sse4_1-pmovsxwd.c  |  42 ++
  .../gcc.target/powerpc/sse4_1-pmovsxwq.c  |  42 ++
  .../gcc.target/powerpc/sse4_1-pmovzxbd.c  |  43 ++
  .../gcc.target/powerpc/sse4_1-pmovzxbq.c  |  43 ++
  .../gcc.target/powerpc/sse4_1-pmovzxbw.c  |  43 ++
  .../gcc.target/powerpc/sse4_1-pmovzxdq.c  |  43 ++
  .../gcc.target/powerpc/sse4_1-pmovzxwd.c  |  43 ++
  .../gcc.target/powerpc/sse4_1-pmovzxwq.c  |  43 ++
  13 files changed, 648 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c

diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h
index 363534cb06a2..fdef6674d16c 100644
--- a/gcc/config/rs6000/smmintrin.h
+++ b/gcc/config/rs6000/smmintrin.h
@@ -442,6 +442,144 @@ _mm_max_epu32 (__m128i __X, __m128i __Y)
    return (__m128i) vec_max ((__v4su)__X, (__v4su)__Y);
  }
  
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi16 (__m128i __A)
+{
+  return (__m128i) vec_unpackh ((__v16qi)__A);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi32 (__m128i __A)
+{
+  __A = (__m128i) vec_unpackh ((__v16qi)__A);
+  return (__m128i) vec_unpackh ((__v8hi)__A);
+}
+
+#ifdef _ARCH_PWR8
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi64 (__m128i __A)
+{
+  __A = (__m128i) vec_unpackh ((__v16qi)__A);
+  __A = (__m128i) vec_unpackh ((__v8hi)__A);
+  return (__m128i) vec_unpackh ((__v4si)__A);
+}
+#endif
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_epi32 (__m128i __A)
+{
+  return (__m128i) vec_unpackh ((__v8hi)__A);
+}
+
+#ifdef _ARCH_PWR8
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_ep

Re: [PATCH v3 4/6] rs6000: Support SSE4.1 "cvt" intrinsics

2021-10-11 Thread Segher Boessenkool
Hi!

On Mon, Aug 23, 2021 at 02:03:08PM -0500, Paul A. Clarke wrote:
> gcc
>   * config/rs6000/smmintrin.h (_mm_cvtepi8_epi16, _mm_cvtepi8_epi32,
>   _mm_cvtepi8_epi64, _mm_cvtepi16_epi32, _mm_cvtepi16_epi64,
>   _mm_cvtepi32_epi64, _mm_cvtepu8_epi16, _mm_cvtepu8_epi32,
>   _mm_cvtepu8_epi64, _mm_cvtepu16_epi32, _mm_cvtepu16_epi64,
>   _mm_cvtepu32_epi64): New.
> 
> gcc/testsuite
>   * gcc.target/powerpc/sse4_1-pmovsxbd.c: Copy from gcc.target/i386,
>   adjust dg directives to suit.
>   * gcc.target/powerpc/sse4_1-pmovsxbq.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovsxbw.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovsxdq.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovsxwd.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovsxwq.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovzxbd.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovzxbq.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovzxbw.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovzxdq.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovzxwd.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovzxwq.c: Same.

> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtepi8_epi16 (__m128i __A)
> +{
> +  return (__m128i) vec_unpackh ((__v16qi)__A);
> +}

This strange mixture of sometimes writing a cast with a space and
sometimes without one is...  strange :-)

Having up to three unpacks in a row seems suboptimal.  But it certainly
is aesthetically pleasing :-)

> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O2 -mvsx" } */

Same as before here too (needs vsx_hw).

Okay for trunk with that fixed.  Thanks!


Segher