[PATCH v3 4/6] rs6000: Support SSE4.1 "cvt" intrinsics
Function signatures and decorations match gcc/config/i386/smmintrin.h.

Also, copy tests for:
- _mm_cvtepi8_epi16, _mm_cvtepi8_epi32, _mm_cvtepi8_epi64
- _mm_cvtepi16_epi32, _mm_cvtepi16_epi64
- _mm_cvtepi32_epi64
- _mm_cvtepu8_epi16, _mm_cvtepu8_epi32, _mm_cvtepu8_epi64
- _mm_cvtepu16_epi32, _mm_cvtepu16_epi64
- _mm_cvtepu32_epi64

from gcc/testsuite/gcc.target/i386.

sse4_1-pmovsxbd.c, sse4_1-pmovsxbq.c, and sse4_1-pmovsxbw.c were modified from using "char" types to "signed char" types, because plain "char" is unsigned by default on powerpc.

2021-08-20  Paul A. Clarke

gcc
	* config/rs6000/smmintrin.h (_mm_cvtepi8_epi16, _mm_cvtepi8_epi32,
	_mm_cvtepi8_epi64, _mm_cvtepi16_epi32, _mm_cvtepi16_epi64,
	_mm_cvtepi32_epi64, _mm_cvtepu8_epi16, _mm_cvtepu8_epi32,
	_mm_cvtepu8_epi64, _mm_cvtepu16_epi32, _mm_cvtepu16_epi64,
	_mm_cvtepu32_epi64): New.

gcc/testsuite
	* gcc.target/powerpc/sse4_1-pmovsxbd.c: Copy from gcc.target/i386,
	adjust dg directives to suit.
	* gcc.target/powerpc/sse4_1-pmovsxbq.c: Same.
	* gcc.target/powerpc/sse4_1-pmovsxbw.c: Same.
	* gcc.target/powerpc/sse4_1-pmovsxdq.c: Same.
	* gcc.target/powerpc/sse4_1-pmovsxwd.c: Same.
	* gcc.target/powerpc/sse4_1-pmovsxwq.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxbd.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxbq.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxbw.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxdq.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxwd.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxwq.c: Same.

---
v3: No change.
v2:
- Added "extern" to functions to maintain compatible decorations with
  like implementations in gcc/config/i386.
- Removed "-Wno-psabi" from tests as unnecessary, per v1 review.
- Noted testing in patch series cover letter.
gcc/config/rs6000/smmintrin.h | 138 ++ .../gcc.target/powerpc/sse4_1-pmovsxbd.c | 42 ++ .../gcc.target/powerpc/sse4_1-pmovsxbq.c | 42 ++ .../gcc.target/powerpc/sse4_1-pmovsxbw.c | 42 ++ .../gcc.target/powerpc/sse4_1-pmovsxdq.c | 42 ++ .../gcc.target/powerpc/sse4_1-pmovsxwd.c | 42 ++ .../gcc.target/powerpc/sse4_1-pmovsxwq.c | 42 ++ .../gcc.target/powerpc/sse4_1-pmovzxbd.c | 43 ++ .../gcc.target/powerpc/sse4_1-pmovzxbq.c | 43 ++ .../gcc.target/powerpc/sse4_1-pmovzxbw.c | 43 ++ .../gcc.target/powerpc/sse4_1-pmovzxdq.c | 43 ++ .../gcc.target/powerpc/sse4_1-pmovzxwd.c | 43 ++ .../gcc.target/powerpc/sse4_1-pmovzxwq.c | 43 ++ 13 files changed, 648 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index 363534cb06a2..fdef6674d16c 100644 --- a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -442,6 +442,144 @@ _mm_max_epu32 (__m128i __X, __m128i __Y) return (__m128i) vec_max ((__v4su)__X, (__v4su)__Y); } +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi8_epi16 (__m128i __A) +{ + return (__m128i) vec_unpackh ((__v16qi)__A); +} + +extern 
__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi8_epi32 (__m128i __A) +{ + __A = (__m128i) vec_unpackh ((__v16qi)__A); + return (__m128i) vec_unpackh ((__v8hi)__A); +} + +#ifdef _ARCH_PWR8 +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi8_epi64 (__m128i __A) +{ + __A = (__m128i) vec_unpackh ((__v16qi)__A); + __A = (__m128i) vec_unpackh ((__v8hi)__A); + return (__m128i) vec_unpackh ((__v4si)__A); +} +#endif + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi16_epi32 (__m128i __A) +{ + return (__m128i) vec_unpackh ((__v8hi)__A); +} + +#ifdef _ARCH_PWR8 +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi16_epi64 (__m128i __A) +{ + __A = (__m128i) vec_unpackh ((__v8hi)__A); + return (__m128i) vec_unpackh ((__v4si)__A); +} +#endif +
Re: [PATCH v3 4/6] rs6000: Support SSE4.1 "cvt" intrinsics
This looks fine, recommend approval.

Thanks!
Bill

On 8/23/21 2:03 PM, Paul A. Clarke wrote:

Function signatures and decorations match gcc/config/i386/smmintrin.h.

Also, copy tests for:
- _mm_cvtepi8_epi16, _mm_cvtepi8_epi32, _mm_cvtepi8_epi64
- _mm_cvtepi16_epi32, _mm_cvtepi16_epi64
- _mm_cvtepi32_epi64,
- _mm_cvtepu8_epi16, _mm_cvtepu8_epi32, _mm_cvtepu8_epi64
- _mm_cvtepu16_epi32, _mm_cvtepu16_epi64
- _mm_cvtepu32_epi64

from gcc/testsuite/gcc.target/i386.

sse4_1-pmovsxbd.c, sse4_1-pmovsxbq.c, and sse4_1-pmovsxbw.c were modified from using "char" types to "signed char" types, because the default is unsigned on powerpc.

2021-08-20  Paul A. Clarke

gcc
	* config/rs6000/smmintrin.h (_mm_cvtepi8_epi16, _mm_cvtepi8_epi32,
	_mm_cvtepi8_epi64, _mm_cvtepi16_epi32, _mm_cvtepi16_epi64,
	_mm_cvtepi32_epi64, _mm_cvtepu8_epi16, _mm_cvtepu8_epi32,
	_mm_cvtepu8_epi64, _mm_cvtepu16_epi32, _mm_cvtepu16_epi64,
	_mm_cvtepu32_epi64): New.

gcc/testsuite
	* gcc.target/powerpc/sse4_1-pmovsxbd.c: Copy from gcc.target/i386,
	adjust dg directives to suit.
	* gcc.target/powerpc/sse4_1-pmovsxbq.c: Same.
	* gcc.target/powerpc/sse4_1-pmovsxbw.c: Same.
	* gcc.target/powerpc/sse4_1-pmovsxdq.c: Same.
	* gcc.target/powerpc/sse4_1-pmovsxwd.c: Same.
	* gcc.target/powerpc/sse4_1-pmovsxwq.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxbd.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxbq.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxbw.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxdq.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxwd.c: Same.
	* gcc.target/powerpc/sse4_1-pmovzxwq.c: Same.

---
v3: No change.
v2:
- Added "extern" to functions to maintain compatible decorations with
  like implementations in gcc/config/i386.
- Removed "-Wno-psabi" from tests as unnecessary, per v1 review.
- Noted testing in patch series cover letter.
gcc/config/rs6000/smmintrin.h | 138 ++ .../gcc.target/powerpc/sse4_1-pmovsxbd.c | 42 ++ .../gcc.target/powerpc/sse4_1-pmovsxbq.c | 42 ++ .../gcc.target/powerpc/sse4_1-pmovsxbw.c | 42 ++ .../gcc.target/powerpc/sse4_1-pmovsxdq.c | 42 ++ .../gcc.target/powerpc/sse4_1-pmovsxwd.c | 42 ++ .../gcc.target/powerpc/sse4_1-pmovsxwq.c | 42 ++ .../gcc.target/powerpc/sse4_1-pmovzxbd.c | 43 ++ .../gcc.target/powerpc/sse4_1-pmovzxbq.c | 43 ++ .../gcc.target/powerpc/sse4_1-pmovzxbw.c | 43 ++ .../gcc.target/powerpc/sse4_1-pmovzxdq.c | 43 ++ .../gcc.target/powerpc/sse4_1-pmovzxwd.c | 43 ++ .../gcc.target/powerpc/sse4_1-pmovzxwq.c | 43 ++ 13 files changed, 648 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbd.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbq.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxbw.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxdq.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwd.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovsxwq.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbd.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbq.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxbw.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxdq.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwd.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pmovzxwq.c diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index 363534cb06a2..fdef6674d16c 100644 --- a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -442,6 +442,144 @@ _mm_max_epu32 (__m128i __X, __m128i __Y) return (__m128i) vec_max ((__v4su)__X, (__v4su)__Y); } +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi8_epi16 (__m128i __A) +{ + return (__m128i) vec_unpackh ((__v16qi)__A); +} + +extern 
__inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi8_epi32 (__m128i __A) +{ + __A = (__m128i) vec_unpackh ((__v16qi)__A); + return (__m128i) vec_unpackh ((__v8hi)__A); +} + +#ifdef _ARCH_PWR8 +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi8_epi64 (__m128i __A) +{ + __A = (__m128i) vec_unpackh ((__v16qi)__A); + __A = (__m128i) vec_unpackh ((__v8hi)__A); + return (__m128i) vec_unpackh ((__v4si)__A); +} +#endif + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi16_epi32 (__m128i __A) +{ + return (__m128i) vec_unpackh ((__v8hi)__A); +} + +#ifdef _ARCH_PWR8 +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi16_ep
Re: [PATCH v3 4/6] rs6000: Support SSE4.1 "cvt" intrinsics
Hi!

On Mon, Aug 23, 2021 at 02:03:08PM -0500, Paul A. Clarke wrote:
> gcc
>   * config/rs6000/smmintrin.h (_mm_cvtepi8_epi16, _mm_cvtepi8_epi32,
>   _mm_cvtepi8_epi64, _mm_cvtepi16_epi32, _mm_cvtepi16_epi64,
>   _mm_cvtepi32_epi64, _mm_cvtepu8_epi16, _mm_cvtepu8_epi32,
>   _mm_cvtepu8_epi64, _mm_cvtepu16_epi32, _mm_cvtepu16_epi64,
>   _mm_cvtepu32_epi64): New.
>
> gcc/testsuite
>   * gcc.target/powerpc/sse4_1-pmovsxbd.c: Copy from gcc.target/i386,
>   adjust dg directives to suit.
>   * gcc.target/powerpc/sse4_1-pmovsxbq.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovsxbw.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovsxdq.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovsxwd.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovsxwq.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovzxbd.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovzxbq.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovzxbw.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovzxdq.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovzxwd.c: Same.
>   * gcc.target/powerpc/sse4_1-pmovzxwq.c: Same.

> +extern __inline __m128i
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtepi8_epi16 (__m128i __A)
> +{
> +  return (__m128i) vec_unpackh ((__v16qi)__A);
> +}

This strange mixture of sometimes writing a cast with a space and sometimes without one is... strange :-)

Having up to three unpacks in a row seems suboptimal.  But it certainly is aesthetically pleasing :-)

> +/* { dg-do run } */
> +/* { dg-require-effective-target powerpc_vsx_ok } */
> +/* { dg-options "-O2 -mvsx" } */

Same as before here too (needs vsx_hw).

Okay for trunk with that fixed.  Thanks!


Segher