On Fri, Oct 15, 2021 at 4:38 PM dianhong.xu--- via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > From: dianhong xu <dianhong...@intel.com> > > Add *_set1_pch (_Float16 _Complex A) intrinsics. > > gcc/ChangeLog: > > * config/i386/avx512fp16intrin.h: > (_mm512_set1_pch): New intrinsic. > * config/i386/avx512fp16vlintrin.h: > (_mm256_set1_pch): New intrinsic. > (_mm_set1_pch): Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx512fp16-set1-pch-1a.c: New test. > * gcc.target/i386/avx512fp16-set1-pch-1b.c: New test. > * gcc.target/i386/avx512fp16vl-set1-pch-1a.c: New test. > * gcc.target/i386/avx512fp16vl-set1-pch-1b.c: New test. LGTM. > --- > gcc/config/i386/avx512fp16intrin.h | 13 +++++ > gcc/config/i386/avx512fp16vlintrin.h | 26 +++++++++ > .../gcc.target/i386/avx512fp16-set1-pch-1a.c | 13 +++++ > .../gcc.target/i386/avx512fp16-set1-pch-1b.c | 42 ++++++++++++++ > .../i386/avx512fp16vl-set1-pch-1a.c | 20 +++++++ > .../i386/avx512fp16vl-set1-pch-1b.c | 57 +++++++++++++++++++ > 6 files changed, 171 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c > > diff --git a/gcc/config/i386/avx512fp16intrin.h > b/gcc/config/i386/avx512fp16intrin.h > index 079ce321c01..17025d68b8e 100644 > --- a/gcc/config/i386/avx512fp16intrin.h > +++ b/gcc/config/i386/avx512fp16intrin.h > @@ -7237,6 +7237,19 @@ _mm512_permutexvar_ph (__m512i __A, __m512h __B) > (__mmask32)-1); > } > > +extern __inline __m512h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_set1_pch (_Float16 _Complex __A) > +{ > + union > + { > + _Float16 _Complex a; > + float b; > + } u = { .a = __A}; > + > + return (__m512h) _mm512_set1_ps (u.b); > +} > + > #ifdef __DISABLE_AVX512FP16__ > #undef __DISABLE_AVX512FP16__ > #pragma GCC pop_options > diff --git a/gcc/config/i386/avx512fp16vlintrin.h > b/gcc/config/i386/avx512fp16vlintrin.h > index f83a429ba43..1de4513d7f1 100644 > --- a/gcc/config/i386/avx512fp16vlintrin.h > +++ b/gcc/config/i386/avx512fp16vlintrin.h > @@ -3315,6 +3315,32 @@ _mm_permutexvar_ph (__m128i __A, __m128h __B) > (__mmask8)-1); > } > > +extern __inline __m256h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm256_set1_pch (_Float16 _Complex __A) > +{ > + union > + { > + _Float16 _Complex a; > + float b; > + } u = { .a = __A }; > + > + return (__m256h) _mm256_set1_ps (u.b); > +} > + > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_set1_pch (_Float16 _Complex __A) > +{ > + union > + { > + _Float16 _Complex a; > + float b; > + } u = { .a = __A }; > + > + return (__m128h) _mm_set1_ps (u.b); > +} > + > #ifdef __DISABLE_AVX512FP16VL__ > #undef __DISABLE_AVX512FP16VL__ > #pragma GCC pop_options > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c > b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c > new file mode 100644 > index 00000000000..0055193f243 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile} */ > +/* { dg-options "-O2 -mavx512fp16" } */ > + > +#include <immintrin.h> > + > +__m512h > +__attribute__ ((noinline, noclone)) > +test_mm512_set1_pch (_Float16 _Complex A) > +{ > + return _mm512_set1_pch(A); > +} > + > +/* { dg-final { scan-assembler "vbroadcastss\[ \\t\]+\[^\n\r\]*%zmm\[01\]" } > } */ > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c > b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c > new file mode 100644 > index 00000000000..450d7e37237 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c > @@ -0,0 +1,42 @@ > +/* { dg-do run { target avx512fp16 } } */ > +/* { dg-options "-O2 -mavx512fp16" } */ > + > +#include<stdio.h> > +#include <math.h> > +#include <complex.h> > + > +static void do_test (void); > + > +#define DO_TEST do_test > +#define AVX512FP16 > + > +#include <immintrin.h> > +#include "avx512-check.h" > + > +static void > +do_test (void) > +{ > + _Float16 _Complex fc = 1.0 + 1.0*I; > + union > + { > + _Float16 _Complex a; > + float b; > + } u = { .a = fc }; > + float ff= u.b; > + > + typedef union > + { > + float fp[16]; > + __m512h m512h; > + } u1; > + > + __m512h test512 = _mm512_set1_pch(fc); > + > + u1 test; > + test.m512h = test512; > + for (int i = 0; i<16; i++) > + { > + if (test.fp[i] != ff) abort(); > + } > + > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c > b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c > new file mode 100644 > index 00000000000..4c5624f9935 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c > @@ -0,0 +1,20 @@ > +/* { dg-do compile} */ > +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */ > + > +#include <immintrin.h> > + > +__m256h > +__attribute__ ((noinline, noclone)) > +test_mm256_set1_pch (_Float16 _Complex A) > +{ > + return _mm256_set1_pch(A); > +} > + > +__m128h > +__attribute__ ((noinline, noclone)) > +test_mm_set1_pch (_Float16 _Complex A) > +{ > + return _mm_set1_pch(A); > +} > + > +/* { dg-final { scan-assembler-times "vbroadcastss" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c > b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c > new file mode 100644 > index 00000000000..aebff141821 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c > @@ -0,0 +1,57 @@ > +/* { dg-do run { target avx512fp16 } } */ > +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */ > + > +#include<stdio.h> > +#include <math.h> > +#include <complex.h> > + > +static void do_test (void); > + > +#define DO_TEST do_test > +#define AVX512FP16 > + > +#include <immintrin.h> > +#include "avx512-check.h" > + > +static void > +do_test (void) > +{ > + _Float16 _Complex fc = 1.0 + 1.0*I; > + union > + { > + _Float16 _Complex a; > + float b; > + } u = { .a = fc }; > + float ff= u.b; > + > + typedef union > + { > + float fp[8]; > + __m256h m256h; > + } u1; > + > + __m256h test256 = _mm256_set1_pch(fc); > + > + u1 test1; > + test1.m256h = test256; > + for (int i = 0; i<8; i++) > + { > + if (test1.fp[i] != ff) abort(); > + } > + > + typedef union > + { > + float fp[4]; > + __m128h m128h; > + } u2; > + > + __m128h test128 = _mm_set1_pch(fc); > + > + u2 test2; > + test2.m128h = test128; > + for (int i = 0; i<4; i++) > + { > + if (test2.fp[i] != ff) abort(); > + } > + > +} > -- > 2.18.1 >
-- BR, Hongtao