Re: [PATCH] i386: using __bf16 for AVX512BF16 intrinsics
On Fri, Oct 28, 2022 at 2:20 PM Kong, Lingling via Gcc-patches wrote: > > Hi, > > Previously we use unsigned short to represent bf16. It's not a good > expression, and at the time the front end didn't support bf16 type. > Now we introduced __bf16 to X86 psABI. So we can switch intrinsics to the new > type. > > Ok for trunk ? LGTM, but please don't commit it until next week to leave some time for others to take a look. Also please update GCC13 doc for it. https://gcc.gnu.org/gcc-13/changes.html. > > Thanks, > Lingling > > gcc/ChangeLog: > > * config/i386/avx512bf16intrin.h (__attribute__): Change short to > bf16. > (_mm_cvtsbh_ss): Ditto. > (_mm512_cvtne2ps_pbh): Ditto. > (_mm512_mask_cvtne2ps_pbh): Ditto. > (_mm512_maskz_cvtne2ps_pbh): Ditto. > * config/i386/avx512bf16vlintrin.h (__attribute__): Ditto. > (_mm256_cvtne2ps_pbh): Ditto. > (_mm256_mask_cvtne2ps_pbh): Ditto. > (_mm256_maskz_cvtne2ps_pbh): Ditto. > (_mm_cvtne2ps_pbh): Ditto. > (_mm_mask_cvtne2ps_pbh): Ditto. > (_mm_maskz_cvtne2ps_pbh): Ditto. > (_mm_cvtness_sbh): Ditto. > * config/i386/i386-builtin-types.def (V8BF): Add new > DEF_VECTOR_TYPE for BFmode. > (V16BF): Ditto. > (V32BF): Ditto. > * config/i386/i386-builtin.def (BDESC): Fixed builtins. > * config/i386/i386-expand.cc (ix86_expand_args_builtin): Changed > avx512bf16 ix86_builtin_func_type included HI to BF. > * config/i386/immintrin.h: Add SSE2 depend for avx512bf16. > * config/i386/sse.md (TARGET_AVX512VL): Changed HI vector to BF > vector. > (avx512f_cvtneps2bf16_v4sf): New define_expand. > (*avx512f_cvtneps2bf16_v4sf): New define_insn. > (avx512f_cvtneps2bf16_v4sf_maskz):Ditto. > (avx512f_cvtneps2bf16_v4sf_mask): Ditto. > (avx512f_cvtneps2bf16_v4sf_mask_1): Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx512bf16-cvtsbh2ss-1.c: Add fpmath option. > * gcc.target/i386/avx512bf16-vdpbf16ps-2.c: Fixed > scan-assembler. > * gcc.target/i386/avx512bf16vl-cvtness2sbh-1.c: Add x/y suffix > for vcvtneps2bf16. 
> * gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c: Ditto. > --- > gcc/config/i386/avx512bf16intrin.h| 12 +-- > gcc/config/i386/avx512bf16vlintrin.h | 29 ++--- > gcc/config/i386/i386-builtin-types.def| 51 - > gcc/config/i386/i386-builtin.def | 54 +- > gcc/config/i386/i386-expand.cc| 48 - > gcc/config/i386/immintrin.h | 2 + > gcc/config/i386/sse.md| 101 ++ > .../gcc.target/i386/avx512bf16-cvtsbh2ss-1.c | 2 +- > .../gcc.target/i386/avx512bf16-vdpbf16ps-2.c | 2 +- > .../i386/avx512bf16vl-cvtness2sbh-1.c | 2 +- > .../i386/avx512bf16vl-vcvtneps2bf16-1.c | 12 +-- > 11 files changed, 189 insertions(+), 126 deletions(-) > > diff --git a/gcc/config/i386/avx512bf16intrin.h > b/gcc/config/i386/avx512bf16intrin.h > index b6e9ddad157..ea1d0125b3f 100644 > --- a/gcc/config/i386/avx512bf16intrin.h > +++ b/gcc/config/i386/avx512bf16intrin.h > @@ -35,16 +35,16 @@ > #endif /* __AVX512BF16__ */ > > /* Internal data types for implementing the intrinsics. */ > -typedef short __v32bh __attribute__ ((__vector_size__ (64))); > +typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64))); > > /* The Intel API is flexible enough that we must allow aliasing with other > vector types, and their scalar components. */ > -typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__)); > +typedef __bf16 __m512bh __attribute__ ((__vector_size__ (64), > __may_alias__)); > > /* Convert One BF16 Data to One Single Float Data. 
*/ > extern __inline float > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_cvtsbh_ss (__bfloat16 __A) > +_mm_cvtsbh_ss (__bf16 __A) > { >union{ float a; unsigned int b;} __tmp; >__tmp.b = ((unsigned int)(__A)) << 16; > @@ -57,21 +57,21 @@ extern __inline __m512bh > __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_cvtne2ps_pbh (__m512 __A, __m512 __B) > { > - return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B); > + return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf(__A, __B); > } > > extern __inline __m512bh > __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 > __D) > { > - return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, > __B); > + return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_mask(__C, __D, __A, > __B); > } > > extern __inline __m512bh > __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C) > { > - return
[PATCH] i386: using __bf16 for AVX512BF16 intrinsics
Hi, Previously we used unsigned short to represent bf16. That is not a good representation, and at the time the front end didn't support a bf16 type. Now that we have introduced __bf16 to the x86 psABI, we can switch the intrinsics to the new type. Ok for trunk? Thanks, Lingling gcc/ChangeLog: * config/i386/avx512bf16intrin.h (__attribute__): Change short to bf16. (_mm_cvtsbh_ss): Ditto. (_mm512_cvtne2ps_pbh): Ditto. (_mm512_mask_cvtne2ps_pbh): Ditto. (_mm512_maskz_cvtne2ps_pbh): Ditto. * config/i386/avx512bf16vlintrin.h (__attribute__): Ditto. (_mm256_cvtne2ps_pbh): Ditto. (_mm256_mask_cvtne2ps_pbh): Ditto. (_mm256_maskz_cvtne2ps_pbh): Ditto. (_mm_cvtne2ps_pbh): Ditto. (_mm_mask_cvtne2ps_pbh): Ditto. (_mm_maskz_cvtne2ps_pbh): Ditto. (_mm_cvtness_sbh): Ditto. * config/i386/i386-builtin-types.def (V8BF): Add new DEF_VECTOR_TYPE for BFmode. (V16BF): Ditto. (V32BF): Ditto. * config/i386/i386-builtin.def (BDESC): Fixed builtins. * config/i386/i386-expand.cc (ix86_expand_args_builtin): Changed avx512bf16 ix86_builtin_func_type included HI to BF. * config/i386/immintrin.h: Add SSE2 depend for avx512bf16. * config/i386/sse.md (TARGET_AVX512VL): Changed HI vector to BF vector. (avx512f_cvtneps2bf16_v4sf): New define_expand. (*avx512f_cvtneps2bf16_v4sf): New define_insn. (avx512f_cvtneps2bf16_v4sf_maskz): Ditto. (avx512f_cvtneps2bf16_v4sf_mask): Ditto. (avx512f_cvtneps2bf16_v4sf_mask_1): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512bf16-cvtsbh2ss-1.c: Add fpmath option. * gcc.target/i386/avx512bf16-vdpbf16ps-2.c: Fixed scan-assembler. * gcc.target/i386/avx512bf16vl-cvtness2sbh-1.c: Add x/y suffix for vcvtneps2bf16. * gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c: Ditto. 
--- gcc/config/i386/avx512bf16intrin.h| 12 +-- gcc/config/i386/avx512bf16vlintrin.h | 29 ++--- gcc/config/i386/i386-builtin-types.def| 51 - gcc/config/i386/i386-builtin.def | 54 +- gcc/config/i386/i386-expand.cc| 48 - gcc/config/i386/immintrin.h | 2 + gcc/config/i386/sse.md| 101 ++ .../gcc.target/i386/avx512bf16-cvtsbh2ss-1.c | 2 +- .../gcc.target/i386/avx512bf16-vdpbf16ps-2.c | 2 +- .../i386/avx512bf16vl-cvtness2sbh-1.c | 2 +- .../i386/avx512bf16vl-vcvtneps2bf16-1.c | 12 +-- 11 files changed, 189 insertions(+), 126 deletions(-) diff --git a/gcc/config/i386/avx512bf16intrin.h b/gcc/config/i386/avx512bf16intrin.h index b6e9ddad157..ea1d0125b3f 100644 --- a/gcc/config/i386/avx512bf16intrin.h +++ b/gcc/config/i386/avx512bf16intrin.h @@ -35,16 +35,16 @@ #endif /* __AVX512BF16__ */ /* Internal data types for implementing the intrinsics. */ -typedef short __v32bh __attribute__ ((__vector_size__ (64))); +typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64))); /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ -typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__)); +typedef __bf16 __m512bh __attribute__ ((__vector_size__ (64), __may_alias__)); /* Convert One BF16 Data to One Single Float Data. 
*/ extern __inline float __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cvtsbh_ss (__bfloat16 __A) +_mm_cvtsbh_ss (__bf16 __A) { union{ float a; unsigned int b;} __tmp; __tmp.b = ((unsigned int)(__A)) << 16; @@ -57,21 +57,21 @@ extern __inline __m512bh __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm512_cvtne2ps_pbh (__m512 __A, __m512 __B) { - return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B); + return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf(__A, __B); } extern __inline __m512bh __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D) { - return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, __B); + return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_mask(__C, __D, __A, __B); } extern __inline __m512bh __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C) { - return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_maskz(__B, __C, __A); + return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_maskz(__B, __C, __A); } /* vcvtneps2bf16 */ diff --git a/gcc/config/i386/avx512bf16vlintrin.h b/gcc/config/i386/avx512bf16vlintrin.h index 969335ff358..56c28f14cf6 100644 --- a/gcc/config/i386/avx512bf16vlintrin.h +++ b/gcc/config/i386/avx512bf16vlintrin.h @@ -35,57 +35,58 @@ #endif /* __AVX512BF16__ */ /* Internal data types for