Re: [PATCH] i386: using __bf16 for AVX512BF16 intrinsics

2022-10-28 Thread Hongtao Liu via Gcc-patches
On Fri, Oct 28, 2022 at 2:20 PM Kong, Lingling via Gcc-patches
 wrote:
>
> Hi,
>
> Previously we used unsigned short to represent bf16. That was not a good
> representation, but at the time the front end didn't support a bf16 type.
> Now that __bf16 has been introduced in the x86 psABI, we can switch the
> intrinsics to the new type.
>
> Ok for trunk ?
LGTM, but please don't commit it until next week to leave some time
for others to take a look.
Also, please update the GCC 13 release notes for it:
https://gcc.gnu.org/gcc-13/changes.html
>
> Thanks,
> Lingling
>
> gcc/ChangeLog:
>
> * config/i386/avx512bf16intrin.h (__attribute__): Change short to 
> bf16.
> (_mm_cvtsbh_ss): Ditto.
> (_mm512_cvtne2ps_pbh): Ditto.
> (_mm512_mask_cvtne2ps_pbh): Ditto.
> (_mm512_maskz_cvtne2ps_pbh): Ditto.
> * config/i386/avx512bf16vlintrin.h (__attribute__): Ditto.
> (_mm256_cvtne2ps_pbh): Ditto.
> (_mm256_mask_cvtne2ps_pbh): Ditto.
> (_mm256_maskz_cvtne2ps_pbh): Ditto.
> (_mm_cvtne2ps_pbh): Ditto.
> (_mm_mask_cvtne2ps_pbh): Ditto.
> (_mm_maskz_cvtne2ps_pbh): Ditto.
> (_mm_cvtness_sbh): Ditto.
> * config/i386/i386-builtin-types.def (V8BF): Add new
> DEF_VECTOR_TYPE for BFmode.
> (V16BF): Ditto.
> (V32BF): Ditto.
> * config/i386/i386-builtin.def (BDESC): Fixed builtins.
> * config/i386/i386-expand.cc (ix86_expand_args_builtin): Changed
> avx512bf16 ix86_builtin_func_type included HI to BF.
> * config/i386/immintrin.h: Add SSE2 depend for avx512bf16.
> * config/i386/sse.md (TARGET_AVX512VL): Changed HI vector to BF
> vector.
> (avx512f_cvtneps2bf16_v4sf): New define_expand.
> (*avx512f_cvtneps2bf16_v4sf): New define_insn.
> (avx512f_cvtneps2bf16_v4sf_maskz): Ditto.
> (avx512f_cvtneps2bf16_v4sf_mask): Ditto.
> (avx512f_cvtneps2bf16_v4sf_mask_1): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/avx512bf16-cvtsbh2ss-1.c: Add fpmath option.
> * gcc.target/i386/avx512bf16-vdpbf16ps-2.c: Fixed
> scan-assembler.
> * gcc.target/i386/avx512bf16vl-cvtness2sbh-1.c: Add x/y suffix
> for vcvtneps2bf16.
> * gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c: Ditto.
> ---
>  gcc/config/i386/avx512bf16intrin.h|  12 +--
>  gcc/config/i386/avx512bf16vlintrin.h  |  29 ++---
>  gcc/config/i386/i386-builtin-types.def|  51 -
>  gcc/config/i386/i386-builtin.def  |  54 +-
>  gcc/config/i386/i386-expand.cc|  48 -
>  gcc/config/i386/immintrin.h   |   2 +
>  gcc/config/i386/sse.md| 101 ++
>  .../gcc.target/i386/avx512bf16-cvtsbh2ss-1.c  |   2 +-
>  .../gcc.target/i386/avx512bf16-vdpbf16ps-2.c  |   2 +-
>  .../i386/avx512bf16vl-cvtness2sbh-1.c |   2 +-
>  .../i386/avx512bf16vl-vcvtneps2bf16-1.c   |  12 +--
>  11 files changed, 189 insertions(+), 126 deletions(-)
>
> diff --git a/gcc/config/i386/avx512bf16intrin.h 
> b/gcc/config/i386/avx512bf16intrin.h
> index b6e9ddad157..ea1d0125b3f 100644
> --- a/gcc/config/i386/avx512bf16intrin.h
> +++ b/gcc/config/i386/avx512bf16intrin.h
> @@ -35,16 +35,16 @@
>  #endif /* __AVX512BF16__ */
>
>  /* Internal data types for implementing the intrinsics.  */
> -typedef short __v32bh __attribute__ ((__vector_size__ (64)));
> +typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64)));
>
>  /* The Intel API is flexible enough that we must allow aliasing with other
> vector types, and their scalar components.  */
> -typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
> +typedef __bf16 __m512bh __attribute__ ((__vector_size__ (64), 
> __may_alias__));
>
>  /* Convert One BF16 Data to One Single Float Data.  */
>  extern __inline float
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_cvtsbh_ss (__bfloat16 __A)
> +_mm_cvtsbh_ss (__bf16 __A)
>  {
>union{ float a; unsigned int b;} __tmp;
>__tmp.b = ((unsigned int)(__A)) << 16;
> @@ -57,21 +57,21 @@ extern __inline __m512bh
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
>  {
> -  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B);
> +  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf(__A, __B);
>  }
>
>  extern __inline __m512bh
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 
> __D)
>  {
> -  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, 
> __B);
> +  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_mask(__C, __D, __A, 
> __B);
>  }
>
>  extern __inline __m512bh
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
>  {
> -  return 

[PATCH] i386: using __bf16 for AVX512BF16 intrinsics

2022-10-28 Thread Kong, Lingling via Gcc-patches
Hi,

Previously we used unsigned short to represent bf16. That was not a good
representation, but at the time the front end didn't support a bf16 type.
Now that __bf16 has been introduced in the x86 psABI, we can switch the
intrinsics to the new type.

Ok for trunk ?

Thanks,
Lingling

gcc/ChangeLog:

* config/i386/avx512bf16intrin.h (__attribute__): Change short to bf16.
(_mm_cvtsbh_ss): Ditto.
(_mm512_cvtne2ps_pbh): Ditto.
(_mm512_mask_cvtne2ps_pbh): Ditto.
(_mm512_maskz_cvtne2ps_pbh): Ditto.
* config/i386/avx512bf16vlintrin.h (__attribute__): Ditto.
(_mm256_cvtne2ps_pbh): Ditto.
(_mm256_mask_cvtne2ps_pbh): Ditto.
(_mm256_maskz_cvtne2ps_pbh): Ditto.
(_mm_cvtne2ps_pbh): Ditto.
(_mm_mask_cvtne2ps_pbh): Ditto.
(_mm_maskz_cvtne2ps_pbh): Ditto.
(_mm_cvtness_sbh): Ditto.
* config/i386/i386-builtin-types.def (V8BF): Add new
DEF_VECTOR_TYPE for BFmode.
(V16BF): Ditto.
(V32BF): Ditto.
* config/i386/i386-builtin.def (BDESC): Fixed builtins.
* config/i386/i386-expand.cc (ix86_expand_args_builtin): Changed
avx512bf16 ix86_builtin_func_type included HI to BF.
* config/i386/immintrin.h: Add SSE2 dependency for avx512bf16.
* config/i386/sse.md (TARGET_AVX512VL): Changed HI vector to BF
vector.
(avx512f_cvtneps2bf16_v4sf): New define_expand.
(*avx512f_cvtneps2bf16_v4sf): New define_insn.
(avx512f_cvtneps2bf16_v4sf_maskz): Ditto.
(avx512f_cvtneps2bf16_v4sf_mask): Ditto.
(avx512f_cvtneps2bf16_v4sf_mask_1): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx512bf16-cvtsbh2ss-1.c: Add fpmath option.
* gcc.target/i386/avx512bf16-vdpbf16ps-2.c: Fixed
scan-assembler.
* gcc.target/i386/avx512bf16vl-cvtness2sbh-1.c: Add x/y suffix
for vcvtneps2bf16.
* gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c: Ditto.
---
 gcc/config/i386/avx512bf16intrin.h|  12 +--
 gcc/config/i386/avx512bf16vlintrin.h  |  29 ++---
 gcc/config/i386/i386-builtin-types.def|  51 -
 gcc/config/i386/i386-builtin.def  |  54 +-
 gcc/config/i386/i386-expand.cc|  48 -
 gcc/config/i386/immintrin.h   |   2 +
 gcc/config/i386/sse.md| 101 ++
 .../gcc.target/i386/avx512bf16-cvtsbh2ss-1.c  |   2 +-
 .../gcc.target/i386/avx512bf16-vdpbf16ps-2.c  |   2 +-
 .../i386/avx512bf16vl-cvtness2sbh-1.c |   2 +-
 .../i386/avx512bf16vl-vcvtneps2bf16-1.c   |  12 +--
 11 files changed, 189 insertions(+), 126 deletions(-)

diff --git a/gcc/config/i386/avx512bf16intrin.h 
b/gcc/config/i386/avx512bf16intrin.h
index b6e9ddad157..ea1d0125b3f 100644
--- a/gcc/config/i386/avx512bf16intrin.h
+++ b/gcc/config/i386/avx512bf16intrin.h
@@ -35,16 +35,16 @@
 #endif /* __AVX512BF16__ */
 
 /* Internal data types for implementing the intrinsics.  */
-typedef short __v32bh __attribute__ ((__vector_size__ (64)));
+typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64)));
 
 /* The Intel API is flexible enough that we must allow aliasing with other
vector types, and their scalar components.  */
-typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
+typedef __bf16 __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
 
 /* Convert One BF16 Data to One Single Float Data.  */
 extern __inline float
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cvtsbh_ss (__bfloat16 __A)
+_mm_cvtsbh_ss (__bf16 __A)
 {
   union{ float a; unsigned int b;} __tmp;
   __tmp.b = ((unsigned int)(__A)) << 16;
@@ -57,21 +57,21 @@ extern __inline __m512bh
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
 {
-  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B);
+  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf(__A, __B);
 }
 
 extern __inline __m512bh
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
 {
-  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, __B);
+  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_mask(__C, __D, __A, __B);
 }
 
 extern __inline __m512bh
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
 {
-  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_maskz(__B, __C, __A);
+  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32bf_maskz(__B, __C, __A);
 }
 
 /* vcvtneps2bf16 */
diff --git a/gcc/config/i386/avx512bf16vlintrin.h 
b/gcc/config/i386/avx512bf16vlintrin.h
index 969335ff358..56c28f14cf6 100644
--- a/gcc/config/i386/avx512bf16vlintrin.h
+++ b/gcc/config/i386/avx512bf16vlintrin.h
@@ -35,57 +35,58 @@
 #endif /* __AVX512BF16__ */
 
 /* Internal data types for