[PATCH] D120395: [X86] Prohibit arithmatic operations on type `__bfloat16`

2022-02-25 Thread Phoebe Wang via Phabricator via cfe-commits
pengfei updated this revision to Diff 411334.
pengfei added a comment.

Disscussed with GCC folks. We think it's better to use the same way as D120411 
 that replacing it with short int.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120395/new/

https://reviews.llvm.org/D120395

Files:
  clang/lib/Headers/avx512bf16intrin.h
  clang/lib/Headers/avx512vlbf16intrin.h
  clang/test/CodeGen/X86/avx512bf16-builtins.c
  clang/test/CodeGen/X86/avx512bf16-error.c
  clang/test/CodeGen/X86/avx512vlbf16-builtins.c
  llvm/docs/LangRef.rst


Index: llvm/docs/LangRef.rst
===
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -3369,8 +3369,8 @@
* - ``bfloat``
  - 16-bit "brain" floating-point value (7-bit significand).  Provides the
same number of exponent bits as ``float``, so that it matches its 
dynamic
-   range, but with greatly reduced precision.  Used in Intel's AVX-512 BF16
-   extensions and Arm's ARMv8.6-A extensions, among others.
+   range, but with greatly reduced precision.  Used in Arm's ARMv8.6-A
+   extensions, among others.
 
* - ``float``
  - 32-bit floating-point value
Index: clang/test/CodeGen/X86/avx512vlbf16-builtins.c
===
--- clang/test/CodeGen/X86/avx512vlbf16-builtins.c
+++ clang/test/CodeGen/X86/avx512vlbf16-builtins.c
@@ -162,7 +162,7 @@
   return _mm256_mask_dpbf16_ps(D, U, A, B);
 }
 
-__bfloat16 test_mm_cvtness_sbh(float A) {
+unsigned short test_mm_cvtness_sbh(float A) {
   // CHECK-LABEL: @test_mm_cvtness_sbh
   // CHECK: @llvm.x86.avx512bf16.mask.cvtneps2bf16.128
   // CHECK: ret i16 %{{.*}}
Index: clang/test/CodeGen/X86/avx512bf16-error.c
===
--- /dev/null
+++ clang/test/CodeGen/X86/avx512bf16-error.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -ffreestanding -triple 
x86_64-linux-pc %s
+
+// expected-error@+1 3 {{unknown type name '__bfloat16'}}
+__bfloat16 foo(__bfloat16 a, __bfloat16 b) {
+  return a + b;
+}
+
+#include 
+
+// expected-warning@+2 3 {{'__bfloat16' is deprecated: use unsigned short 
instead}}
+// expected-note@* 3 {{'__bfloat16' has been explicitly marked deprecated 
here}}
+__bfloat16 bar(__bfloat16 a, __bfloat16 b) {
+  return a + b;
+}
Index: clang/test/CodeGen/X86/avx512bf16-builtins.c
===
--- clang/test/CodeGen/X86/avx512bf16-builtins.c
+++ clang/test/CodeGen/X86/avx512bf16-builtins.c
@@ -4,7 +4,7 @@
 
 #include 
 
-float test_mm_cvtsbh_ss(__bfloat16 A) {
+float test_mm_cvtsbh_ss(unsigned short A) {
   // CHECK-LABEL: @test_mm_cvtsbh_ss
   // CHECK: zext i16 %{{.*}} to i32
   // CHECK: shl i32 %{{.*}}, 16
Index: clang/lib/Headers/avx512vlbf16intrin.h
===
--- clang/lib/Headers/avx512vlbf16intrin.h
+++ clang/lib/Headers/avx512vlbf16intrin.h
@@ -413,7 +413,8 @@
 ///A float data.
 /// \returns A bf16 data whose sign field and exponent field keep unchanged,
 ///and fraction field is truncated to 7 bits.
-static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) {
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+_mm_cvtness_sbh(float __A) {
   __v4sf __V = {__A, 0, 0, 0};
   __v8hi __R = __builtin_ia32_cvtneps2bf16_128_mask(
   (__v4sf)__V, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
Index: clang/lib/Headers/avx512bf16intrin.h
===
--- clang/lib/Headers/avx512bf16intrin.h
+++ clang/lib/Headers/avx512bf16intrin.h
@@ -15,7 +15,8 @@
 
 typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64)));
 typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
-typedef unsigned short __bfloat16;
+typedef unsigned short __bfloat16
+__attribute__((deprecated("use unsigned short instead")));
 
 #define __DEFAULT_FN_ATTRS512 \
   __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \
@@ -33,7 +34,7 @@
 ///A bfloat data.
 /// \returns A float data whose sign field and exponent field keep unchanged,
 ///and fraction field is extended to 23 bits.
-static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(__bfloat16 __A) {
+static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(unsigned short __A) {
   return __builtin_ia32_cvtsbf162ss_32(__A);
 }
 


Index: llvm/docs/LangRef.rst
===
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -3369,8 +3369,8 @@
* - ``bfloat``
  - 16-bit "brain" floating-point value (7-bit significand).  Provides the
same number of exponent bits as ``float``, so that it matches its dynamic
-   range, but with greatly reduced precision.  Used in Intel's 

[PATCH] D120395: [X86] Prohibit arithmatic operations on type `__bfloat16`

2022-02-24 Thread Phoebe Wang via Phabricator via cfe-commits
pengfei added a comment.

In D120395#3343799 , @andrew.w.kaylor 
wrote:

> In D120395#3340953 , @craig.topper 
> wrote:
>
>> These intrinsics pre-date the existence of the bfloat type in LLVM. To use 
>> bfloat we have to make __bf16 a legal type in C. This means we need to 
>> support loads, stores, and arguments of that type. I think that would create 
>> bunch of backend complexity because we don't have could 16-bit load/store 
>> support to XMM registers. I think we only have load that inserts into a 
>> specific element. It's doable, but I'm not sure what we gain from it.
>
> My motivation for wanting to use 'bloat' started with wanting to use '__bf16' 
> as the front end type. It just doesn't make sense to me to define a new type 
> when we have an existing built-in type that has the same semantics and binary 
> representation. The argument for introducing a new IR type was made here: 
> https://reviews.llvm.org/D76077 It doesn't seem like a particularly strong 
> argument, but it's what was decided then. Using bfloat rather than i16 in the 
> IR has the benefit that it expresses what the type actually is instead of 
> just using something that has the same size. Using i16, the semantics of the 
> type are known only to the front end and we have to rely on what the front 
> end did for enforcement of the semantics. That's generally going to be OK, 
> but it seems to me like it works for the wrong reason. That is, i16 is not a 
> storage-only type and the only reason we don't notice is that the front end 
> doesn't generate IR that violates the implicit semantics of the type.
>
> I think there's a minor argument to be made concerning TBAA (short and 
> __bfloat16 look like compatible types). Perhaps a more significant argument 
> is that using the __bf16 built-in type would allow us to define a type like 
> __m256bh like this:
>
>   typedef __bf16 __m256bh __attribute__((__vector_size__(32), 
> __aligned__(32)));
>
> So my question would be, how much work are we talking about to make this work 
> with the x86 backend?

I don't see much value to support `__bf16` in front end for X86. I guess you 
may want something like `__fp16`. But the design of `__fp16` doesn't look great 
to me. GCC doesn't support `__fp16` for X86. And the existing implementation of 
`__fp16` somehow becomes obstacle for us to support `_Float16`, especially when 
we want to support for targets without `avx512fp16`. Not to mention the 
functionality of `__bf16` isn't as complete as `__fp16`: 
https://godbolt.org/z/WzKPrYTYP I think it's far from evaluating the backend 
work.
I believe the right approch is to define the ABI type firstly like `_Float16`, 
then we can do something in backend to support it.

Anyway, it doesn't matter to the intrinsics we are supporting here whether we 
want to support `__bf16` or not. We are free to define and use target specific 
type for target intrinsics. As mature intrinsics, our focuses are backward 
compatibilities and cross compiler compatibilities. Both stop us from defining 
with `__bf16`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120395/new/

https://reviews.llvm.org/D120395

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D120395: [X86] Prohibit arithmatic operations on type `__bfloat16`

2022-02-24 Thread Andy Kaylor via Phabricator via cfe-commits
andrew.w.kaylor added a comment.

In D120395#3340953 , @craig.topper 
wrote:

> These intrinsics pre-date the existence of the bfloat type in LLVM. To use 
> bfloat we have to make __bf16 a legal type in C. This means we need to 
> support loads, stores, and arguments of that type. I think that would create 
> bunch of backend complexity because we don't have could 16-bit load/store 
> support to XMM registers. I think we only have load that inserts into a 
> specific element. It's doable, but I'm not sure what we gain from it.

My motivation for wanting to use 'bloat' started with wanting to use '__bf16' 
as the front end type. It just doesn't make sense to me to define a new type 
when we have an existing built-in type that has the same semantics and binary 
representation. The argument for introducing a new IR type was made here: 
https://reviews.llvm.org/D76077 It doesn't seem like a particularly strong 
argument, but it's what was decided then. Using bfloat rather than i16 in the 
IR has the benefit that it expresses what the type actually is instead of just 
using something that has the same size. Using i16, the semantics of the type 
are known only to the front end and we have to rely on what the front end did 
for enforcement of the semantics. That's generally going to be OK, but it seems 
to me like it works for the wrong reason. That is, i16 is not a storage-only 
type and the only reason we don't notice is that the front end doesn't generate 
IR that violates the implicit semantics of the type.

I think there's a minor argument to be made concerning TBAA (short and 
__bfloat16 look like compatible types). Perhaps a more significant argument is 
that using the __bf16 built-in type would allow us to define a type like 
__m256bh like this:

  typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32)));

So my question would be, how much work are we talking about to make this work 
with the x86 backend?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120395/new/

https://reviews.llvm.org/D120395

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D120395: [X86] Prohibit arithmatic operations on type `__bfloat16`

2022-02-23 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added a comment.

In D120395#3340891 , @andrew.w.kaylor 
wrote:

> In D120395#3340496 , @pengfei wrote:
>
>> Update LangRef. We use `i16` type to represent bfloat16.
>
> Why are we using i16 to represent bfloat16? The bfloat type is better.

These intrinsics pre-date the existence of the bfloat type in LLVM. To use 
bfloat we have to make __bf16 a legal type in C. This means we need to support 
loads, stores, and arguments of that type. I think that would create bunch of 
backend complexity because we don't have could 16-bit load/store support to XMM 
registers. I think we only have load that inserts into a specific element. It's 
doable, but I'm not sure what we gain from it.




Comment at: clang/lib/Headers/avx512bf16intrin.h:22
+///format type. Define through structure to explicitly prohibit any
+///arithmatic operations.
+typedef struct __bfloat16_s {

arithmatic -> arithmetic


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120395/new/

https://reviews.llvm.org/D120395

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D120395: [X86] Prohibit arithmatic operations on type `__bfloat16`

2022-02-23 Thread Andy Kaylor via Phabricator via cfe-commits
andrew.w.kaylor added a comment.

In D120395#3340496 , @pengfei wrote:

> Update LangRef. We use `i16` type to represent bfloat16.

Why are we using i16 to represent bfloat16? The bfloat type is better.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120395/new/

https://reviews.llvm.org/D120395

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D120395: [X86] Prohibit arithmatic operations on type `__bfloat16`

2022-02-23 Thread Phoebe Wang via Phabricator via cfe-commits
pengfei updated this revision to Diff 410827.
pengfei added a comment.
Herald added subscribers: llvm-commits, jdoerfert.
Herald added a project: LLVM.

Update LangRef. We use `i16` type to represent bfloat16.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120395/new/

https://reviews.llvm.org/D120395

Files:
  clang/lib/Headers/avx512bf16intrin.h
  clang/lib/Headers/avx512vlbf16intrin.h
  clang/test/CodeGen/X86/avx512bf16-error.c
  llvm/docs/LangRef.rst


Index: llvm/docs/LangRef.rst
===
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -3369,8 +3369,8 @@
* - ``bfloat``
  - 16-bit "brain" floating-point value (7-bit significand).  Provides the
same number of exponent bits as ``float``, so that it matches its 
dynamic
-   range, but with greatly reduced precision.  Used in Intel's AVX-512 BF16
-   extensions and Arm's ARMv8.6-A extensions, among others.
+   range, but with greatly reduced precision.  Used in Arm's ARMv8.6-A
+   extensions, among others.
 
* - ``float``
  - 32-bit floating-point value
Index: clang/test/CodeGen/X86/avx512bf16-error.c
===
--- /dev/null
+++ clang/test/CodeGen/X86/avx512bf16-error.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -ffreestanding -triple 
x86_64-linux-pc %s
+
+// expected-error@+1 3 {{unknown type name '__bfloat16'}}
+__bfloat16 foo(__bfloat16 a, __bfloat16 b) {
+  return a + b;
+}
+
+#include 
+
+// expected-error@+2 {{invalid operands to binary expression ('__bfloat16' 
(aka 'struct __bfloat16_s') and '__bfloat16')}}
+__bfloat16 bar(__bfloat16 a, __bfloat16 b) {
+  return a + b;
+}
Index: clang/lib/Headers/avx512vlbf16intrin.h
===
--- clang/lib/Headers/avx512vlbf16intrin.h
+++ clang/lib/Headers/avx512vlbf16intrin.h
@@ -415,9 +415,10 @@
 ///and fraction field is truncated to 7 bits.
 static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) {
   __v4sf __V = {__A, 0, 0, 0};
-  __v8hi __R = __builtin_ia32_cvtneps2bf16_128_mask(
+  __v8hi __R1 = __builtin_ia32_cvtneps2bf16_128_mask(
   (__v4sf)__V, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
-  return __R[0];
+  __bfloat16 __R2 = {__R1[0]};
+  return __R2;
 }
 
 /// Convert Packed BF16 Data to Packed float Data.
Index: clang/lib/Headers/avx512bf16intrin.h
===
--- clang/lib/Headers/avx512bf16intrin.h
+++ clang/lib/Headers/avx512bf16intrin.h
@@ -15,7 +15,14 @@
 
 typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64)));
 typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
-typedef unsigned short __bfloat16;
+
+/// \typedef __bfloat16
+///A target specific type to represent the storage only brain 
floating-point
+///format type. Define through structure to explicitly prohibit any
+///arithmatic operations.
+typedef struct __bfloat16_s {
+  short _Value;
+} __bfloat16;
 
 #define __DEFAULT_FN_ATTRS512 \
   __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \
@@ -34,7 +41,7 @@
 /// \returns A float data whose sign field and exponent field keep unchanged,
 ///and fraction field is extended to 23 bits.
 static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(__bfloat16 __A) {
-  return __builtin_ia32_cvtsbf162ss_32(__A);
+  return __builtin_ia32_cvtsbf162ss_32(__A._Value);
 }
 
 /// Convert Two Packed Single Data to One Packed BF16 Data.


Index: llvm/docs/LangRef.rst
===
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -3369,8 +3369,8 @@
* - ``bfloat``
  - 16-bit "brain" floating-point value (7-bit significand).  Provides the
same number of exponent bits as ``float``, so that it matches its dynamic
-   range, but with greatly reduced precision.  Used in Intel's AVX-512 BF16
-   extensions and Arm's ARMv8.6-A extensions, among others.
+   range, but with greatly reduced precision.  Used in Arm's ARMv8.6-A
+   extensions, among others.
 
* - ``float``
  - 32-bit floating-point value
Index: clang/test/CodeGen/X86/avx512bf16-error.c
===
--- /dev/null
+++ clang/test/CodeGen/X86/avx512bf16-error.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -ffreestanding -triple x86_64-linux-pc %s
+
+// expected-error@+1 3 {{unknown type name '__bfloat16'}}
+__bfloat16 foo(__bfloat16 a, __bfloat16 b) {
+  return a + b;
+}
+
+#include 
+
+// expected-error@+2 {{invalid operands to binary expression ('__bfloat16' (aka 'struct __bfloat16_s') and '__bfloat16')}}
+__bfloat16 bar(__bfloat16 a, __bfloat16 b) {
+  return a + b;
+}
Index: clang/lib/Headers/avx512vlbf16intrin.h

[PATCH] D120395: [X86] Prohibit arithmatic operations on type `__bfloat16`

2022-02-23 Thread Phoebe Wang via Phabricator via cfe-commits
pengfei added a comment.

In D120395#3340153 , @pengfei wrote:

> In D120395#3340041 , @RKSimon wrote:
>
>> but its still OK to perform arithmetic with __m128bh ? 
>> https://simd.godbolt.org/z/Ef59Ws4M3
>
> Good point! I'd think the define of `__m128bh` is wrong direction. We should 
> use `__m128i` like we doing with f16c intrinsics and reserve `__m128bh` for 
> ABI type like we doing with avx512fp16.
> I tried to warn for it using `deprecated` but it didn't report warning at 
> all. Any thought?
>
>   diff --git a/clang/lib/Headers/avx512bf16intrin.h 
> b/clang/lib/Headers/avx512bf16intrin.h
>   index 54f0cb9cfbf1..2f9cda6b32f2 100644
>   --- a/clang/lib/Headers/avx512bf16intrin.h
>   +++ b/clang/lib/Headers/avx512bf16intrin.h
>   @@ -13,8 +13,10 @@
>#ifndef __AVX512BF16INTRIN_H
>#define __AVX512BF16INTRIN_H
>   
>   -typedef short __m512bh __attribute__((__vector_size__(64), 
> __aligned__(64)));
>   -typedef short __m256bh __attribute__((__vector_size__(32), 
> __aligned__(32)));
>   +typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64),
>   +  deprecated("use __m512i instead")));
>   +typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32),
>   +  deprecated("use __m256i instead")));
>   
>/// \typedef __bfloat16
>///A target specific type to represent the storage only brain 
> floating-point
>   diff --git a/clang/lib/Headers/avx512vlbf16intrin.h 
> b/clang/lib/Headers/avx512vlbf16intrin.h
>   index d42f8eb0f0f5..0e47a930ebd0 100644
>   --- a/clang/lib/Headers/avx512vlbf16intrin.h
>   +++ b/clang/lib/Headers/avx512vlbf16intrin.h
>   @@ -13,7 +13,8 @@
>#ifndef __AVX512VLBF16INTRIN_H
>#define __AVX512VLBF16INTRIN_H
>   
>   -typedef short __m128bh __attribute__((__vector_size__(16), 
> __aligned__(16)));
>   +typedef short __m128bh __attribute__((__vector_size__(16), __aligned__(16),
>   +  deprecated("use __m128i instead")));
>   
>#define __DEFAULT_FN_ATTRS128 \
>  __attribute__((__always_inline__, __nodebug__, \

Sorry, it works.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120395/new/

https://reviews.llvm.org/D120395

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D120395: [X86] Prohibit arithmatic operations on type `__bfloat16`

2022-02-23 Thread Phoebe Wang via Phabricator via cfe-commits
pengfei added a comment.

In D120395#3340041 , @RKSimon wrote:

> but its still OK to perform arithmetic with __m128bh ? 
> https://simd.godbolt.org/z/Ef59Ws4M3

Good point! I'd think the define of `__m128bh` is wrong direction. We should 
use `__m128i` like we doing with f16c intrinsics and reserve `__m128bh` for ABI 
type like we doing with avx512fp16.
I tried to warn for it using `deprecated` but it didn't report warning at all. 
Any thought?

  diff --git a/clang/lib/Headers/avx512bf16intrin.h 
b/clang/lib/Headers/avx512bf16intrin.h
  index 54f0cb9cfbf1..2f9cda6b32f2 100644
  --- a/clang/lib/Headers/avx512bf16intrin.h
  +++ b/clang/lib/Headers/avx512bf16intrin.h
  @@ -13,8 +13,10 @@
   #ifndef __AVX512BF16INTRIN_H
   #define __AVX512BF16INTRIN_H
  
  -typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64)));
  -typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
  +typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64),
  +  deprecated("use __m512i instead")));
  +typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32),
  +  deprecated("use __m256i instead")));
  
   /// \typedef __bfloat16
   ///A target specific type to represent the storage only brain 
floating-point
  diff --git a/clang/lib/Headers/avx512vlbf16intrin.h 
b/clang/lib/Headers/avx512vlbf16intrin.h
  index d42f8eb0f0f5..0e47a930ebd0 100644
  --- a/clang/lib/Headers/avx512vlbf16intrin.h
  +++ b/clang/lib/Headers/avx512vlbf16intrin.h
  @@ -13,7 +13,8 @@
   #ifndef __AVX512VLBF16INTRIN_H
   #define __AVX512VLBF16INTRIN_H
  
  -typedef short __m128bh __attribute__((__vector_size__(16), __aligned__(16)));
  +typedef short __m128bh __attribute__((__vector_size__(16), __aligned__(16),
  +  deprecated("use __m128i instead")));
  
   #define __DEFAULT_FN_ATTRS128 \
 __attribute__((__always_inline__, __nodebug__, \


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120395/new/

https://reviews.llvm.org/D120395

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D120395: [X86] Prohibit arithmatic operations on type `__bfloat16`

2022-02-23 Thread Simon Pilgrim via Phabricator via cfe-commits
RKSimon added a comment.

but its still OK to perform arithmetic with __m128bh ? 
https://simd.godbolt.org/z/Ef59Ws4M3


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120395/new/

https://reviews.llvm.org/D120395

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D120395: [X86] Prohibit arithmatic operations on type `__bfloat16`

2022-02-23 Thread Phoebe Wang via Phabricator via cfe-commits
pengfei created this revision.
pengfei added reviewers: skan, RKSimon, craig.topper, FreddyYe, LuoYuanke.
pengfei requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

`__bfloat16` is defined as X86 specific type that represents the brain
floating-point format. It is only usable with X86 intrinsics. Arithmatic
operations with this type need to be forbidden.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D120395

Files:
  clang/lib/Headers/avx512bf16intrin.h
  clang/lib/Headers/avx512vlbf16intrin.h
  clang/test/CodeGen/X86/avx512bf16-error.c


Index: clang/test/CodeGen/X86/avx512bf16-error.c
===
--- /dev/null
+++ clang/test/CodeGen/X86/avx512bf16-error.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -ffreestanding -triple 
x86_64-linux-pc %s
+
+// expected-error@+1 3 {{unknown type name '__bfloat16'}}
+__bfloat16 foo(__bfloat16 a, __bfloat16 b) {
+  return a + b;
+}
+
+#include 
+
+// expected-error@+2 {{invalid operands to binary expression ('__bfloat16' 
(aka 'struct __bfloat16_s') and '__bfloat16')}}
+__bfloat16 bar(__bfloat16 a, __bfloat16 b) {
+  return a + b;
+}
Index: clang/lib/Headers/avx512vlbf16intrin.h
===
--- clang/lib/Headers/avx512vlbf16intrin.h
+++ clang/lib/Headers/avx512vlbf16intrin.h
@@ -415,9 +415,10 @@
 ///and fraction field is truncated to 7 bits.
 static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) {
   __v4sf __V = {__A, 0, 0, 0};
-  __v8hi __R = __builtin_ia32_cvtneps2bf16_128_mask(
+  __v8hi __R1 = __builtin_ia32_cvtneps2bf16_128_mask(
   (__v4sf)__V, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
-  return __R[0];
+  __bfloat16 __R2 = {__R1[0]};
+  return __R2;
 }
 
 /// Convert Packed BF16 Data to Packed float Data.
Index: clang/lib/Headers/avx512bf16intrin.h
===
--- clang/lib/Headers/avx512bf16intrin.h
+++ clang/lib/Headers/avx512bf16intrin.h
@@ -15,7 +15,14 @@
 
 typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64)));
 typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
-typedef unsigned short __bfloat16;
+
+/// \typedef __bfloat16
+///A target specific type to represent the storage only brain 
floating-point
+///format type. Define through structure to explicitly prohibit any
+///arithmatic operations.
+typedef struct __bfloat16_s {
+  short _Value;
+} __bfloat16;
 
 #define __DEFAULT_FN_ATTRS512 \
   __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \
@@ -34,7 +41,7 @@
 /// \returns A float data whose sign field and exponent field keep unchanged,
 ///and fraction field is extended to 23 bits.
 static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(__bfloat16 __A) {
-  return __builtin_ia32_cvtsbf162ss_32(__A);
+  return __builtin_ia32_cvtsbf162ss_32(__A._Value);
 }
 
 /// Convert Two Packed Single Data to One Packed BF16 Data.


Index: clang/test/CodeGen/X86/avx512bf16-error.c
===
--- /dev/null
+++ clang/test/CodeGen/X86/avx512bf16-error.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -ffreestanding -triple x86_64-linux-pc %s
+
+// expected-error@+1 3 {{unknown type name '__bfloat16'}}
+__bfloat16 foo(__bfloat16 a, __bfloat16 b) {
+  return a + b;
+}
+
+#include 
+
+// expected-error@+2 {{invalid operands to binary expression ('__bfloat16' (aka 'struct __bfloat16_s') and '__bfloat16')}}
+__bfloat16 bar(__bfloat16 a, __bfloat16 b) {
+  return a + b;
+}
Index: clang/lib/Headers/avx512vlbf16intrin.h
===
--- clang/lib/Headers/avx512vlbf16intrin.h
+++ clang/lib/Headers/avx512vlbf16intrin.h
@@ -415,9 +415,10 @@
 ///and fraction field is truncated to 7 bits.
 static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) {
   __v4sf __V = {__A, 0, 0, 0};
-  __v8hi __R = __builtin_ia32_cvtneps2bf16_128_mask(
+  __v8hi __R1 = __builtin_ia32_cvtneps2bf16_128_mask(
   (__v4sf)__V, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
-  return __R[0];
+  __bfloat16 __R2 = {__R1[0]};
+  return __R2;
 }
 
 /// Convert Packed BF16 Data to Packed float Data.
Index: clang/lib/Headers/avx512bf16intrin.h
===
--- clang/lib/Headers/avx512bf16intrin.h
+++ clang/lib/Headers/avx512bf16intrin.h
@@ -15,7 +15,14 @@
 
 typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64)));
 typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32)));
-typedef unsigned short __bfloat16;
+
+/// \typedef __bfloat16
+///A target specific type to represent the storage only brain floating-point
+///format type. Define through structure to explicitly prohibit any
+///