[PATCH] aarch64: Add bfloat16_t support for aarch64

Jakub Jelinek via Gcc-patches Sun, 13 Nov 2022 13:00:45 -0800

Hi!

x86_64/i686 has had working std::bfloat16_t support for a few weeks now;
__bf16 there is no longer a storage-only type, but can be used for arithmetic
and is supported in libgcc and libstdc++.


The following patch adds similar support for AArch64.

Bootstrapped/regtested on aarch64-linux.

Regressions are:
+FAIL: 26_numerics/headers/cmath/functions_std_c++23.cc (test for excess errors)
this one is something I need to look at:
functions_std_c++23.cc:(.text._Z14test_functionsIDFb16_EvPT_PiPlPx[_Z14test_functionsIDFb16_EvPT_PiPlPx]+0x738):
 undefined reference to `__floatdibf'
(4 times).  I need to compare to x86, I believe we want to do a DI -> SF
conversion followed by SF -> BF, but it is unclear why that isn't happening.
+FAIL: gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c -march=armv8.2-a+sve -moverride=tune=none  (test for errors, line 21)
  svbfdot (f32, bf16, 0); /* { dg-error {invalid conversion to type 'bfloat16_t'} } */
This test checks for an error that is no longer emitted, so it could just be
adjusted.
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++11  scan-assembler \\t.global\\t_Z1fPu6__bf16
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++11  scan-assembler \\t.global\\t_Z1gPu6__bf16S_
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++11  scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++14  scan-assembler \\t.global\\t_Z1fPu6__bf16
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++14  scan-assembler \\t.global\\t_Z1gPu6__bf16S_
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++14  scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++17  scan-assembler \\t.global\\t_Z1fPu6__bf16
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++17  scan-assembler \\t.global\\t_Z1gPu6__bf16S_
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++17  scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++20  scan-assembler \\t.global\\t_Z1fPu6__bf16
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++20  scan-assembler \\t.global\\t_Z1gPu6__bf16S_
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++20  scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++2b  scan-assembler \\t.global\\t_Z1fPu6__bf16
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++2b  scan-assembler \\t.global\\t_Z1gPu6__bf16S_
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++2b  scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++98  scan-assembler \\t.global\\t_Z1fPu6__bf16
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++98  scan-assembler \\t.global\\t_Z1gPu6__bf16S_
+FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C  -std=c++98  scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE
These test the mangling, which changed from u6__bf16 to the standard DF16b.

Now, while on x86 we changed both the mangling and the behavior of __bf16, it
doesn't necessarily need to be like that on aarch64 (although it would be nice
for consistency): portable C++ code would just use the std::bfloat16_t type,
which libstdc++ defines as decltype(0.0bf16).
So, if you want to keep the previous mangling of the __bf16 type, or keep it a
storage-only type, we can always register some other name (__bfloat16_t or
whatever) and make __bf16 and __bfloat16_t distinct types (the former being
aarch64_bf16_type_node in the compiler, the latter bfloat16_type_node), so that
0.0bf16 has the latter type and libstdc++ uses it.

2022-11-13  Jakub Jelinek  <ja...@redhat.com>

gcc/
        * config/aarch64/aarch64.h (aarch64_bf16_type_node): Remove.
        (aarch64_bf16_ptr_type_node): Adjust comment.
        * config/aarch64/aarch64.cc (aarch64_gimplify_va_arg_expr): Use
        bfloat16_type_node rather than aarch64_bf16_type_node.
        (aarch64_mangle_type): Mangle BFmode as DF16b.
        (aarch64_libgcc_floating_mode_supported_p,
        aarch64_scalar_mode_supported_p): Also support BFmode.
        (aarch64_invalid_conversion, aarch64_invalid_unary_op): Remove.
        (aarch64_invalid_binary_op): Remove BFmode related rejections.
        (TARGET_INVALID_CONVERSION, TARGET_INVALID_UNARY_OP): Don't redefine.
        * config/aarch64/aarch64-builtins.cc (aarch64_bf16_type_node): Remove.
        (aarch64_int_or_fp_type): Use bfloat16_type_node rather than
        aarch64_bf16_type_node.
        (aarch64_init_simd_builtin_types,
        aarch64_init_simd_builtin_scalar_types): Likewise.
        (aarch64_init_bf16_types): Likewise.  Don't create bfloat16_type_node,
        which is created in tree.cc already.
        * config/aarch64/aarch64-sve-builtins.def (svbfloat16_t): Use
        bfloat16_type_node rather than aarch64_bf16_type_node.
libgcc/
        * config/aarch64/t-softfp (softfp_extensions): Add bfsf.
        (softfp_truncations): Add tfbf dfbf sfbf hfbf.
        * config/aarch64/libgcc-softfp.ver (GCC_13.0.0): Export
        __extendbfsf2 and __trunc{s,d,t,h}fbf2.
        * config/aarch64/sfp-machine.h (_FP_NANFRAC_B, _FP_NANSIGN_B): Define.

--- gcc/config/aarch64/aarch64.h
+++ gcc/config/aarch64/aarch64.h
@@ -1220,9 +1220,8 @@ extern const char *aarch64_rewrite_mcpu (int argc, const 
char **argv);
 extern GTY(()) tree aarch64_fp16_type_node;
 extern GTY(()) tree aarch64_fp16_ptr_type_node;
 
-/* This type is the user-visible __bf16, and a pointer to that type.  Defined
-   in aarch64-builtins.cc.  */
-extern GTY(()) tree aarch64_bf16_type_node;
+/* Pointer to the user-visible __bf16 type.  __bf16 itself is generic
+   bfloat16_type_node.  Defined in aarch64-builtins.cc.  */
 extern GTY(()) tree aarch64_bf16_ptr_type_node;
 
 /* The generic unwind code in libgcc does not initialize the frame pointer.
--- gcc/config/aarch64/aarch64-builtins.cc
+++ gcc/config/aarch64/aarch64-builtins.cc
@@ -918,7 +918,6 @@ tree aarch64_fp16_type_node = NULL_TREE;
 tree aarch64_fp16_ptr_type_node = NULL_TREE;
 
 /* Back-end node type for brain float (bfloat) types.  */
-tree aarch64_bf16_type_node = NULL_TREE;
 tree aarch64_bf16_ptr_type_node = NULL_TREE;
 
 /* Wrapper around add_builtin_function.  NAME is the name of the built-in
@@ -1010,7 +1009,7 @@ aarch64_int_or_fp_type (machine_mode mode,
     case E_DFmode:
       return double_type_node;
     case E_BFmode:
-      return aarch64_bf16_type_node;
+      return bfloat16_type_node;
     default:
       gcc_unreachable ();
     }
@@ -1124,8 +1123,8 @@ aarch64_init_simd_builtin_types (void)
   aarch64_simd_types[Float64x2_t].eltype = double_type_node;
 
   /* Init Bfloat vector types with underlying __bf16 type.  */
-  aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
-  aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;
+  aarch64_simd_types[Bfloat16x4_t].eltype = bfloat16_type_node;
+  aarch64_simd_types[Bfloat16x8_t].eltype = bfloat16_type_node;
 
   for (i = 0; i < nelts; i++)
     {
@@ -1197,7 +1196,7 @@ aarch64_init_simd_builtin_scalar_types (void)
                                             "__builtin_aarch64_simd_poly128");
   (*lang_hooks.types.register_builtin_type) (intTI_type_node,
                                             "__builtin_aarch64_simd_ti");
-  (*lang_hooks.types.register_builtin_type) (aarch64_bf16_type_node,
+  (*lang_hooks.types.register_builtin_type) (bfloat16_type_node,
                                             "__builtin_aarch64_simd_bf");
   /* Unsigned integer types for various mode sizes.  */
   (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
@@ -1682,13 +1681,8 @@ aarch64_init_fp16_types (void)
 static void
 aarch64_init_bf16_types (void)
 {
-  aarch64_bf16_type_node = make_node (REAL_TYPE);
-  TYPE_PRECISION (aarch64_bf16_type_node) = 16;
-  SET_TYPE_MODE (aarch64_bf16_type_node, BFmode);
-  layout_type (aarch64_bf16_type_node);
-
-  lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16");
-  aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node);
+  lang_hooks.types.register_builtin_type (bfloat16_type_node, "__bf16");
+  aarch64_bf16_ptr_type_node = build_pointer_type (bfloat16_type_node);
 }
 
 /* Pointer authentication builtins that will become NOP on legacy platform.
--- gcc/config/aarch64/aarch64.cc
+++ gcc/config/aarch64/aarch64.cc
@@ -19823,7 +19823,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, 
gimple_seq *pre_p,
          field_ptr_t = aarch64_fp16_ptr_type_node;
          break;
        case E_BFmode:
-         field_t = aarch64_bf16_type_node;
+         field_t = bfloat16_type_node;
          field_ptr_t = aarch64_bf16_ptr_type_node;
          break;
        case E_V2SImode:
@@ -20730,7 +20730,7 @@ aarch64_mangle_type (const_tree type)
       if (TYPE_MAIN_VARIANT (type) == float16_type_node)
        return NULL;
       if (TYPE_MODE (type) == BFmode)
-       return "u6__bf16";
+       return "DF16b";
       else
        return "Dh";
     }
@@ -26428,18 +26428,18 @@ aarch64_dwarf_poly_indeterminate_value (unsigned int 
i, unsigned int *factor,
 }
 
 /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
-   if MODE is HFmode, and punt to the generic implementation otherwise.  */
+   if MODE is [BH]Fmode, and punt to the generic implementation otherwise.  */
 
 static bool
 aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
 {
-  return (mode == HFmode
+  return ((mode == HFmode || mode == BFmode)
          ? true
          : default_libgcc_floating_mode_supported_p (mode));
 }
 
 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
-   if MODE is HFmode, and punt to the generic implementation otherwise.  */
+   if MODE is [BH]Fmode, and punt to the generic implementation otherwise.  */
 
 static bool
 aarch64_scalar_mode_supported_p (scalar_mode mode)
@@ -26447,7 +26447,7 @@ aarch64_scalar_mode_supported_p (scalar_mode mode)
   if (DECIMAL_FLOAT_MODE_P (mode))
     return default_decimal_float_supported_p ();
 
-  return (mode == HFmode
+  return ((mode == HFmode || mode == BFmode)
          ? true
          : default_scalar_mode_supported_p (mode));
 }
@@ -26905,39 +26905,6 @@ aarch64_stack_protect_guard (void)
   return NULL_TREE;
 }
 
-/* Return the diagnostic message string if conversion from FROMTYPE to
-   TOTYPE is not allowed, NULL otherwise.  */
-
-static const char *
-aarch64_invalid_conversion (const_tree fromtype, const_tree totype)
-{
-  if (element_mode (fromtype) != element_mode (totype))
-    {
-      /* Do no allow conversions to/from BFmode scalar types.  */
-      if (TYPE_MODE (fromtype) == BFmode)
-       return N_("invalid conversion from type %<bfloat16_t%>");
-      if (TYPE_MODE (totype) == BFmode)
-       return N_("invalid conversion to type %<bfloat16_t%>");
-    }
-
-  /* Conversion allowed.  */
-  return NULL;
-}
-
-/* Return the diagnostic message string if the unary operation OP is
-   not permitted on TYPE, NULL otherwise.  */
-
-static const char *
-aarch64_invalid_unary_op (int op, const_tree type)
-{
-  /* Reject all single-operand operations on BFmode except for &.  */
-  if (element_mode (type) == BFmode && op != ADDR_EXPR)
-    return N_("operation not permitted on type %<bfloat16_t%>");
-
-  /* Operation allowed.  */
-  return NULL;
-}
-
 /* Return the diagnostic message string if the binary operation OP is
    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
 
@@ -26945,11 +26912,6 @@ static const char *
 aarch64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
                           const_tree type2)
 {
-  /* Reject all 2-operand operations on BFmode.  */
-  if (element_mode (type1) == BFmode
-      || element_mode (type2) == BFmode)
-    return N_("operation not permitted on type %<bfloat16_t%>");
-
   if (VECTOR_TYPE_P (type1)
       && VECTOR_TYPE_P (type2)
       && !TYPE_INDIVISIBLE_P (type1)
@@ -27546,12 +27508,6 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_MANGLE_TYPE
 #define TARGET_MANGLE_TYPE aarch64_mangle_type
 
-#undef TARGET_INVALID_CONVERSION
-#define TARGET_INVALID_CONVERSION aarch64_invalid_conversion
-
-#undef TARGET_INVALID_UNARY_OP
-#define TARGET_INVALID_UNARY_OP aarch64_invalid_unary_op
-
 #undef TARGET_INVALID_BINARY_OP
 #define TARGET_INVALID_BINARY_OP aarch64_invalid_binary_op
 
--- gcc/config/aarch64/aarch64-sve-builtins.def
+++ gcc/config/aarch64/aarch64-sve-builtins.def
@@ -61,7 +61,7 @@ DEF_SVE_MODE (u64offset, none, svuint64_t, bytes)
 DEF_SVE_MODE (vnum, none, none, vectors)
 
 DEF_SVE_TYPE (svbool_t, 10, __SVBool_t, boolean_type_node)
-DEF_SVE_TYPE (svbfloat16_t, 14, __SVBfloat16_t, aarch64_bf16_type_node)
+DEF_SVE_TYPE (svbfloat16_t, 14, __SVBfloat16_t, bfloat16_type_node)
 DEF_SVE_TYPE (svfloat16_t, 13, __SVFloat16_t, aarch64_fp16_type_node)
 DEF_SVE_TYPE (svfloat32_t, 13, __SVFloat32_t, float_type_node)
 DEF_SVE_TYPE (svfloat64_t, 13, __SVFloat64_t, double_type_node)
--- libgcc/config/aarch64/t-softfp
+++ libgcc/config/aarch64/t-softfp
@@ -1,7 +1,7 @@
 softfp_float_modes := tf
 softfp_int_modes := si di ti
-softfp_extensions := sftf dftf hftf
-softfp_truncations := tfsf tfdf tfhf
+softfp_extensions := sftf dftf hftf bfsf
+softfp_truncations := tfsf tfdf tfhf tfbf dfbf sfbf hfbf
 softfp_exclude_libgcc2 := n
 softfp_extras := fixhfti fixunshfti floattihf floatuntihf
 
--- libgcc/config/aarch64/libgcc-softfp.ver
+++ libgcc/config/aarch64/libgcc-softfp.ver
@@ -26,3 +26,12 @@ GCC_11.0 {
   __mulhc3
   __trunctfhf2
 }
+
+%inherit GCC_13.0.0 GCC_11.0.0
+GCC_13.0.0 {
+  __extendbfsf2
+  __truncdfbf2
+  __truncsfbf2
+  __trunctfbf2
+  __trunchfbf2
+}
--- libgcc/config/aarch64/sfp-machine.h
+++ libgcc/config/aarch64/sfp-machine.h
@@ -43,10 +43,12 @@ typedef int __gcc_CMPtype __attribute__ ((mode 
(__libgcc_cmp_return__)));
 #define _FP_DIV_MEAT_Q(R,X,Y)  _FP_DIV_MEAT_2_udiv(Q,R,X,Y)
 
 #define _FP_NANFRAC_H          ((_FP_QNANBIT_H << 1) - 1)
+#define _FP_NANFRAC_B          ((_FP_QNANBIT_B << 1) - 1)
 #define _FP_NANFRAC_S          ((_FP_QNANBIT_S << 1) - 1)
 #define _FP_NANFRAC_D          ((_FP_QNANBIT_D << 1) - 1)
 #define _FP_NANFRAC_Q          ((_FP_QNANBIT_Q << 1) - 1), -1
 #define _FP_NANSIGN_H          0
+#define _FP_NANSIGN_B          0
 #define _FP_NANSIGN_S          0
 #define _FP_NANSIGN_D          0
 #define _FP_NANSIGN_Q          0

        Jakub

[PATCH] aarch64: Add bfloat16_t support for aarch64

Reply via email to