Hi! For a few weeks now, x86_64/i686 has had working std::bfloat16_t support: __bf16 there is no longer a storage-only type, but can be used for arithmetic and is supported in libgcc and libstdc++.
The following patch adds similar support for AArch64. Bootstrapped/regtested on aarch64-linux. Regressions are: +FAIL: 26_numerics/headers/cmath/functions_std_c++23.cc (test for excess errors) this one is something I need to look at: functions_std_c++23.cc:(.text._Z14test_functionsIDFb16_EvPT_PiPlPx[_Z14test_functionsIDFb16_EvPT_PiPlPx]+0x738): undefined reference to `__floatdibf' (4 times). I need to compare to x86, I believe we want to do a DI -> SF conversion followed by SF -> BF, but it is unclear why that isn't happening. +FAIL: gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c -march=armv8.2-a+sve -moverride=tune=none (test for errors, line 21) svbfdot (f32, bf16, 0); /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ This test tests for something that no longer fails, so could be just adjusted. +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++11 scan-assembler \\t.global\\t_Z1fPu6__bf16 +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++11 scan-assembler \\t.global\\t_Z1gPu6__bf16S_ +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++11 scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++14 scan-assembler \\t.global\\t_Z1fPu6__bf16 +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++14 scan-assembler \\t.global\\t_Z1gPu6__bf16S_ +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++14 scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++17 scan-assembler \\t.global\\t_Z1fPu6__bf16 +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++17 scan-assembler \\t.global\\t_Z1gPu6__bf16S_ +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++17 scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++20 scan-assembler \\t.global\\t_Z1fPu6__bf16 +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++20 scan-assembler 
\\t.global\\t_Z1gPu6__bf16S_ +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++20 scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++2b scan-assembler \\t.global\\t_Z1fPu6__bf16 +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++2b scan-assembler \\t.global\\t_Z1gPu6__bf16S_ +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++2b scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++98 scan-assembler \\t.global\\t_Z1fPu6__bf16 +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++98 scan-assembler \\t.global\\t_Z1gPu6__bf16S_ +FAIL: g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -std=c++98 scan-assembler \\t.global\\t_ZN1SIu6__bf16u6__bf16E1iE These test the mangling, which changed from u6__bf16 to the standard DF16b. Now, while on x86 we change the mangling and behavior of __bf16, it doesn't need to be necessarily like that on aarch64 (although it would be nice for consistency), for C++ portable code would just use std::bfloat16_t type which is in libstdc++ defined to decltype(0.0bf16). So, if you want to keep previous mangling of __bf16 type or keep it storage only type, we can always register some other name (__bfloat16_t or whatever), make __bf16 and __bfloat16_t be distinct types (former aarch64_bf16_type_node in the compiler, the latter bfloat16_type_node) and thus have 0.0bf16 have the latter type and libstdc++ using it. 2022-11-13 Jakub Jelinek <ja...@redhat.com> gcc/ * config/aarch64/aarch64.h (aarch64_bf16_type_node): Remove. (aarch64_bf16_ptr_type_node): Adjust comment. * config/aarch64/aarch64.cc (aarch64_gimplify_va_arg_expr): Use bfloat16_type_node rather than aarch64_bf16_type_node. (aarch64_mangle_type): Mangle BFmode as DF16b. (aarch64_libgcc_floating_mode_supported_p, aarch64_scalar_mode_supported_p): Also support BFmode. (aarch64_invalid_conversion, aarch64_invalid_unary_op): Remove. 
(aarch64_invalid_binary_op): Remove BFmode related rejections. (TARGET_INVALID_CONVERSION, TARGET_INVALID_UNARY_OP): Don't redefine. * config/aarch64/aarch64-builtins.cc (aarch64_bf16_type_node): Remove. (aarch64_int_or_fp_type): Use bfloat16_type_node rather than aarch64_bf16_type_node. (aarch64_init_simd_builtin_types): Likewise. (aarch64_init_bf16_types): Likewise. Don't create bfloat16_type_node, which is created in tree.cc already. * config/aarch64/aarch64-sve-builtins.def (svbfloat16_t): Likewise. libgcc/ * config/aarch64/t-softfp (softfp_extensions): Add bfsf. (softfp_truncations): Add tfbf dfbf sfbf hfbf. * config/aarch64/libgcc-softfp.ver (GCC_13.0.0): Export __extendbfsf2 and __trunc{s,d,t,h}fbf2. * config/aarch64/sfp-machine.h (_FP_NANFRAC_B, _FP_NANSIGN_B): Define. --- gcc/config/aarch64/aarch64.h +++ gcc/config/aarch64/aarch64.h @@ -1220,9 +1220,8 @@ extern const char *aarch64_rewrite_mcpu (int argc, const char **argv); extern GTY(()) tree aarch64_fp16_type_node; extern GTY(()) tree aarch64_fp16_ptr_type_node; -/* This type is the user-visible __bf16, and a pointer to that type. Defined - in aarch64-builtins.cc. */ -extern GTY(()) tree aarch64_bf16_type_node; +/* Pointer to the user-visible __bf16 type. __bf16 itself is generic + bfloat16_type_node. Defined in aarch64-builtins.cc. */ extern GTY(()) tree aarch64_bf16_ptr_type_node; /* The generic unwind code in libgcc does not initialize the frame pointer. --- gcc/config/aarch64/aarch64-builtins.cc +++ gcc/config/aarch64/aarch64-builtins.cc @@ -918,7 +918,6 @@ tree aarch64_fp16_type_node = NULL_TREE; tree aarch64_fp16_ptr_type_node = NULL_TREE; /* Back-end node type for brain float (bfloat) types. */ -tree aarch64_bf16_type_node = NULL_TREE; tree aarch64_bf16_ptr_type_node = NULL_TREE; /* Wrapper around add_builtin_function. 
NAME is the name of the built-in @@ -1010,7 +1009,7 @@ aarch64_int_or_fp_type (machine_mode mode, case E_DFmode: return double_type_node; case E_BFmode: - return aarch64_bf16_type_node; + return bfloat16_type_node; default: gcc_unreachable (); } @@ -1124,8 +1123,8 @@ aarch64_init_simd_builtin_types (void) aarch64_simd_types[Float64x2_t].eltype = double_type_node; /* Init Bfloat vector types with underlying __bf16 type. */ - aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node; - aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node; + aarch64_simd_types[Bfloat16x4_t].eltype = bfloat16_type_node; + aarch64_simd_types[Bfloat16x8_t].eltype = bfloat16_type_node; for (i = 0; i < nelts; i++) { @@ -1197,7 +1196,7 @@ aarch64_init_simd_builtin_scalar_types (void) "__builtin_aarch64_simd_poly128"); (*lang_hooks.types.register_builtin_type) (intTI_type_node, "__builtin_aarch64_simd_ti"); - (*lang_hooks.types.register_builtin_type) (aarch64_bf16_type_node, + (*lang_hooks.types.register_builtin_type) (bfloat16_type_node, "__builtin_aarch64_simd_bf"); /* Unsigned integer types for various mode sizes. */ (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node, @@ -1682,13 +1681,8 @@ aarch64_init_fp16_types (void) static void aarch64_init_bf16_types (void) { - aarch64_bf16_type_node = make_node (REAL_TYPE); - TYPE_PRECISION (aarch64_bf16_type_node) = 16; - SET_TYPE_MODE (aarch64_bf16_type_node, BFmode); - layout_type (aarch64_bf16_type_node); - - lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16"); - aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node); + lang_hooks.types.register_builtin_type (bfloat16_type_node, "__bf16"); + aarch64_bf16_ptr_type_node = build_pointer_type (bfloat16_type_node); } /* Pointer authentication builtins that will become NOP on legacy platform. 
--- gcc/config/aarch64/aarch64.cc +++ gcc/config/aarch64/aarch64.cc @@ -19823,7 +19823,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, field_ptr_t = aarch64_fp16_ptr_type_node; break; case E_BFmode: - field_t = aarch64_bf16_type_node; + field_t = bfloat16_type_node; field_ptr_t = aarch64_bf16_ptr_type_node; break; case E_V2SImode: @@ -20730,7 +20730,7 @@ aarch64_mangle_type (const_tree type) if (TYPE_MAIN_VARIANT (type) == float16_type_node) return NULL; if (TYPE_MODE (type) == BFmode) - return "u6__bf16"; + return "DF16b"; else return "Dh"; } @@ -26428,18 +26428,18 @@ aarch64_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor, } /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE - if MODE is HFmode, and punt to the generic implementation otherwise. */ + if MODE is [BH]Fmode, and punt to the generic implementation otherwise. */ static bool aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode) { - return (mode == HFmode + return ((mode == HFmode || mode == BFmode) ? true : default_libgcc_floating_mode_supported_p (mode)); } /* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE - if MODE is HFmode, and punt to the generic implementation otherwise. */ + if MODE is [BH]Fmode, and punt to the generic implementation otherwise. */ static bool aarch64_scalar_mode_supported_p (scalar_mode mode) @@ -26447,7 +26447,7 @@ aarch64_scalar_mode_supported_p (scalar_mode mode) if (DECIMAL_FLOAT_MODE_P (mode)) return default_decimal_float_supported_p (); - return (mode == HFmode + return ((mode == HFmode || mode == BFmode) ? true : default_scalar_mode_supported_p (mode)); } @@ -26905,39 +26905,6 @@ aarch64_stack_protect_guard (void) return NULL_TREE; } -/* Return the diagnostic message string if conversion from FROMTYPE to - TOTYPE is not allowed, NULL otherwise. 
*/ - -static const char * -aarch64_invalid_conversion (const_tree fromtype, const_tree totype) -{ - if (element_mode (fromtype) != element_mode (totype)) - { - /* Do no allow conversions to/from BFmode scalar types. */ - if (TYPE_MODE (fromtype) == BFmode) - return N_("invalid conversion from type %<bfloat16_t%>"); - if (TYPE_MODE (totype) == BFmode) - return N_("invalid conversion to type %<bfloat16_t%>"); - } - - /* Conversion allowed. */ - return NULL; -} - -/* Return the diagnostic message string if the unary operation OP is - not permitted on TYPE, NULL otherwise. */ - -static const char * -aarch64_invalid_unary_op (int op, const_tree type) -{ - /* Reject all single-operand operations on BFmode except for &. */ - if (element_mode (type) == BFmode && op != ADDR_EXPR) - return N_("operation not permitted on type %<bfloat16_t%>"); - - /* Operation allowed. */ - return NULL; -} - /* Return the diagnostic message string if the binary operation OP is not permitted on TYPE1 and TYPE2, NULL otherwise. */ @@ -26945,11 +26912,6 @@ static const char * aarch64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2) { - /* Reject all 2-operand operations on BFmode. 
*/ - if (element_mode (type1) == BFmode - || element_mode (type2) == BFmode) - return N_("operation not permitted on type %<bfloat16_t%>"); - if (VECTOR_TYPE_P (type1) && VECTOR_TYPE_P (type2) && !TYPE_INDIVISIBLE_P (type1) @@ -27546,12 +27508,6 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_MANGLE_TYPE #define TARGET_MANGLE_TYPE aarch64_mangle_type -#undef TARGET_INVALID_CONVERSION -#define TARGET_INVALID_CONVERSION aarch64_invalid_conversion - -#undef TARGET_INVALID_UNARY_OP -#define TARGET_INVALID_UNARY_OP aarch64_invalid_unary_op - #undef TARGET_INVALID_BINARY_OP #define TARGET_INVALID_BINARY_OP aarch64_invalid_binary_op --- gcc/config/aarch64/aarch64-sve-builtins.def +++ gcc/config/aarch64/aarch64-sve-builtins.def @@ -61,7 +61,7 @@ DEF_SVE_MODE (u64offset, none, svuint64_t, bytes) DEF_SVE_MODE (vnum, none, none, vectors) DEF_SVE_TYPE (svbool_t, 10, __SVBool_t, boolean_type_node) -DEF_SVE_TYPE (svbfloat16_t, 14, __SVBfloat16_t, aarch64_bf16_type_node) +DEF_SVE_TYPE (svbfloat16_t, 14, __SVBfloat16_t, bfloat16_type_node) DEF_SVE_TYPE (svfloat16_t, 13, __SVFloat16_t, aarch64_fp16_type_node) DEF_SVE_TYPE (svfloat32_t, 13, __SVFloat32_t, float_type_node) DEF_SVE_TYPE (svfloat64_t, 13, __SVFloat64_t, double_type_node) --- libgcc/config/aarch64/t-softfp +++ libgcc/config/aarch64/t-softfp @@ -1,7 +1,7 @@ softfp_float_modes := tf softfp_int_modes := si di ti -softfp_extensions := sftf dftf hftf -softfp_truncations := tfsf tfdf tfhf +softfp_extensions := sftf dftf hftf bfsf +softfp_truncations := tfsf tfdf tfhf tfbf dfbf sfbf hfbf softfp_exclude_libgcc2 := n softfp_extras := fixhfti fixunshfti floattihf floatuntihf --- libgcc/config/aarch64/libgcc-softfp.ver +++ libgcc/config/aarch64/libgcc-softfp.ver @@ -26,3 +26,12 @@ GCC_11.0 { __mulhc3 __trunctfhf2 } + +%inherit GCC_13.0.0 GCC_11.0.0 +GCC_13.0.0 { + __extendbfsf2 + __truncdfbf2 + __truncsfbf2 + __trunctfbf2 + __trunchfbf2 +} --- libgcc/config/aarch64/sfp-machine.h +++ libgcc/config/aarch64/sfp-machine.h @@ 
-43,10 +43,12 @@ typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); #define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y) #define _FP_NANFRAC_H ((_FP_QNANBIT_H << 1) - 1) +#define _FP_NANFRAC_B ((_FP_QNANBIT_B << 1) - 1) #define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) #define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1) #define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1 #define _FP_NANSIGN_H 0 +#define _FP_NANSIGN_B 0 #define _FP_NANSIGN_S 0 #define _FP_NANSIGN_D 0 #define _FP_NANSIGN_Q 0 Jakub