[GCC][PATCH][ARM] Add Bfloat16_t scalar type, vector types and machine modes to ARM back-end

Stam Markianos-Wright Fri, 20 Dec 2019 06:36:44 -0800

Hi all,

This patch was developed at the same time as the aarch64 version. Richards' 
feedback on that one also applies here and we'll be addressing them in a respin.


However, it's still useful to get this up for everyone (including ARM 
maintainers) to look at and comment on, too.

For reference, the latest emails in the AArch64 thread are at:
https://gcc.gnu.org/ml/gcc-patches/2019-12/msg01364.html
https://gcc.gnu.org/ml/gcc-patches/2019-12/msg01362.html

(The respin will also be split into two in a similar fashion to the AArch64 
version.)

Regression testing on arm-none-eabi passed successfully.

This patch depends on:

https://gcc.gnu.org/ml/gcc-patches/2019-12/msg00857.html

for test suite effective_target update.

Cheers,
Stam


ACLE documents are at https://developer.arm.com/docs/101028/latest
ISA documents are at https://developer.arm.com/docs/ddi0596/latest

Details on ARM Bfloat can be found here:
https://community.arm.com/developer/ip-products/processors/b/ml-ip-blog/posts/bfloat16-processing-for-neural-networks-on-armv8_2d00_a
 



gcc/ChangeLog:

2019-12-16  Stam Markianos-Wright  <stam.markianos-wri...@arm.com>

        * config.gcc: Add arm_bf16.h.
        * config/arm/arm-builtins.c (arm_mangle_builtin_type): Fix comment.
        (arm_simd_builtin_std_type): Add BFmode.
        (arm_init_simd_builtin_types): Define element types for vector types.
        (arm_init_bf16_types): New function.
        (arm_init_builtins): Add arm_init_bf16_types function call.
        * config/arm/arm-modes.def: Add BFmode and V4BF, V8BF vector modes.
        * config/arm/arm-simd-builtin-types.def: Add V4BF, V8BF.
        * config/arm/arm.c (aapcs_vfp_sub_candidate): Add BFmode.
        (arm_hard_regno_mode_ok): Add BFmode and tidy up statements.
        (arm_vector_mode_supported_p): Add V4BF, V8BF.
        (arm_invalid_conversion): New function for target hook.
        (arm_invalid_unary_op): New function for target hook.
        (arm_invalid_binary_op): New function for target hook.
        * config/arm/arm.h: Add V4BF, V8BF to VALID_NEON_DREG_MODE,
          VALID_NEON_QREG_MODE respectively. Add export arm_bf16_type_node,
          arm_bf16_ptr_type_node.
        * config/arm/arm.md: New not_bf16_scalar, not_bf16_vector values
          for the "arch" attribute.  Add BFmode to movhf expand
          pattern and define_split between ARM registers.
        * config/arm/arm_bf16.h: New file.
        * config/arm/arm_neon.h: Add arm_bf16.h and Bfloat vector types.
        * config/arm/iterators.md (ANY64_BF, VDXMOV, VHFBF, HFBF, fporbf): New.
          (VQXMOV): Add V8BF.
        * config/arm/neon.md: Add BF vector types to NEON move patterns.
        * config/arm/vfp.md: Add BFmode to movhf_vfp pattern.

gcc/testsuite/ChangeLog:

2019-12-16  Stam Markianos-Wright  <stam.markianos-wri...@arm.com>

        * gcc.target/arm/bfloat16_compile-1.c: New test.
        * gcc.target/arm/bfloat16_compile-2.c: New test.
        * gcc.target/arm/bfloat16_compile-3.c: New test.
        * gcc.target/arm/bfloat16_compile-4.c: New test.
        * gcc.target/arm/bfloat16_scalar_typecheck.c: New test.
        * gcc.target/arm/bfloat16_vector_typecheck1.c: New test.
        * gcc.target/arm/bfloat16_vector_typecheck2.c: New test.

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 5aa0130135fa3ce95df502b3f84e78832b368375..bf1b6319643cf333321970495f846392983255bd 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -344,7 +344,7 @@ arc*-*-*)
 arm*-*-*)
 	cpu_type=arm
 	extra_objs="arm-builtins.o aarch-common.o"
-	extra_headers="mmintrin.h arm_neon.h arm_acle.h arm_fp16.h arm_cmse.h"
+	extra_headers="mmintrin.h arm_neon.h arm_acle.h arm_fp16.h arm_cmse.h arm_bf16.h"
 	target_type_format_char='%'
 	c_target_objs="arm-c.o"
 	cxx_target_objs="arm-c.o"
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 2d902d0b325bc1fe5e22831ef8a59a2bb37c1225..b998a4b935d522ca9ec7b5a928fc6bcc6649d5a3 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -315,12 +315,14 @@ arm_set_sat_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 #define v8qi_UP  E_V8QImode
 #define v4hi_UP  E_V4HImode
 #define v4hf_UP  E_V4HFmode
+#define v4bf_UP  E_V4BFmode
 #define v2si_UP  E_V2SImode
 #define v2sf_UP  E_V2SFmode
 #define di_UP    E_DImode
 #define v16qi_UP E_V16QImode
 #define v8hi_UP  E_V8HImode
 #define v8hf_UP  E_V8HFmode
+#define v8bf_UP  E_V8BFmode
 #define v4si_UP  E_V4SImode
 #define v4sf_UP  E_V4SFmode
 #define v2di_UP  E_V2DImode
@@ -328,9 +330,10 @@ arm_set_sat_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 #define ei_UP	 E_EImode
 #define oi_UP	 E_OImode
 #define hf_UP	 E_HFmode
+#define bf_UP    E_BFmode
 #define si_UP	 E_SImode
 #define void_UP	 E_VOIDmode
-
+#define sf_UP	 E_SFmode
 #define UP(X) X##_UP
 
 typedef struct {
@@ -806,6 +809,11 @@ static struct arm_simd_type_info arm_simd_types [] = {
 
 /* The user-visible __fp16 type.  */
 tree arm_fp16_type_node = NULL_TREE;
+
+/* Back-end node type for brain float (bfloat) types.  */
+tree arm_bf16_type_node = NULL_TREE;
+tree arm_bf16_ptr_type_node = NULL_TREE;
+
 static tree arm_simd_intOI_type_node = NULL_TREE;
 static tree arm_simd_intEI_type_node = NULL_TREE;
 static tree arm_simd_intCI_type_node = NULL_TREE;
@@ -856,7 +864,7 @@ const char *
 arm_mangle_builtin_type (const_tree type)
 {
   const char *mangle;
-  /* Walk through all the AArch64 builtins types tables to filter out the
+  /* Walk through all the Arm builtins types tables to filter out the
      incoming type.  */
   if ((mangle = arm_mangle_builtin_vector_type (type))
       || (mangle = arm_mangle_builtin_scalar_type (type)))
@@ -897,6 +905,8 @@ arm_simd_builtin_std_type (machine_mode mode,
       return float_type_node;
     case E_DFmode:
       return double_type_node;
+    case E_BFmode:
+      return arm_bf16_type_node;
     default:
       gcc_unreachable ();
     }
@@ -1002,6 +1012,10 @@ arm_init_simd_builtin_types (void)
   arm_simd_types[Float32x2_t].eltype = float_type_node;
   arm_simd_types[Float32x4_t].eltype = float_type_node;
 
+  /* Init Bfloat vector types with underlying __bf16 scalar type.  */
+  arm_simd_types[Bfloat16x4_t].eltype = arm_bf16_type_node;
+  arm_simd_types[Bfloat16x8_t].eltype = arm_bf16_type_node;
+
   for (i = 0; i < nelts; i++)
     {
       tree eltype = arm_simd_types[i].eltype;
@@ -1187,6 +1201,19 @@ arm_init_builtin (unsigned int fcode, arm_builtin_datum *d,
   arm_builtin_decls[fcode] = fndecl;
 }
 
+/* Initialize the backend REAL_TYPE type supporting bfloat types.  */
+static void
+arm_init_bf16_types (void)
+{
+  arm_bf16_type_node = make_node (REAL_TYPE);
+  TYPE_PRECISION (arm_bf16_type_node) = 16;
+  SET_TYPE_MODE (arm_bf16_type_node, BFmode);
+  layout_type (arm_bf16_type_node);
+
+  (*lang_hooks.types.register_builtin_type) (arm_bf16_type_node, "__bf16");
+  arm_bf16_ptr_type_node = build_pointer_type (arm_bf16_type_node);
+}
+
 /* Set up ACLE builtins, even builtins for instructions that are not
    in the current target ISA to allow the user to compile particular modules
    with different target specific options that differ from the command line
@@ -1955,6 +1982,8 @@ arm_init_builtins (void)
      arm_init_neon_builtins which uses it.  */
   arm_init_fp16_builtins ();
 
+  arm_init_bf16_types ();
+
   if (TARGET_MAYBE_HARD_FLOAT)
     {
       arm_init_neon_builtins ();
diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
index 2ce53e0efba46608026f334794b6cdb8c2c3c957..80c3c1a6eb258d116b07ad71fafafc9befb76e8b 100644
--- a/gcc/config/arm/arm-modes.def
+++ b/gcc/config/arm/arm-modes.def
@@ -78,6 +78,11 @@ VECTOR_MODES (FLOAT, 8);      /*            V4HF V2SF */
 VECTOR_MODES (FLOAT, 16);     /*       V8HF V4SF V2DF */
 VECTOR_MODE (FLOAT, HF, 2);   /*                 V2HF */
 
+FLOAT_MODE (BF, 2, 0);
+ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format);
+VECTOR_MODE (FLOAT, BF, 4);   /*		 V4BF.  */
+VECTOR_MODE (FLOAT, BF, 8);   /*		 V8BF.  */
+
 /* Fraction and accumulator vector modes.  */
 VECTOR_MODES (FRACT, 4);      /* V4QQ  V2HQ */
 VECTOR_MODES (UFRACT, 4);     /* V4UQQ V2UHQ */
diff --git a/gcc/config/arm/arm-simd-builtin-types.def b/gcc/config/arm/arm-simd-builtin-types.def
index aff7926d58832b685b150e0d064c26f967a5ce83..ee240f85c5618417fff039ec43b81641b187c126 100644
--- a/gcc/config/arm/arm-simd-builtin-types.def
+++ b/gcc/config/arm/arm-simd-builtin-types.def
@@ -48,3 +48,5 @@
   ENTRY (Float16x8_t, V8HF, none, 128, float16, 19)
   ENTRY (Float32x4_t, V4SF, none, 128, float32, 19)
 
+  ENTRY (Bfloat16x4_t, V4BF, none, 64, bfloat16, 20)
+  ENTRY (Bfloat16x8_t, V8BF, none, 128, bfloat16, 20)
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 6c8ff6637d20f295d823e5e0f65e3abecf1254b6..a4b51c28512663e2567a009169e6bb1f7f3155c2 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -81,6 +81,11 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
    the backend.  Defined in arm-builtins.c.  */
 extern tree arm_fp16_type_node;
 
+/* This type is the user-visible __bf16.  We need it in a few places in
+   the backend.  Defined in arm-builtins.c.  */
+extern tree arm_bf16_type_node;
+extern tree arm_bf16_ptr_type_node;
+
 
 #undef  CPP_SPEC
 #define CPP_SPEC "%(subtarget_cpp_spec)					\
@@ -1019,12 +1024,14 @@ extern int arm_arch_bf16;
 /* Modes valid for Neon D registers.  */
 #define VALID_NEON_DREG_MODE(MODE) \
   ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
-   || (MODE) == V4HFmode || (MODE) == V2SFmode || (MODE) == DImode)
+   || (MODE) == V4HFmode || (MODE) == V2SFmode || (MODE) == DImode \
+   || (MODE) == V4BFmode)
 
 /* Modes valid for Neon Q registers.  */
 #define VALID_NEON_QREG_MODE(MODE) \
   ((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \
-   || (MODE) == V8HFmode || (MODE) == V4SFmode || (MODE) == V2DImode)
+   || (MODE) == V8HFmode || (MODE) == V4SFmode || (MODE) == V2DImode \
+   || (MODE) == V8BFmode)
 
 /* Structure modes valid for Neon registers.  */
 #define VALID_NEON_STRUCT_MODE(MODE) \
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 378601fd035d3cacde08ab868b08f281f4f03e18..d04f1527d4017e6f938fb3298f7f3dae20f5e78c 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -688,6 +688,15 @@ static const struct attribute_spec arm_attribute_table[] =
 #undef TARGET_MANGLE_TYPE
 #define TARGET_MANGLE_TYPE arm_mangle_type
 
+#undef TARGET_INVALID_CONVERSION
+#define TARGET_INVALID_CONVERSION arm_invalid_conversion
+
+#undef TARGET_INVALID_UNARY_OP
+#define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
+
+#undef TARGET_INVALID_BINARY_OP
+#define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
+
 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
 
@@ -6029,7 +6038,7 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
     {
     case REAL_TYPE:
       mode = TYPE_MODE (type);
-      if (mode != DFmode && mode != SFmode && mode != HFmode)
+      if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
 	return -1;
 
       if (*modep == VOIDmode)
@@ -24497,17 +24506,11 @@ arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
 
   if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
     {
-      if (mode == SFmode || mode == SImode)
-	return VFP_REGNO_OK_FOR_SINGLE (regno);
-
       if (mode == DFmode)
 	return VFP_REGNO_OK_FOR_DOUBLE (regno);
 
-      if (mode == HFmode)
-	return VFP_REGNO_OK_FOR_SINGLE (regno);
-
-      /* VFP registers can hold HImode values.  */
-      if (mode == HImode)
+      if (mode == HFmode || mode == BFmode || mode == HImode
+	  || mode == SFmode || mode == SImode)
 	return VFP_REGNO_OK_FOR_SINGLE (regno);
 
       if (TARGET_NEON)
@@ -28043,6 +28046,10 @@ arm_vector_mode_supported_p (machine_mode mode)
       || mode == V2DImode || mode == V8HFmode))
     return true;
 
+  /* This includes V4BF and V8BF only.  */
+  if (TARGET_BF16_SIMD && (mode == V4BFmode || mode == V8BFmode))
+    return true;
+
   if ((TARGET_NEON || TARGET_IWMMXT)
       && ((mode == V2SImode)
 	  || (mode == V4HImode)
@@ -32363,6 +32370,100 @@ arm_coproc_ldc_stc_legitimate_address (rtx op)
   return false;
 }
 
+
+/* Return a printable name for TYPE, for use in the BFmode diagnostics
+   below.  TYPE_NAME is not guaranteed to be a named TYPE_DECL: it can be
+   null or a bare IDENTIFIER_NODE, and a TYPE_DECL can itself be unnamed,
+   so each step is checked before it is dereferenced.  */
+
+static const char *
+arm_bf16_diag_type_name (const_tree type)
+{
+  tree name = TYPE_NAME (type);
+
+  if (name && TREE_CODE (name) == TYPE_DECL)
+    name = DECL_NAME (name);
+
+  if (name && TREE_CODE (name) == IDENTIFIER_NODE)
+    return IDENTIFIER_POINTER (name);
+
+  return "<anonymous>";
+}
+
+/* Return the diagnostic message string if conversion from FROMTYPE to
+   TOTYPE is not allowed, NULL otherwise.  A conversion is rejected when
+   either type has BFmode as its (element) mode and the two types have
+   different modes.  The message lives in a static buffer and is
+   overwritten by the next rejected conversion.  */
+
+static const char *
+arm_invalid_conversion (const_tree fromtype, const_tree totype)
+{
+  static char templ[100];
+
+  if ((GET_MODE_INNER (TYPE_MODE (fromtype)) == BFmode
+       || GET_MODE_INNER (TYPE_MODE (totype)) == BFmode)
+      && TYPE_MODE (fromtype) != TYPE_MODE (totype))
+    {
+      snprintf (templ, sizeof (templ),
+		"incompatible types when assigning to type '%s' from type '%s'",
+		arm_bf16_diag_type_name (totype),
+		arm_bf16_diag_type_name (fromtype));
+      /* TEMPL is filled in at run time, so it is returned as-is rather
+	 than through N_, which only marks string literals.  */
+      return templ;
+    }
+
+  /* Conversion allowed.  */
+  return NULL;
+}
+
+/* Return the diagnostic message string if the unary operation OP is
+   not permitted on TYPE, NULL otherwise.  Every operation other than
+   ADDR_EXPR is rejected when TYPE has BFmode as its (element) mode.  */
+
+static const char *
+arm_invalid_unary_op (int op, const_tree type)
+{
+  static char templ[100];
+
+  /* Reject all operations on BFmode except for &.  */
+  if (GET_MODE_INNER (TYPE_MODE (type)) == BFmode && op != ADDR_EXPR)
+    {
+      snprintf (templ, sizeof (templ),
+		"operation not permitted on type '%s'",
+		arm_bf16_diag_type_name (type));
+      return templ;
+    }
+
+  return NULL;
+}
+
+/* Return the diagnostic message string if the binary operation OP is
+   not permitted on TYPE1 and TYPE2, NULL otherwise.  Any operation is
+   rejected when either type has BFmode as its (element) mode.  */
+
+static const char *
+arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
+		       const_tree type2)
+{
+  static char templ[100];
+
+  /* Reject operations on BFmode.  */
+  if (GET_MODE_INNER (TYPE_MODE (type1)) == BFmode
+      || GET_MODE_INNER (TYPE_MODE (type2)) == BFmode)
+    {
+      snprintf (templ, sizeof (templ),
+		"operation not permitted on types '%s', '%s'",
+		arm_bf16_diag_type_name (type1),
+		arm_bf16_diag_type_name (type2));
+      return templ;
+    }
+
+  return NULL;
+}
+
+
 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
 
    In VFPv1, VFP registers could only be accessed in the mode they were
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 823312e7eac61b21bc61269ecbc902d1a37e0304..24fc30d515668620c7aecc905bb8f28d693cf16d 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -128,7 +128,7 @@
 ; arm_arch6.  "v6t2" for Thumb-2 with arm_arch6 and "v8mb" for ARMv8-M
 ; Baseline.  This attribute is used to compute attribute "enabled",
 ; use type "any" to enable an alternative in all cases.
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,v8mb,iwmmxt,iwmmxt2,armv6_or_vfpv3,neon"
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,v8mb,iwmmxt,iwmmxt2,armv6_or_vfpv3,neon,not_bf16_scalar,not_bf16_vector"
   (const_string "any"))
 
 (define_attr "arch_enabled" "no,yes"
@@ -182,6 +182,14 @@
 	 (and (eq_attr "arch" "neon")
 	      (match_test "TARGET_NEON"))
 	 (const_string "yes")
+
+	 (and (eq_attr "arch" "not_bf16_scalar")
+	      (match_test "!TARGET_BF16_FP"))
+	 (const_string "yes")
+
+	 (and (eq_attr "arch" "not_bf16_vector")
+	      (match_test "!TARGET_BF16_SIMD"))
+	 (const_string "yes")
 	]
 
 	(const_string "no")))
@@ -6181,8 +6189,8 @@
 )
 
 (define_split
-  [(set (match_operand:ANY64 0 "arm_general_register_operand" "")
-	(match_operand:ANY64 1 "arm_general_register_operand" ""))]
+  [(set (match_operand:ANY64_BF 0 "arm_general_register_operand" "")
+	(match_operand:ANY64_BF 1 "arm_general_register_operand" ""))]
   "TARGET_EITHER && reload_completed"
   [(set (match_dup 0) (match_dup 1))
    (set (match_dup 2) (match_dup 3))]
@@ -7130,25 +7138,25 @@
    (set_attr "length" "2,4,4,2,4,2,2,4,4")]
 )
 
-;; HFmode moves
-(define_expand "movhf"
-  [(set (match_operand:HF 0 "general_operand")
-	(match_operand:HF 1 "general_operand"))]
+;; HFmode and BFmode moves.
+(define_expand "mov<mode>"
+  [(set (match_operand:HFBF 0 "general_operand")
+	(match_operand:HFBF 1 "general_operand"))]
   "TARGET_EITHER"
   "
-  gcc_checking_assert (aligned_operand (operands[0], HFmode));
-  gcc_checking_assert (aligned_operand (operands[1], HFmode));
+  gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
+  gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
   if (TARGET_32BIT)
     {
       if (MEM_P (operands[0]))
-        operands[1] = force_reg (HFmode, operands[1]);
+	operands[1] = force_reg (<MODE>mode, operands[1]);
     }
   else /* TARGET_THUMB1 */
     {
       if (can_create_pseudo_p ())
         {
            if (!REG_P (operands[0]))
-	     operands[1] = force_reg (HFmode, operands[1]);
+	     operands[1] = force_reg (<MODE>mode, operands[1]);
         }
     }
   "
diff --git a/gcc/config/arm/arm_bf16.h b/gcc/config/arm/arm_bf16.h
new file mode 100644
index 0000000000000000000000000000000000000000..39f759a87bb4a54ffde2365794bb1131a2371a14
--- /dev/null
+++ b/gcc/config/arm/arm_bf16.h
@@ -0,0 +1,62 @@
+/* Arm BF16 intrinsics include file.
+
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   Contributed by Arm Ltd.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+#ifndef _GCC_ARM_BF16_H
+#define _GCC_ARM_BF16_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/* Select an FPU and an architecture with BF16 support for the
+   declarations below.  Each push_options is undone by one of the two
+   pop_options at the end of the file.  */
+#pragma GCC push_options
+#pragma GCC target ("fpu=fp-armv8")
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+bf16")
+
+
+#ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC
+
+typedef __bf16 bfloat16_t;
+typedef float float32_t;
+
+#endif
+
+
+#pragma GCC pop_options
+#pragma GCC pop_options
+
+/* Close the extern "C" block opened at the top of the file.  */
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 1f200d491d1de3993bc3a682d586da137958ff6b..231adf8299bcbd01d12ae2f8a4d2afcee145e1f3 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -39,6 +39,7 @@ extern "C" {
 #endif
 
 #include <arm_fp16.h>
+#include <arm_bf16.h>
 #include <stdint.h>
 
 typedef __simd64_int8_t int8x8_t;
@@ -83,6 +84,17 @@ typedef __simd128_uint64_t uint64x2_t;
 
 typedef float float32_t;
 
+#pragma GCC push_options
+#pragma GCC target ("fpu=neon-fp-armv8")
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+bf16")
+#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC
+typedef __simd128_bfloat16_t bfloat16x8_t;
+typedef __simd64_bfloat16_t bfloat16x4_t;
+#endif
+#pragma GCC pop_options
+#pragma GCC pop_options
+
 /* The Poly types are user visible and live in their own world,
    keep them that way.  */
 typedef __builtin_neon_poly8 poly8_t;
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index c412851843f4468c2c18bce264288705e076ac50..439021fa0733ac31706287c4f98d62b080afc3a1 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -28,6 +28,12 @@
 ;; registers.
 (define_mode_iterator ANY64 [DI DF V8QI V4HI V4HF V2SI V2SF])
 
+;; Additional definition of ANY64 that also includes the special V4BF mode.
+;; BFmode is allowed only on define_split between ARM registers.
+(define_mode_iterator ANY64_BF [(DI "") (DF "") (V8QI "") (V4HI "")
+				(V4BF "TARGET_BF16_SIMD") (V4HF "")
+				(V2SI "") (V2SF "")])
+
 (define_mode_iterator ANY128 [V2DI V2DF V16QI V8HI V4SI V4SF])
 
 ;; A list of integer modes that are up to one word long
@@ -80,6 +86,10 @@
 ;; Double-width vector modes plus 64-bit elements.
 (define_mode_iterator VDX [V8QI V4HI V4HF V2SI V2SF DI])
 
+;; Double-width vector modes plus 64-bit elements,
+;; with V4BFmode added, suitable for moves.
+(define_mode_iterator VDXMOV [V8QI V4HI V4HF V4BF V2SI V2SF DI])
+
 ;; Double-width vector modes, with V4HF - for vldN_lane and vstN_lane.
 (define_mode_iterator VD_LANE [V8QI V4HI V4HF V2SI V2SF])
 
@@ -101,8 +111,8 @@
 ;; Quad-width vector modes without floating-point elements.
 (define_mode_iterator VQI [V16QI V8HI V4SI])
 
-;; Quad-width vector modes, with TImode added, for moves.
-(define_mode_iterator VQXMOV [V16QI V8HI V8HF V4SI V4SF V2DI TI])
+;; Quad-width vector modes, with TImode and V8BFmode added, suitable for moves.
+(define_mode_iterator VQXMOV [V16QI V8HI V8HF V8BF V4SI V4SF V2DI TI])
 
 ;; Opaque structure types wider than TImode.
 (define_mode_iterator VSTRUCT [EI OI CI XI])
@@ -201,6 +211,13 @@
 ;; Vector modes for 16-bit floating-point support.
 (define_mode_iterator VH [V8HF V4HF])
 
+;; 16-bit floating-point vector modes suitable for moving (includes BFmode).
+(define_mode_iterator VHFBF [(V8HF "") (V4HF "") (V4BF "TARGET_BF16_SIMD")
+			     (V8BF "TARGET_BF16_SIMD")])
+
+;; 16-bit floating-point scalar modes suitable for moving (includes BFmode).
+(define_mode_iterator HFBF [(HF "") (BF "TARGET_BF16_FP")])
+
 ;; Iterators used for fixed-point support.
 (define_mode_iterator FIXED [QQ HQ SQ UQQ UHQ USQ HA SA UHA USA])
 
@@ -485,6 +502,9 @@
 ;; vtbl<n> suffix for NEON vector modes.
 (define_mode_attr VTAB_n [(TI "2") (EI "3") (OI "4")])
 
+;; __fp16 or __bf16 marker for 16-bit float modes.
+(define_mode_attr fporbf [(HF "fp16") (BF "bf16")])
+
 ;; (Opposite) mode to convert to/from for NEON mode conversions.
 (define_mode_attr V_CVTTO [(V2SI "V2SF") (V2SF "V2SI")
                (V4SI "V4SF") (V4SF "V4SI")])
@@ -804,6 +824,7 @@
 		     (V4HF "") (V8HF "_q")
 		     (V2SF "") (V4SF "_q")
 		     (V4HF "") (V8HF "_q")
+		     (V4BF "") (V8BF "_q")
 		     (DI "")   (V2DI "_q")
 		     (DF "")   (V2DF "_q")
 		     (HF "")])
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 6a0ee28efc9aa9f1fba7b5ae031564f40aa095fe..3e7ebd7464d4d42eac6a525b5f1b39eae08c9086 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -34,9 +34,9 @@
   [(set_attr "type" "neon_store1_1reg")])
 
 (define_insn "*neon_mov<mode>"
-  [(set (match_operand:VDX 0 "nonimmediate_operand"
+  [(set (match_operand:VDXMOV 0 "nonimmediate_operand"
 	  "=w,Un,w, w, w,  ?r,?w,?r, ?Us,*r")
-	(match_operand:VDX 1 "general_operand"
+	(match_operand:VDXMOV 1 "general_operand"
 	  " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))]
   "TARGET_NEON
    && (register_operand (operands[0], <MODE>mode)
@@ -78,7 +78,9 @@
   (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
   (set_attr "arm_pool_range"     "*,*,*,*,1020,*,*,1020,*,*")
   (set_attr "thumb2_pool_range"     "*,*,*,*,1018,*,*,1018,*,*")
-  (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")])
+  (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")
+  (set_attr "arch" "*,*,not_bf16_vector,not_bf16_vector,*,*,*,*,*,*")
+])
 
 (define_insn "*neon_mov<mode>"
   [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
@@ -123,7 +125,9 @@
    (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
    (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
    (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
-   (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])
+   (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")
+   (set_attr "arch" "*,*,not_bf16_vector,not_bf16_vector,*,*,*,*,*,*")
+])
 
 /* We define these mov expanders to match the standard mov$a optab to prevent
    the mid-end from trying to do a subreg for these modes which is the most
@@ -161,8 +165,8 @@
 })
 
 (define_expand "mov<mode>"
-  [(set (match_operand:VH 0 "s_register_operand")
-	(match_operand:VH 1 "s_register_operand"))]
+  [(set (match_operand:VHFBF 0 "s_register_operand")
+	(match_operand:VHFBF 1 "s_register_operand"))]
   "TARGET_NEON"
 {
   gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 1979aa6fdb423450a22ecf31f019b8c7ba15c903..caaccf056bd7b2e2525f0eae73a460b47c7abe7f 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -363,32 +363,32 @@
    (set_attr "arch"           "t2,any,any,any,a,t2,any,any,any,any,any,any")]
 )
 
-;; HFmode moves
+;; HFmode and BFmode moves
 
-(define_insn "*movhf_vfp_fp16"
-  [(set (match_operand:HF 0 "nonimmediate_operand"
-			  "= r,m,t,r,t,r,t,t,Um,r")
-	(match_operand:HF 1 "general_operand"
-			  "  m,r,t,r,r,t,Dv,Um,t,F"))]
+(define_insn "*mov<mode>_vfp_<mode>16"
+  [(set (match_operand:HFBF 0 "nonimmediate_operand"
+			  "= ?r,?m,t,r,t,r,t, t, Um,r")
+	(match_operand:HFBF 1 "general_operand"
+			  "  m,r,t,r,r,t,Dv,Um,t, F"))]
   "TARGET_32BIT
    && TARGET_VFP_FP16INST
-   && (s_register_operand (operands[0], HFmode)
-       || s_register_operand (operands[1], HFmode))"
+   && (s_register_operand (operands[0], <MODE>mode)
+       || s_register_operand (operands[1], <MODE>mode))"
  {
   switch (which_alternative)
     {
     case 0: /* ARM register from memory.  */
-      return \"ldrh%?\\t%0, %1\\t%@ __fp16\";
+      return \"ldrh%?\\t%0, %1\\t%@ __<fporbf>\";
     case 1: /* Memory from ARM register.  */
-      return \"strh%?\\t%1, %0\\t%@ __fp16\";
+      return \"strh%?\\t%1, %0\\t%@ __<fporbf>\";
     case 2: /* S register from S register.  */
-      return \"vmov\\t%0, %1\t%@ __fp16\";
+      return \"vmov\\t%0, %1\t%@ __<fporbf>\";
     case 3: /* ARM register from ARM register.  */
-      return \"mov%?\\t%0, %1\\t%@ __fp16\";
+      return \"mov%?\\t%0, %1\\t%@ __<fporbf>\";
     case 4: /* S register from ARM register.  */
     case 5: /* ARM register from S register.  */
     case 6: /* S register from immediate.  */
-      return \"vmov.f16\\t%0, %1\t%@ __fp16\";
+      return \"vmov.f16\\t%0, %1\t%@ __<fporbf>\";
     case 7: /* S register from memory.  */
       return \"vld1.16\\t{%z0}, %A1\";
     case 8: /* Memory from S register.  */
@@ -399,7 +399,7 @@
 	rtx ops[4];
 
 	bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (operands[1]),
-			       HFmode);
+			       <MODE>mode);
 	ops[0] = operands[0];
 	ops[1] = GEN_INT (bits);
 	ops[2] = GEN_INT (bits & 0xff00);
@@ -439,17 +439,19 @@
      (const_int 4)
      (if_then_else (match_test "arm_arch_thumb2")
       (const_int 4)
-      (const_int 8))])]
+      (const_int 8))])
+   (set_attr "arch" "*,*,*,*,*,*,not_bf16_scalar,*,*,not_bf16_scalar")
+  ]
 )
 
-(define_insn "*movhf_vfp_neon"
-  [(set (match_operand:HF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r")
-	(match_operand:HF 1 "general_operand"	   " Um, t,m,r,t,r,r,t,F"))]
+(define_insn "*mov<mode>_vfp_neon"
+  [(set (match_operand:HFBF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r")
+	(match_operand:HFBF 1 "general_operand"	   " Um, t,m,r,t,r,r,t,F"))]
   "TARGET_32BIT
    && TARGET_HARD_FLOAT && TARGET_NEON_FP16
    && !TARGET_VFP_FP16INST
-   && (   s_register_operand (operands[0], HFmode)
-       || s_register_operand (operands[1], HFmode))"
+   && (   s_register_operand (operands[0], <MODE>mode)
+       || s_register_operand (operands[1], <MODE>mode))"
   "*
   switch (which_alternative)
     {
@@ -458,13 +460,13 @@
     case 1:     /* memory from S register */
       return \"vst1.16\\t{%z1}, %A0\";
     case 2:     /* ARM register from memory */
-      return \"ldrh\\t%0, %1\\t%@ __fp16\";
+      return \"ldrh\\t%0, %1\\t%@ __<fporbf>\";
     case 3:     /* memory from ARM register */
-      return \"strh\\t%1, %0\\t%@ __fp16\";
+      return \"strh\\t%1, %0\\t%@ __<fporbf>\";
     case 4:	/* S register from S register */
       return \"vmov.f32\\t%0, %1\";
     case 5:	/* ARM register from ARM register */
-      return \"mov\\t%0, %1\\t%@ __fp16\";
+      return \"mov\\t%0, %1\\t%@ __<fporbf>\";
     case 6:	/* S register from ARM register */
       return \"vmov\\t%0, %1\";
     case 7:	/* ARM register from S register */
@@ -475,7 +477,7 @@
 	rtx ops[4];
 
 	bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (operands[1]),
-			       HFmode);
+			       <MODE>mode);
 	ops[0] = operands[0];
 	ops[1] = GEN_INT (bits);
 	ops[2] = GEN_INT (bits & 0xff00);
@@ -494,7 +496,8 @@
   [(set_attr "conds" "unconditional")
    (set_attr "type" "neon_load1_1reg,neon_store1_1reg,\
                      load_4,store_4,fmov,mov_reg,f_mcr,f_mrc,multiple")
-   (set_attr "length" "4,4,4,4,4,4,4,4,8")]
+   (set_attr "length" "4,4,4,4,4,4,4,4,8")
+   (set_attr "arch" "*,*,*,*,*,*,*,*,not_bf16_scalar")]
 )
 
 ;; FP16 without element load/store instructions.
diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_compile-1.c b/gcc/testsuite/gcc.target/arm/bfloat16_compile-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..3d9ac5097bfeaa06bddd8df37aca1a790323e5b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/bfloat16_compile-1.c
@@ -0,0 +1,50 @@
+/* { dg-do assemble { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon }  */
+/* { dg-additional-options "-O3 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+
+/*
+**stacktest1:
+**	...
+**	vst1.16	{d[0-9]+\[[0-9]+\]}, \[r[0-9]+\]
+**	vld1.16	{d[0-9]+\[[0-9]+\]}, \[r[0-9]+\]
+**	...
+**	bx	lr
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+  volatile bfloat16_t b = __a;
+  return b;
+}
+
+/*
+**stacktest2:
+**	...
+**	vstr	d[0-9]+, \[sp\]
+**	vldr	d[0-9]+, \[sp\]
+**	...
+**	bx	lr
+*/
+bfloat16x4_t stacktest2 (bfloat16x4_t __a)
+{
+  volatile bfloat16x4_t b = __a;
+  return b;
+}
+
+/*
+**stacktest3:
+**	...
+**	vst1.64	{d[0-9]+-d[0-9]+}, \[sp:[0-9]+\]
+**	vld1.64	{d[0-9]+-d[0-9]+}, \[sp:[0-9]+\]
+**	...
+**	bx	lr
+*/
+bfloat16x8_t stacktest3 (bfloat16x8_t __a)
+{
+  volatile bfloat16x8_t b = __a;
+  return b;
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_compile-2.c b/gcc/testsuite/gcc.target/arm/bfloat16_compile-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..f6bfbc6f217214765fb0b2e3c9e6b5444d3dad92
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/bfloat16_compile-2.c
@@ -0,0 +1,49 @@
+/* { dg-do assemble { target { arm*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-additional-options "-march=armv8.2-a+bf16 -mfloat-abi=softfp -mfpu=neon-fp-armv8 -O3 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+
+/*
+**stacktest1:
+**	...
+**	strh	r[0-9]+, \[r[0-9]+\]	@ __bf16
+**	ldrh	r[0-9]+, \[sp, #[0-9]+\]	@ __bf16
+**	...
+**	bx	lr
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+  volatile bfloat16_t b = __a;
+  return b;
+}
+
+/*
+**stacktest2:
+**	...
+**	strd	r[0-9]+, \[sp\]
+**	ldrd	r[0-9]+, \[sp\]
+**	...
+**	bx	lr
+*/
+bfloat16x4_t stacktest2 (bfloat16x4_t __a)
+{
+  volatile bfloat16x4_t b = __a;
+  return b;
+}
+
+/*
+**stacktest3:
+**	...
+**	stm	sp, {r[0-9]+-r[0-9]+}
+**	ldmia	sp, {r[0-9]+-r[0-9]+}
+**	...
+**	bx	lr
+*/
+bfloat16x8_t stacktest3 (bfloat16x8_t __a)
+{
+  volatile bfloat16x8_t b = __a;
+  return b;
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_scalar_typecheck.c b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_typecheck.c
new file mode 100644
index 0000000000000000000000000000000000000000..e24c50f12a69ee3ecbc7f6c108fbb7503bc0f9ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/bfloat16_scalar_typecheck.c
@@ -0,0 +1,83 @@
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-options "-march=armv8.6-a+bf16+fp16 -mfloat-abi=hard -mfpu=fp-armv8" } */
+
+#include <arm_neon.h>
+
+bfloat16_t glob;
+float is_a_float;
+int n;
+
+bfloat16_t footest (bfloat16_t scalar0)
+{
+
+  /* Initialisation.  */
+
+  bfloat16_t scalar1 = 0.1; /* { dg-error "incompatible types when assigning to type 'bfloat16_t' from type 'double'" "" {target *-*-*} } */
+  bfloat16_t scalar2 = 0;   /* { dg-error "incompatible types when assigning to type 'bfloat16_t' from type 'int'" "" {target *-*-*} } */
+  bfloat16_t scalar3 = {}; /* { dg-error "empty scalar initializer" "" {target *-*-*} } */
+
+  float16_t initi_a = scalar1; /* { dg-error "incompatible types when assigning to type 'float16_t' from type 'bfloat16_t'" "" {target *-*-*} } */
+  float16_t initi_b = { scalar1 }; /* { dg-error "incompatible types when assigning to type 'float16_t' from type 'bfloat16_t'" "" {target *-*-*} } */
+
+  /* Compound literals.  */
+
+  (bfloat16_t) {}; /* { dg-error "empty scalar initializer" "" {target *-*-*} } */
+  (bfloat16_t) { scalar1 }; /* Same-type compound literal: no diagnostic is listed for this line.  */
+
+  (int) { scalar1 }; /* { dg-error "incompatible types when assigning to type 'int' from type 'bfloat16_t'" "" {target *-*-*} } */
+
+  /* Casting.  */
+
+  (void) scalar1;
+  (bfloat16_t) scalar1;
+
+  /* Arrays and Structs.  */
+
+  typedef bfloat16_t array_type[2];
+  extern bfloat16_t extern_array[];
+
+  bfloat16_t array[2];
+  bfloat16_t zero_length_array[0];
+  bfloat16_t empty_init_array[] = {};
+  typedef bfloat16_t vla_type[n];
+
+  struct struct1 {
+    bfloat16_t a;
+  };
+
+  union union1 {
+    bfloat16_t a;
+  };
+
+  /* Assignments.  */
+
+  n = scalar1; /* { dg-error "incompatible types when assigning to type 'int' from type 'bfloat16_t'" "" {target *-*-*} } */
+  is_a_float = scalar1; /* { dg-error "incompatible types when assigning to type 'float' from type 'bfloat16_t'" "" {target *-*-*} } */
+  scalar1 = 0; /* { dg-error "incompatible types when assigning to type 'bfloat16_t' from type 'int'" "" {target *-*-*} } */
+  scalar1 = 0.1; /* { dg-error "incompatible types when assigning to type 'bfloat16_t' from type 'double'" "" {target *-*-*} } */
+  scalar1 = scalar2; /* Same-type assignment: no diagnostic is listed for this line.  */
+
+  /* Addressing and dereferencing.  */
+
+  bfloat16_t *bfloat_ptr = &scalar1;
+  scalar1 = *bfloat_ptr;
+
+  /* Pointer assignment.  */
+
+  bfloat16_t *bfloat_ptr2 = bfloat_ptr;
+
+  /* Single-operand operation.  */
+
+  scalar1 = !glob; /* { dg-error "operation not permitted on type 'bfloat16_t'" "" {target *-*-*} } */
+
+  /* Double-operand operations.  */
+
+  scalar1 = glob + *bfloat_ptr; /* { dg-error "operation not permitted on types 'bfloat16_t', 'bfloat16_t'" "" {target *-*-*} } */
+  scalar1 = glob + 0.1; /* { dg-error "operation not permitted on types 'bfloat16_t', 'double'" "" {target *-*-*} } */
+  scalar1 = glob + 0; /* { dg-error "operation not permitted on types 'bfloat16_t', 'int'" "" {target *-*-*} } */
+  scalar1 = glob + is_a_float; /* { dg-error "operation not permitted on types 'bfloat16_t', 'float'" "" {target *-*-*} } */
+
+  return scalar0;
+}
+
diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_vector_typecheck1.c b/gcc/testsuite/gcc.target/arm/bfloat16_vector_typecheck1.c
new file mode 100644
index 0000000000000000000000000000000000000000..923d1c467a7942680e85deda08bc9229b1363341
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/bfloat16_vector_typecheck1.c
@@ -0,0 +1,85 @@
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-options "-march=armv8.6-a+bf16+fp16 -mfloat-abi=hard -mfpu=neon-fp-armv8" } */
+
+#include <arm_neon.h>
+
+bfloat16x4_t glob;
+float is_a_float;
+float16x4_t is_a_floatx4;
+int16x4_t intvec;
+int n;
+short n2;
+bfloat16_t scalar0, scalar1, scalar2, scalar3;
+
+bfloat16x4_t footest (bfloat16x4_t vector0)
+{
+  /* Initialisation.  */
+
+  bfloat16x4_t vector1 = { 0.0, n, n2, is_a_float };
+  /* { dg-error "incompatible types when assigning to type '__bf16' from type 'double'" "" {target *-*-*} 19 } */
+  /* { dg-error "incompatible types when assigning to type '__bf16' from type 'int'" "" {target *-*-*} 19 } */
+  /* { dg-error "incompatible types when assigning to type '__bf16' from type 'short int'" "" {target *-*-*} 19 } */
+  /* { dg-error "incompatible types when assigning to type '__bf16' from type 'float'" "" {target *-*-*} 19 } */
+
+  bfloat16x4_t vector2 = {};
+
+  (bfloat16x4_t) {};
+
+  bfloat16x4_t vector4 = { scalar0, scalar1, scalar2, scalar3 }; /* All elements are bfloat16_t: no diagnostic is listed for this line.  */
+
+  float16x4_t initi_a = vector1; /* { dg-error "incompatible types when initializing type 'float16x4_t' using type 'bfloat16x4_t'" "" {target *-*-*} } */
+
+  /* Casting.  */
+
+  (void) vector1;
+  (bfloat16x4_t) vector1;
+
+  /* Arrays and Structs.  */
+
+  typedef bfloat16x4_t array_type[2];
+  extern bfloat16x4_t extern_array[];
+
+  bfloat16x4_t array[2];
+  bfloat16x4_t zero_length_array[0];
+  bfloat16x4_t empty_init_array[] = {};
+  typedef bfloat16x4_t some_other_type[n];
+
+  struct struct1 {
+    bfloat16x4_t a;
+  };
+
+  union union1 {
+    bfloat16x4_t a;
+  };
+
+  /* Assignments.  */
+
+  intvec = vector1; /* { dg-error "incompatible types when assigning to type 'int16x4_t' from type 'bfloat16x4_t'" "" {target *-*-*} } */
+  is_a_floatx4 = vector1; /* { dg-error "incompatible types when assigning to type 'float16x4_t' from type 'bfloat16x4_t'" "" {target *-*-*} } */
+  vector1 = 0; /* { dg-error "incompatible types when assigning to type 'bfloat16x4_t' from type 'int'" "" {target *-*-*} } */
+  vector1 = 0.1; /* { dg-error "incompatible types when assigning to type 'bfloat16x4_t' from type 'double'" "" {target *-*-*} } */
+  vector1 = vector2; /* Same-type assignment: no diagnostic is listed for this line.  */
+
+  /* Addressing and dereferencing.  */
+
+  bfloat16x4_t *bfloat_ptr = &vector1;
+  vector1 = *bfloat_ptr;
+
+  /* Pointer assignment.  */
+
+  bfloat16x4_t *bfloat_ptr2 = bfloat_ptr;
+
+  /* Single-operand operation.  */
+
+  vector1 = !glob; /* { dg-error "operation not permitted on type 'bfloat16x4_t'" "" {target *-*-*} } */
+
+  /* Double-operand operations.  */
+
+  vector1 = glob + *bfloat_ptr; /* { dg-error "operation not permitted on types 'bfloat16x4_t', 'bfloat16x4_t'" "" {target *-*-*} } */
+  vector1 = glob + 0.1; /* { dg-error "operation not permitted on types 'bfloat16x4_t', 'double'" "" {target *-*-*} } */
+  vector1 = glob + 0; /* { dg-error "operation not permitted on types 'bfloat16x4_t', 'int'" "" {target *-*-*} } */
+  vector1 = glob + is_a_floatx4; /* { dg-error "operation not permitted on types 'bfloat16x4_t', 'float16x4_t'" "" {target *-*-*} } */
+
+  return vector0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/bfloat16_vector_typecheck2.c b/gcc/testsuite/gcc.target/arm/bfloat16_vector_typecheck2.c
new file mode 100644
index 0000000000000000000000000000000000000000..a8fbdec28fe6a735c613f64ef7cd6c71c5f5671e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/bfloat16_vector_typecheck2.c
@@ -0,0 +1,95 @@
+/* { dg-do compile { target { arm*-*-* } } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-options "-march=armv8.6-a+bf16+fp16 -mfloat-abi=hard -mfpu=neon-fp-armv8" } */
+
+#include <arm_neon.h>
+
+bfloat16_t scalar0, scalar1, scalar2, scalar3;
+bfloat16x8_t glob;
+
+float16_t is_a_half_float;
+float16x8_t is_a_floatx8;
+int16x8_t intvec;
+
+float is_a_float;
+int is_an_int;
+unsigned int is_a_uint;
+short is_a_short_int;
+long is_long;
+long double is_long_double;
+
+bfloat16x8_t footest (bfloat16x8_t vector0)
+{
+  /* Initialisation.  */
+
+  bfloat16x8_t vector1 = { 0.0, is_an_int, is_a_short_int, is_a_float, is_a_half_float, is_long, is_a_uint, is_long_double };
+  /* { dg-error "incompatible types when assigning to type '__bf16' from type 'double'" "" {target *-*-*} 25 } */
+  /* { dg-error "incompatible types when assigning to type '__bf16' from type 'int'" "" {target *-*-*} 25 } */
+  /* { dg-error "incompatible types when assigning to type '__bf16' from type 'short int'" "" {target *-*-*} 25 } */
+  /* { dg-error "incompatible types when assigning to type '__bf16' from type 'float'" "" {target *-*-*} 25 } */
+  /* { dg-error "incompatible types when assigning to type '__bf16' from type 'float16_t'" "" {target *-*-*} 25 } */
+  /* { dg-error "incompatible types when assigning to type '__bf16' from type 'long int'" "" {target *-*-*} 25 } */
+  /* { dg-error "incompatible types when assigning to type '__bf16' from type 'unsigned int'" "" {target *-*-*} 25 } */
+  /* { dg-error "incompatible types when assigning to type '__bf16' from type 'long double'" "" {target *-*-*} 25 } */
+
+  bfloat16x8_t vector2 = {};
+
+  (bfloat16x8_t) {};
+
+  bfloat16x8_t vector4 = { scalar0, scalar1, scalar2, scalar3, scalar0, scalar1, scalar2, scalar3 }; /* All elements are bfloat16_t: no diagnostic is listed for this line.  */
+
+  float16x8_t initi_a = vector1; /* { dg-error "incompatible types when initializing type 'float16x8_t' using type 'bfloat16x8_t'" "" {target *-*-*} } */
+
+  /* Casting.  */
+
+  (void) vector1;
+  (bfloat16x8_t) vector1;
+
+  /* Arrays and Structs.  */
+
+  typedef bfloat16x8_t array_type[2];
+  extern bfloat16x8_t extern_array[];
+
+  bfloat16x8_t array[2];
+  bfloat16x8_t zero_length_array[0];
+  bfloat16x8_t empty_init_array[] = {};
+  typedef bfloat16x8_t some_other_type[is_an_int];
+
+  struct struct1 {
+    bfloat16x8_t a;
+  };
+
+  union union1 {
+    bfloat16x8_t a;
+  };
+
+  /* Assignments.  */
+
+  intvec = vector1; /* { dg-error "incompatible types when assigning to type 'int16x8_t' from type 'bfloat16x8_t'" "" {target *-*-*} } */
+  is_a_floatx8 = vector1; /* { dg-error "incompatible types when assigning to type 'float16x8_t' from type 'bfloat16x8_t'" "" {target *-*-*} } */
+  vector1 = 0; /* { dg-error "incompatible types when assigning to type 'bfloat16x8_t' from type 'int'" "" {target *-*-*} } */
+  vector1 = 0.1; /* { dg-error "incompatible types when assigning to type 'bfloat16x8_t' from type 'double'" "" {target *-*-*} } */
+  vector1 = vector2; /* Same-type assignment: no diagnostic is listed for this line.  */
+
+  /* Addressing and dereferencing.  */
+
+  bfloat16x8_t *bfloat_ptr = &vector1;
+  vector1 = *bfloat_ptr;
+
+  /* Pointer assignment.  */
+
+  bfloat16x8_t *bfloat_ptr2 = bfloat_ptr;
+
+  /* Single-operand operation.  */
+
+  vector1 = !glob; /* { dg-error "operation not permitted on type 'bfloat16x8_t'" "" {target *-*-*} } */
+
+  /* Double-operand operations.  */
+
+  vector1 = glob + *bfloat_ptr; /* { dg-error "operation not permitted on types 'bfloat16x8_t', 'bfloat16x8_t'" "" {target *-*-*} } */
+  vector1 = glob + 0.1; /* { dg-error "operation not permitted on types 'bfloat16x8_t', 'double'" "" {target *-*-*} } */
+  vector1 = glob + 0; /* { dg-error "operation not permitted on types 'bfloat16x8_t', 'int'" "" {target *-*-*} } */
+  vector1 = glob + is_a_floatx8; /* { dg-error "operation not permitted on types 'bfloat16x8_t', 'float16x8_t'" "" {target *-*-*} } */
+
+  return vector0;
+}

[GCC][PATCH][ARM] Add Bfloat16_t scalar type, vector types and machine modes to ARM back-end

Reply via email to