[backport gcc-10][AArch64] ACLE bf16 get

2020-12-10 Thread Dennis Zhang via Gcc-patches
Hi all,

This patch backports the commit 3553c658533e430b232997bdfd97faf6606fb102.
The original is approved at 
https://gcc.gnu.org/pipermail/gcc-patches/2020-November/557871.html
There is a change to remove FPCR-reading flag for builtin declaration since 
it's not supported in gcc-10.

Another change is to remove a test (bf16_get-be.c) that fails compiling on 
aarch64-none-linux-gnu in the original patch.
This is reported at 
https://gcc.gnu.org/pipermail/gcc-patches/2020-November/558195.html
The failure happens for several bf16 big-endian tests so the bug would be fixed 
in a separate patch.
And the test should be added after the bug is fixed.

Is it OK to backport?

Cheers
Dennis

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index ba2bda26dcdd4947dc724851433451433d378724..05726db1f6137f9ab29fcdd51f804199e24bbfcf 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -718,6 +718,10 @@
   VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, v4sf)
   VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, v4sf)
 
+  /* Implemented by aarch64_vget_lo/hi_halfv8bf.  */
+  VAR1 (UNOP, vget_lo_half, 0, v8bf)
+  VAR1 (UNOP, vget_hi_half, 0, v8bf)
+
   /* Implemented by aarch64_simd_mmlav16qi.  */
   VAR1 (TERNOP, simd_smmla, 0, v16qi)
   VAR1 (TERNOPU, simd_ummla, 0, v16qi)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9f0e2bd1e6ff5246f84e919402c687687a84beb8..43ac3cd40fe8379567b7a60772f360d37818e8e9 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7159,6 +7159,27 @@
   [(set_attr "type" "neon_dot")]
 )
 
+;; vget_low/high_bf16
+(define_expand "aarch64_vget_lo_halfv8bf"
+  [(match_operand:V4BF 0 "register_operand")
+   (match_operand:V8BF 1 "register_operand")]
+  "TARGET_BF16_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
+  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
+  DONE;
+})
+
+(define_expand "aarch64_vget_hi_halfv8bf"
+  [(match_operand:V4BF 0 "register_operand")
+   (match_operand:V8BF 1 "register_operand")]
+  "TARGET_BF16_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
+  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
+  DONE;
+})
+
 ;; bfmmla
 (define_insn "aarch64_bfmmlaqv4sf"
   [(set (match_operand:V4SF 0 "register_operand" "=w")
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 95bfa5ebba21b739ee3c84e3971337646f8881d4..0fd78a6fd076f788d2618c492a026246e61e438c 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -35680,6 +35680,20 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
   return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index);
 }
 
+__extension__ extern __inline bfloat16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vget_low_bf16 (bfloat16x8_t __a)
+{
+  return __builtin_aarch64_vget_lo_halfv8bf (__a);
+}
+
+__extension__ extern __inline bfloat16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vget_high_bf16 (bfloat16x8_t __a)
+{
+  return __builtin_aarch64_vget_hi_halfv8bf (__a);
+}
+
 __extension__ extern __inline bfloat16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvt_bf16_f32 (float32x4_t __a)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c
new file mode 100644
index ..2193753ffbb6246aa16eb5033559b21266a556a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c
@@ -0,0 +1,27 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+
+#include <arm_neon.h>
+
+/*
+**test_vget_low_bf16:
+** ret
+*/
+bfloat16x4_t test_vget_low_bf16 (bfloat16x8_t a)
+{
+  return vget_low_bf16 (a);
+}
+
+/*
+**test_vget_high_bf16:
+** dup	d0, v0.d\[1\]
+** ret
+*/
+bfloat16x4_t test_vget_high_bf16 (bfloat16x8_t a)
+{
+  return vget_high_bf16 (a);
+}


RE: [backport gcc-10][AArch64] ACLE bf16 get

2020-12-11 Thread Kyrylo Tkachov via Gcc-patches



> -----Original Message-----
> From: Dennis Zhang 
> Sent: 10 December 2020 14:35
> To: gcc-patches@gcc.gnu.org
> Cc: nd ; Richard Earnshaw ;
> Marcus Shawcroft ; Kyrylo Tkachov
> ; Richard Sandiford
> 
> Subject: [backport gcc-10][AArch64] ACLE bf16 get
> 
> Hi all,
> 
> This patch backports the commit
> 3553c658533e430b232997bdfd97faf6606fb102.
> The original is approved at https://gcc.gnu.org/pipermail/gcc-patches/2020-
> November/557871.html
> There is a change to remove FPCR-reading flag for builtin declaration since
> it's not supported in gcc-10.
> 
> Another change is to remove a test (bf16_get-be.c) that fails compiling on
> aarch64-none-linux-gnu in the original patch.
> This is reported at https://gcc.gnu.org/pipermail/gcc-patches/2020-
> November/558195.html
> The failure happens for several bf16 big-endian tests so the bug would be
> fixed in a separate patch.
> And the test should be added after the bug is fixed.
> 
> Is it OK to backport?

But do the tests added here work for big-endian?
Ok if they do.
Thanks,
Kyrill

> 
> Cheers
> Dennis


Re: [backport gcc-10][AArch64] ACLE bf16 get

2020-12-11 Thread Dennis Zhang via Gcc-patches
Hi Kyrylo,

> 
> From: Kyrylo Tkachov 
> Sent: Friday, December 11, 2020 11:58 AM
> To: Dennis Zhang; gcc-patches@gcc.gnu.org
> Cc: nd; Richard Earnshaw; Marcus Shawcroft; Richard Sandiford
> Subject: RE: [backport gcc-10][AArch64] ACLE bf16 get
> 
> > -----Original Message-----
> > From: Dennis Zhang 
> > Sent: 10 December 2020 14:35
> > To: gcc-patches@gcc.gnu.org
> > Cc: nd ; Richard Earnshaw ;
> > Marcus Shawcroft ; Kyrylo Tkachov
> > ; Richard Sandiford
> > 
> > Subject: [backport gcc-10][AArch64] ACLE bf16 get
> >
> > Hi all,
> >
> > This patch backports the commit
> > 3553c658533e430b232997bdfd97faf6606fb102.
> > The original is approved at https://gcc.gnu.org/pipermail/gcc-patches/2020-
> > November/557871.html
> > There is a change to remove FPCR-reading flag for builtin declaration since
> > it's not supported in gcc-10.
> >
> > Another change is to remove a test (bf16_get-be.c) that fails compiling on
> > aarch64-none-linux-gnu in the original patch.
> > This is reported at https://gcc.gnu.org/pipermail/gcc-patches/2020-
> > November/558195.html
> > The failure happens for several bf16 big-endian tests so the bug would be
> > fixed in a separate patch.
> > And the test should be added after the bug is fixed.
> >
> > Is it OK to backport?
> 
> But do the tests added here work for big-endian?
> Ok if they do.
> Thanks,
> Kyrill

Thanks for asking. The added test (bf16_get.c) works for both 
aarch64-none-linux-gnu and aarch64_be-none-linux-gnu.
The patch is committed as c25f7eac6555d67523f0520c7e93bbc398d0da84.

Cheers
Dennis