Hi all,

Second patch of the serie here adding vst1_lane_bf16, vst1q_lane_bf16
bfloat16 related neon intrinsics.

Please see refer to:
ACLE <https://developer.arm.com/docs/101028/latest>
ISA  <https://developer.arm.com/docs/ddi0596/latest>

Regtested and bootstrapped.

Okay for trunk?

  Andrea
  
>From 4b66af535f7c08c58633096210f1aef945de36f5 Mon Sep 17 00:00:00 2001
From: Andrea Corallo <andrea.cora...@arm.com>
Date: Fri, 23 Oct 2020 14:21:56 +0200
Subject: [PATCH] arm: Add vst1_lane_bf16 + vstq_lane_bf16 intrinsics

gcc/ChangeLog

2020-10-23  Andrea Corallo  <andrea.cora...@arm.com>

        * config/arm/arm-builtins.c (VAR14): Define macro.
        * config/arm/arm_neon.h (vst1_lane_bf16, vst1q_lane_bf16): Add
        intrinsics.
        * config/arm/arm_neon_builtins.def (STORE1LANE): Add v4bf, v8bf.

gcc/testsuite/ChangeLog

2020-10-23  Andrea Corallo  <andrea.cora...@arm.com>

        * gcc.target/arm/simd/vst1_lane_bf16_1.c: New testcase.
        * gcc.target/arm/simd/vstq1_lane_bf16_indices_1.c: Likewise.
        * gcc.target/arm/simd/vst1_lane_bf16_indices_1.c: Likewise.
---
 gcc/config/arm/arm-builtins.c                 |  3 +++
 gcc/config/arm/arm_neon.h                     | 14 +++++++++++++
 gcc/config/arm/arm_neon_builtins.def          |  4 ++--
 .../gcc.target/arm/simd/vst1_lane_bf16_1.c    | 21 +++++++++++++++++++
 .../arm/simd/vst1_lane_bf16_indices_1.c       | 15 +++++++++++++
 .../arm/simd/vstq1_lane_bf16_indices_1.c      | 15 +++++++++++++
 6 files changed, 70 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1_lane_bf16_1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1_lane_bf16_indices_1.c
 create mode 100644 
gcc/testsuite/gcc.target/arm/simd/vstq1_lane_bf16_indices_1.c

diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 33e8015b140..6dc5df93216 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -946,6 +946,9 @@ typedef struct {
 #define VAR13(T, N, A, B, C, D, E, F, G, H, I, J, K, L, M) \
   VAR12 (T, N, A, B, C, D, E, F, G, H, I, J, K, L) \
   VAR1 (T, N, M)
+#define VAR14(T, N, A, B, C, D, E, F, G, H, I, J, K, L, M, O) \
+  VAR13 (T, N, A, B, C, D, E, F, G, H, I, J, K, L, M) \
+  VAR1 (T, N, O)
 
 /* The builtin data can be found in arm_neon_builtins.def, arm_vfp_builtins.def
    and arm_acle_builtins.def.  The entries in arm_neon_builtins.def require
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index fcd8020425e..432d77fb272 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -19679,6 +19679,20 @@ vld1q_lane_bf16 (const bfloat16_t * __a, bfloat16x8_t 
__b, const int __c)
   return __builtin_neon_vld1_lanev8bf (__a, __b, __c);
 }
 
+__extension__ extern __inline void
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_lane_bf16 (bfloat16_t * __a, bfloat16x4_t __b, const int __c)
+{
+  __builtin_neon_vst1_lanev4bf (__a, __b, __c);
+}
+
+__extension__ extern __inline void
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_lane_bf16 (bfloat16_t * __a, bfloat16x8_t __b, const int __c)
+{
+  __builtin_neon_vst1_lanev8bf (__a, __b, __c);
+}
+
 #pragma GCC pop_options
 
 #ifdef __cplusplus
diff --git a/gcc/config/arm/arm_neon_builtins.def 
b/gcc/config/arm/arm_neon_builtins.def
index 7cdcd251243..3db7fb9f1f3 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -318,8 +318,8 @@ VAR10 (LOAD1, vld1_dup,
        v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
 VAR12 (STORE1, vst1,
        v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
-VAR12 (STORE1LANE, vst1_lane,
-       v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
+VAR14 (STORE1LANE, vst1_lane,
+       v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, 
v4bf, v8bf)
 VAR13 (LOAD1, vld2,
        v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v4bf, 
v8bf)
 VAR9 (LOAD1LANE, vld2_lane,
diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_lane_bf16_1.c 
b/gcc/testsuite/gcc.target/arm/simd/vst1_lane_bf16_1.c
new file mode 100644
index 00000000000..e018ec6592f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vst1_lane_bf16_1.c
@@ -0,0 +1,21 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-O3 --save-temps" } */
+
+#include "arm_neon.h"
+
+void
+test_vst1_lane_bf16 (bfloat16_t *a, bfloat16x4_t b)
+{
+  vst1_lane_bf16 (a, b, 1);
+}
+
+void
+test_vst1q_lane_bf16 (bfloat16_t *a, bfloat16x8_t b)
+{
+  vst1q_lane_bf16 (a, b, 2);
+}
+
+/* { dg-final { scan-assembler "vst1.16\t{d0\\\[1\\\]}, \\\[r0\\\]" } } */
+/* { dg-final { scan-assembler "vst1.16\t{d0\\\[2\\\]}, \\\[r0\\\]" } } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_lane_bf16_indices_1.c 
b/gcc/testsuite/gcc.target/arm/simd/vst1_lane_bf16_indices_1.c
new file mode 100644
index 00000000000..39870dc054c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vst1_lane_bf16_indices_1.c
@@ -0,0 +1,15 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include "arm_neon.h"
+
+void
+test_vst1_lane_bf16 (bfloat16_t *a, bfloat16x4_t b)
+{
+  vst1_lane_bf16 (a, b, -1);
+  vst1_lane_bf16 (a, b, 4);
+}
+
+/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/vstq1_lane_bf16_indices_1.c 
b/gcc/testsuite/gcc.target/arm/simd/vstq1_lane_bf16_indices_1.c
new file mode 100644
index 00000000000..f31bd120fc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vstq1_lane_bf16_indices_1.c
@@ -0,0 +1,15 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+
+#include "arm_neon.h"
+
+void
+test_vstq1_lane_bf16 (bfloat16_t *a, bfloat16x8_t b)
+{
+  vst1q_lane_bf16 (a, b, -1);
+  vst1q_lane_bf16 (a, b, 8);
+}
+
+/* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
+/* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
-- 
2.20.1

Reply via email to