Re: [GCC][PATCH][ARM] Add vreinterpret, vdup, vget and vset bfloat16 intrinsic
Hi, On Thu, 27 Feb 2020 at 18:03, Kyrill Tkachov wrote: > > Hi Mihail, > > On 2/27/20 2:44 PM, Mihail Ionescu wrote: > > Hi Kyrill, > > > > On 02/27/2020 11:09 AM, Kyrill Tkachov wrote: > >> Hi Mihail, > >> > >> On 2/27/20 10:27 AM, Mihail Ionescu wrote: > >>> Hi, > >>> > >>> This patch adds support for the bf16 vector create, get, set, > >>> duplicate and reinterpret intrinsics. > >>> ACLE documents are at https://developer.arm.com/docs/101028/latest > >>> ISA documents are at https://developer.arm.com/docs/ddi0596/latest > >>> > >>> Regression tested on arm-none-eabi. > >>> > >>> > >>> gcc/ChangeLog: > >>> > >>> 2020-02-27 Mihail Ionescu > >>> > >>> * (__ARM_NUM_LANES, __arm_lane, __arm_lane_q): Move to the > >>> beginning of the file. > >>> (vcreate_bf16, vcombine_bf16): New. > >>> (vdup_n_bf16, vdupq_n_bf16): New. > >>> (vdup_lane_bf16, vdup_laneq_bf16): New. > >>> (vdupq_lane_bf16, vdupq_laneq_bf16): New. > >>> (vduph_lane_bf16, vduph_laneq_bf16): New. > >>> (vset_lane_bf16, vsetq_lane_bf16): New. > >>> (vget_lane_bf16, vgetq_lane_bf16): New. > >>> (vget_high_bf16, vget_low_bf16): New. > >>> (vreinterpret_bf16_u8, vreinterpretq_bf16_u8): New. > >>> (vreinterpret_bf16_u16, vreinterpretq_bf16_u16): New. > >>> (vreinterpret_bf16_u32, vreinterpretq_bf16_u32): New. > >>> (vreinterpret_bf16_u64, vreinterpretq_bf16_u64): New. > >>> (vreinterpret_bf16_s8, vreinterpretq_bf16_s8): New. > >>> (vreinterpret_bf16_s16, vreinterpretq_bf16_s16): New. > >>> (vreinterpret_bf16_s32, vreinterpretq_bf16_s32): New. > >>> (vreinterpret_bf16_s64, vreinterpretq_bf16_s64): New. > >>> (vreinterpret_bf16_p8, vreinterpretq_bf16_p8): New. > >>> (vreinterpret_bf16_p16, vreinterpretq_bf16_p16): New. > >>> (vreinterpret_bf16_p64, vreinterpretq_bf16_p64): New. > >>> (vreinterpret_bf16_f32, vreinterpretq_bf16_f32): New. > >>> (vreinterpret_bf16_f64, vreinterpretq_bf16_f64): New. > >>> (vreinterpretq_bf16_p128): New. > >>> (vreinterpret_s8_bf16, vreinterpretq_s8_bf16): New. 
> >>> (vreinterpret_s16_bf16, vreinterpretq_s16_bf16): New. > >>> (vreinterpret_s32_bf16, vreinterpretq_s32_bf16): New. > >>> (vreinterpret_s64_bf16, vreinterpretq_s64_bf16): New. > >>> (vreinterpret_u8_bf16, vreinterpretq_u8_bf16): New. > >>> (vreinterpret_u16_bf16, vreinterpretq_u16_bf16): New. > >>> (vreinterpret_u32_bf16, vreinterpretq_u32_bf16): New. > >>> (vreinterpret_u64_bf16, vreinterpretq_u64_bf16): New. > >>> (vreinterpret_p8_bf16, vreinterpretq_p8_bf16): New. > >>> (vreinterpret_p16_bf16, vreinterpretq_p16_bf16): New. > >>> (vreinterpret_p64_bf16, vreinterpretq_p64_bf16): New. > >>> (vreinterpret_f32_bf16, vreinterpretq_f32_bf16): New. > >>> (vreinterpretq_p128_bf16): New. > >>> * config/arm/arm_neon_builtins.def (VDX): Add V4BF. > >>> (V_elem): Likewise. > >>> (V_elem_l): Likewise. > >>> (VD_LANE): Likewise. > >>> (VQX): Add V8BF. > >>> (V_DOUBLE): Likewise. > >>> (VDQX): Add V4BF and V8BF. > >>> (V_two_elem, V_three_elem, V_four_elem): Likewise. > >>> (V_reg): Likewise. > >>> (V_HALF): Likewise. > >>> (V_double_vector_mode): Likewise. > >>> (V_cmp_result): Likewise. > >>> (V_uf_sclr): Likewise. > >>> (V_sz_elem): Likewise. > >>> (Is_d_reg): Likewise. > >>> (V_mode_nunits): Likewise. > >>> * config/arm/neon.md (neon_vdup_lane): Enable for BFloat. > >>> > >>> gcc/testsuite/ChangeLog: > >>> > >>> 2020-02-27 Mihail Ionescu > >>> > >>> * gcc.target/arm/bf16_dup.c: New test. > >>> * gcc.target/arm/bf16_reinterpret.c: Likewise. > >>> > >>> Is it ok for trunk? > >> > >> This looks mostly ok with a few nits... 
> >> > >> > >>> > >>> Regards, > >>> Mihail > >>> > >>> > >>> ### Attachment also inlined for ease of reply > >>> ### > >>> > >>> > >>> diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h > >>> index > >>> 09297831cdcd6e695843c17b7724c114f3a129fe..5901a8f1fb84f204ae95f0ccc97bf5ae944c482c > >>> 100644 > >>> --- a/gcc/config/arm/arm_neon.h > >>> +++ b/gcc/config/arm/arm_neon.h > >>> @@ -42,6 +42,15 @@ extern "C" { > >>> #include <arm_fp16.h> > >>> #include <stdint.h> > >>> > >>> +#ifdef __ARM_BIG_ENDIAN > >>> +#define __ARM_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0])) > >>> +#define __arm_lane(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec) - > >>> 1)) > >>> +#define __arm_laneq(__vec, __idx) (__idx ^ > >>> (__ARM_NUM_LANES(__vec)/2 - 1)) > >>> +#else > >>> +#define __arm_lane(__vec, __idx) __idx > >>> +#define __arm_laneq(__vec, __idx) __idx > >>> +#endif > >>> + > >>> typedef __simd64_int8_t int8x8_t; > >>> typedef __simd64_int16_t int16x4_t; > >>> typedef __simd64_int32_t int32x2_
Re: [GCC][PATCH][ARM] Add vreinterpret, vdup, vget and vset bfloat16 intrinsic
Hi Mihail, On 2/27/20 2:44 PM, Mihail Ionescu wrote: Hi Kyrill, On 02/27/2020 11:09 AM, Kyrill Tkachov wrote: Hi Mihail, On 2/27/20 10:27 AM, Mihail Ionescu wrote: Hi, This patch adds support for the bf16 vector create, get, set, duplicate and reinterpret intrinsics. ACLE documents are at https://developer.arm.com/docs/101028/latest ISA documents are at https://developer.arm.com/docs/ddi0596/latest Regression tested on arm-none-eabi. gcc/ChangeLog: 2020-02-27 Mihail Ionescu * (__ARM_NUM_LANES, __arm_lane, __arm_lane_q): Move to the beginning of the file. (vcreate_bf16, vcombine_bf16): New. (vdup_n_bf16, vdupq_n_bf16): New. (vdup_lane_bf16, vdup_laneq_bf16): New. (vdupq_lane_bf16, vdupq_laneq_bf16): New. (vduph_lane_bf16, vduph_laneq_bf16): New. (vset_lane_bf16, vsetq_lane_bf16): New. (vget_lane_bf16, vgetq_lane_bf16): New. (vget_high_bf16, vget_low_bf16): New. (vreinterpret_bf16_u8, vreinterpretq_bf16_u8): New. (vreinterpret_bf16_u16, vreinterpretq_bf16_u16): New. (vreinterpret_bf16_u32, vreinterpretq_bf16_u32): New. (vreinterpret_bf16_u64, vreinterpretq_bf16_u64): New. (vreinterpret_bf16_s8, vreinterpretq_bf16_s8): New. (vreinterpret_bf16_s16, vreinterpretq_bf16_s16): New. (vreinterpret_bf16_s32, vreinterpretq_bf16_s32): New. (vreinterpret_bf16_s64, vreinterpretq_bf16_s64): New. (vreinterpret_bf16_p8, vreinterpretq_bf16_p8): New. (vreinterpret_bf16_p16, vreinterpretq_bf16_p16): New. (vreinterpret_bf16_p64, vreinterpretq_bf16_p64): New. (vreinterpret_bf16_f32, vreinterpretq_bf16_f32): New. (vreinterpret_bf16_f64, vreinterpretq_bf16_f64): New. (vreinterpretq_bf16_p128): New. (vreinterpret_s8_bf16, vreinterpretq_s8_bf16): New. (vreinterpret_s16_bf16, vreinterpretq_s16_bf16): New. (vreinterpret_s32_bf16, vreinterpretq_s32_bf16): New. (vreinterpret_s64_bf16, vreinterpretq_s64_bf16): New. (vreinterpret_u8_bf16, vreinterpretq_u8_bf16): New. (vreinterpret_u16_bf16, vreinterpretq_u16_bf16): New. (vreinterpret_u32_bf16, vreinterpretq_u32_bf16): New. 
(vreinterpret_u64_bf16, vreinterpretq_u64_bf16): New. (vreinterpret_p8_bf16, vreinterpretq_p8_bf16): New. (vreinterpret_p16_bf16, vreinterpretq_p16_bf16): New. (vreinterpret_p64_bf16, vreinterpretq_p64_bf16): New. (vreinterpret_f32_bf16, vreinterpretq_f32_bf16): New. (vreinterpretq_p128_bf16): New. * config/arm/arm_neon_builtins.def (VDX): Add V4BF. (V_elem): Likewise. (V_elem_l): Likewise. (VD_LANE): Likewise. (VQX) Add V8BF. (V_DOUBLE): Likewise. (VDQX): Add V4BF and V8BF. (V_two_elem, V_three_elem, V_four_elem): Likewise. (V_reg): Likewise. (V_HALF): Likewise. (V_double_vector_mode): Likewise. (V_cmp_result): Likewise. (V_uf_sclr): Likewise. (V_sz_elem): Likewise. (Is_d_reg): Likewise. (V_mode_nunits): Likewise. * config/arm/neon.md (neon_vdup_lane): Enable for BFloat. gcc/testsuite/ChangeLog: 2020-02-27 Mihail Ionescu * gcc.target/arm/bf16_dup.c: New test. * gcc.target/arm/bf16_reinterpret.c: Likewise. Is it ok for trunk? This looks mostly ok with a few nits... Regards, Mihail ### Attachment also inlined for ease of reply ### diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 09297831cdcd6e695843c17b7724c114f3a129fe..5901a8f1fb84f204ae95f0ccc97bf5ae944c482c 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -42,6 +42,15 @@ extern "C" { #include #include +#ifdef __ARM_BIG_ENDIAN +#define __ARM_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0])) +#define __arm_lane(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec) - 1)) +#define __arm_laneq(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec)/2 - 1)) +#else +#define __arm_lane(__vec, __idx) __idx +#define __arm_laneq(__vec, __idx) __idx +#endif + typedef __simd64_int8_t int8x8_t; typedef __simd64_int16_t int16x4_t; typedef __simd64_int32_t int32x2_t; @@ -6147,14 +6156,6 @@ vget_lane_s32 (int32x2_t __a, const int __b) /* For big-endian, GCC's vector indices are reversed within each 64 bits compared to the architectural lane indices used by Neon intrinsics. 
*/ Please move this comment as well. -#ifdef __ARM_BIG_ENDIAN -#define __ARM_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0])) -#define __arm_lane(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec) - 1)) -#define __arm_laneq(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec)/2 - 1)) -#else -#define __arm_lane(__vec, __idx) __idx -#define __arm_laneq(__vec, __idx) __idx -#endif #define vget_lane_f16(__v, __idx) \ __extension__ \ @@ -14476,6 +14477,15 @@ vreinterpret_p16_u32 (uint32x2
Re: [GCC][PATCH][ARM] Add vreinterpret, vdup, vget and vset bfloat16 intrinsic
Hi Kyrill, On 02/27/2020 11:09 AM, Kyrill Tkachov wrote: Hi Mihail, On 2/27/20 10:27 AM, Mihail Ionescu wrote: Hi, This patch adds support for the bf16 vector create, get, set, duplicate and reinterpret intrinsics. ACLE documents are at https://developer.arm.com/docs/101028/latest ISA documents are at https://developer.arm.com/docs/ddi0596/latest Regression tested on arm-none-eabi. gcc/ChangeLog: 2020-02-27 Mihail Ionescu * (__ARM_NUM_LANES, __arm_lane, __arm_lane_q): Move to the beginning of the file. (vcreate_bf16, vcombine_bf16): New. (vdup_n_bf16, vdupq_n_bf16): New. (vdup_lane_bf16, vdup_laneq_bf16): New. (vdupq_lane_bf16, vdupq_laneq_bf16): New. (vduph_lane_bf16, vduph_laneq_bf16): New. (vset_lane_bf16, vsetq_lane_bf16): New. (vget_lane_bf16, vgetq_lane_bf16): New. (vget_high_bf16, vget_low_bf16): New. (vreinterpret_bf16_u8, vreinterpretq_bf16_u8): New. (vreinterpret_bf16_u16, vreinterpretq_bf16_u16): New. (vreinterpret_bf16_u32, vreinterpretq_bf16_u32): New. (vreinterpret_bf16_u64, vreinterpretq_bf16_u64): New. (vreinterpret_bf16_s8, vreinterpretq_bf16_s8): New. (vreinterpret_bf16_s16, vreinterpretq_bf16_s16): New. (vreinterpret_bf16_s32, vreinterpretq_bf16_s32): New. (vreinterpret_bf16_s64, vreinterpretq_bf16_s64): New. (vreinterpret_bf16_p8, vreinterpretq_bf16_p8): New. (vreinterpret_bf16_p16, vreinterpretq_bf16_p16): New. (vreinterpret_bf16_p64, vreinterpretq_bf16_p64): New. (vreinterpret_bf16_f32, vreinterpretq_bf16_f32): New. (vreinterpret_bf16_f64, vreinterpretq_bf16_f64): New. (vreinterpretq_bf16_p128): New. (vreinterpret_s8_bf16, vreinterpretq_s8_bf16): New. (vreinterpret_s16_bf16, vreinterpretq_s16_bf16): New. (vreinterpret_s32_bf16, vreinterpretq_s32_bf16): New. (vreinterpret_s64_bf16, vreinterpretq_s64_bf16): New. (vreinterpret_u8_bf16, vreinterpretq_u8_bf16): New. (vreinterpret_u16_bf16, vreinterpretq_u16_bf16): New. (vreinterpret_u32_bf16, vreinterpretq_u32_bf16): New. (vreinterpret_u64_bf16, vreinterpretq_u64_bf16): New. 
(vreinterpret_p8_bf16, vreinterpretq_p8_bf16): New. (vreinterpret_p16_bf16, vreinterpretq_p16_bf16): New. (vreinterpret_p64_bf16, vreinterpretq_p64_bf16): New. (vreinterpret_f32_bf16, vreinterpretq_f32_bf16): New. (vreinterpretq_p128_bf16): New. * config/arm/arm_neon_builtins.def (VDX): Add V4BF. (V_elem): Likewise. (V_elem_l): Likewise. (VD_LANE): Likewise. (VQX) Add V8BF. (V_DOUBLE): Likewise. (VDQX): Add V4BF and V8BF. (V_two_elem, V_three_elem, V_four_elem): Likewise. (V_reg): Likewise. (V_HALF): Likewise. (V_double_vector_mode): Likewise. (V_cmp_result): Likewise. (V_uf_sclr): Likewise. (V_sz_elem): Likewise. (Is_d_reg): Likewise. (V_mode_nunits): Likewise. * config/arm/neon.md (neon_vdup_lane): Enable for BFloat. gcc/testsuite/ChangeLog: 2020-02-27 Mihail Ionescu * gcc.target/arm/bf16_dup.c: New test. * gcc.target/arm/bf16_reinterpret.c: Likewise. Is it ok for trunk? This looks mostly ok with a few nits... Regards, Mihail ### Attachment also inlined for ease of reply ### diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 09297831cdcd6e695843c17b7724c114f3a129fe..5901a8f1fb84f204ae95f0ccc97bf5ae944c482c 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -42,6 +42,15 @@ extern "C" { #include #include +#ifdef __ARM_BIG_ENDIAN +#define __ARM_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0])) +#define __arm_lane(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec) - 1)) +#define __arm_laneq(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec)/2 - 1)) +#else +#define __arm_lane(__vec, __idx) __idx +#define __arm_laneq(__vec, __idx) __idx +#endif + typedef __simd64_int8_t int8x8_t; typedef __simd64_int16_t int16x4_t; typedef __simd64_int32_t int32x2_t; @@ -6147,14 +6156,6 @@ vget_lane_s32 (int32x2_t __a, const int __b) /* For big-endian, GCC's vector indices are reversed within each 64 bits compared to the architectural lane indices used by Neon intrinsics. */ Please move this comment as well. 
-#ifdef __ARM_BIG_ENDIAN -#define __ARM_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0])) -#define __arm_lane(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec) - 1)) -#define __arm_laneq(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec)/2 - 1)) -#else -#define __arm_lane(__vec, __idx) __idx -#define __arm_laneq(__vec, __idx) __idx -#endif #define vget_lane_f16(__v, __idx) \ __extension__ \ @@ -14476,6 +14477,15 @@ vreinterpret_p16_u32 (uint32x2_t __a) #if defined (__ARM_FP16_FORMAT_IEEE) || defined
Re: [GCC][PATCH][ARM] Add vreinterpret, vdup, vget and vset bfloat16 intrinsic
Hi Mihail, On 2/27/20 10:27 AM, Mihail Ionescu wrote: Hi, This patch adds support for the bf16 vector create, get, set, duplicate and reinterpret intrinsics. ACLE documents are at https://developer.arm.com/docs/101028/latest ISA documents are at https://developer.arm.com/docs/ddi0596/latest Regression tested on arm-none-eabi. gcc/ChangeLog: 2020-02-27 Mihail Ionescu * (__ARM_NUM_LANES, __arm_lane, __arm_lane_q): Move to the beginning of the file. (vcreate_bf16, vcombine_bf16): New. (vdup_n_bf16, vdupq_n_bf16): New. (vdup_lane_bf16, vdup_laneq_bf16): New. (vdupq_lane_bf16, vdupq_laneq_bf16): New. (vduph_lane_bf16, vduph_laneq_bf16): New. (vset_lane_bf16, vsetq_lane_bf16): New. (vget_lane_bf16, vgetq_lane_bf16): New. (vget_high_bf16, vget_low_bf16): New. (vreinterpret_bf16_u8, vreinterpretq_bf16_u8): New. (vreinterpret_bf16_u16, vreinterpretq_bf16_u16): New. (vreinterpret_bf16_u32, vreinterpretq_bf16_u32): New. (vreinterpret_bf16_u64, vreinterpretq_bf16_u64): New. (vreinterpret_bf16_s8, vreinterpretq_bf16_s8): New. (vreinterpret_bf16_s16, vreinterpretq_bf16_s16): New. (vreinterpret_bf16_s32, vreinterpretq_bf16_s32): New. (vreinterpret_bf16_s64, vreinterpretq_bf16_s64): New. (vreinterpret_bf16_p8, vreinterpretq_bf16_p8): New. (vreinterpret_bf16_p16, vreinterpretq_bf16_p16): New. (vreinterpret_bf16_p64, vreinterpretq_bf16_p64): New. (vreinterpret_bf16_f32, vreinterpretq_bf16_f32): New. (vreinterpret_bf16_f64, vreinterpretq_bf16_f64): New. (vreinterpretq_bf16_p128): New. (vreinterpret_s8_bf16, vreinterpretq_s8_bf16): New. (vreinterpret_s16_bf16, vreinterpretq_s16_bf16): New. (vreinterpret_s32_bf16, vreinterpretq_s32_bf16): New. (vreinterpret_s64_bf16, vreinterpretq_s64_bf16): New. (vreinterpret_u8_bf16, vreinterpretq_u8_bf16): New. (vreinterpret_u16_bf16, vreinterpretq_u16_bf16): New. (vreinterpret_u32_bf16, vreinterpretq_u32_bf16): New. (vreinterpret_u64_bf16, vreinterpretq_u64_bf16): New. (vreinterpret_p8_bf16, vreinterpretq_p8_bf16): New. 
(vreinterpret_p16_bf16, vreinterpretq_p16_bf16): New. (vreinterpret_p64_bf16, vreinterpretq_p64_bf16): New. (vreinterpret_f32_bf16, vreinterpretq_f32_bf16): New. (vreinterpretq_p128_bf16): New. * config/arm/arm_neon_builtins.def (VDX): Add V4BF. (V_elem): Likewise. (V_elem_l): Likewise. (VD_LANE): Likewise. (VQX) Add V8BF. (V_DOUBLE): Likewise. (VDQX): Add V4BF and V8BF. (V_two_elem, V_three_elem, V_four_elem): Likewise. (V_reg): Likewise. (V_HALF): Likewise. (V_double_vector_mode): Likewise. (V_cmp_result): Likewise. (V_uf_sclr): Likewise. (V_sz_elem): Likewise. (Is_d_reg): Likewise. (V_mode_nunits): Likewise. * config/arm/neon.md (neon_vdup_lane): Enable for BFloat. gcc/testsuite/ChangeLog: 2020-02-27 Mihail Ionescu * gcc.target/arm/bf16_dup.c: New test. * gcc.target/arm/bf16_reinterpret.c: Likewise. Is it ok for trunk? This looks mostly ok with a few nits... Regards, Mihail ### Attachment also inlined for ease of reply ### diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 09297831cdcd6e695843c17b7724c114f3a129fe..5901a8f1fb84f204ae95f0ccc97bf5ae944c482c 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -42,6 +42,15 @@ extern "C" { #include #include +#ifdef __ARM_BIG_ENDIAN +#define __ARM_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0])) +#define __arm_lane(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec) - 1)) +#define __arm_laneq(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec)/2 - 1)) +#else +#define __arm_lane(__vec, __idx) __idx +#define __arm_laneq(__vec, __idx) __idx +#endif + typedef __simd64_int8_t int8x8_t; typedef __simd64_int16_t int16x4_t; typedef __simd64_int32_t int32x2_t; @@ -6147,14 +6156,6 @@ vget_lane_s32 (int32x2_t __a, const int __b) /* For big-endian, GCC's vector indices are reversed within each 64 bits compared to the architectural lane indices used by Neon intrinsics. */ Please move this comment as well. 
-#ifdef __ARM_BIG_ENDIAN -#define __ARM_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0])) -#define __arm_lane(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec) - 1)) -#define __arm_laneq(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec)/2 - 1)) -#else -#define __arm_lane(__vec, __idx) __idx -#define __arm_laneq(__vec, __idx) __idx -#endif #define vget_lane_f16(__v, __idx) \ __extension__ \ @@ -14476,6 +14477,15 @@ vreinterpret_p16_u32 (uint32x2_t __a) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern