Implement vcmlaq using the new MVE builtins framework. 2023-07-13 Christophe Lyon <christophe.l...@linaro.org>
gcc/ * config/arm/arm-mve-builtins-base.cc (vcmlaq, vcmlaq_rot90) (vcmlaq_rot180, vcmlaq_rot270): New. * config/arm/arm-mve-builtins-base.def (vcmlaq, vcmlaq_rot90) (vcmlaq_rot180, vcmlaq_rot270): New. * config/arm/arm-mve-builtins-base.h (vcmlaq, vcmlaq_rot90) (vcmlaq_rot180, vcmlaq_rot270): New. * config/arm/arm-mve-builtins.cc (function_instance::has_inactive_argument): Handle vcmlaq, vcmlaq_rot90, vcmlaq_rot180, vcmlaq_rot270. * config/arm/arm_mve.h (vcmlaq): Delete. (vcmlaq_rot180): Delete. (vcmlaq_rot270): Delete. (vcmlaq_rot90): Delete. (vcmlaq_m): Delete. (vcmlaq_rot180_m): Delete. (vcmlaq_rot270_m): Delete. (vcmlaq_rot90_m): Delete. (vcmlaq_f16): Delete. (vcmlaq_rot180_f16): Delete. (vcmlaq_rot270_f16): Delete. (vcmlaq_rot90_f16): Delete. (vcmlaq_f32): Delete. (vcmlaq_rot180_f32): Delete. (vcmlaq_rot270_f32): Delete. (vcmlaq_rot90_f32): Delete. (vcmlaq_m_f32): Delete. (vcmlaq_m_f16): Delete. (vcmlaq_rot180_m_f32): Delete. (vcmlaq_rot180_m_f16): Delete. (vcmlaq_rot270_m_f32): Delete. (vcmlaq_rot270_m_f16): Delete. (vcmlaq_rot90_m_f32): Delete. (vcmlaq_rot90_m_f16): Delete. (__arm_vcmlaq_f16): Delete. (__arm_vcmlaq_rot180_f16): Delete. (__arm_vcmlaq_rot270_f16): Delete. (__arm_vcmlaq_rot90_f16): Delete. (__arm_vcmlaq_f32): Delete. (__arm_vcmlaq_rot180_f32): Delete. (__arm_vcmlaq_rot270_f32): Delete. (__arm_vcmlaq_rot90_f32): Delete. (__arm_vcmlaq_m_f32): Delete. (__arm_vcmlaq_m_f16): Delete. (__arm_vcmlaq_rot180_m_f32): Delete. (__arm_vcmlaq_rot180_m_f16): Delete. (__arm_vcmlaq_rot270_m_f32): Delete. (__arm_vcmlaq_rot270_m_f16): Delete. (__arm_vcmlaq_rot90_m_f32): Delete. (__arm_vcmlaq_rot90_m_f16): Delete. (__arm_vcmlaq): Delete. (__arm_vcmlaq_rot180): Delete. (__arm_vcmlaq_rot270): Delete. (__arm_vcmlaq_rot90): Delete. (__arm_vcmlaq_m): Delete. (__arm_vcmlaq_rot180_m): Delete. (__arm_vcmlaq_rot270_m): Delete. (__arm_vcmlaq_rot90_m): Delete. 
--- gcc/config/arm/arm-mve-builtins-base.cc | 4 + gcc/config/arm/arm-mve-builtins-base.def | 4 + gcc/config/arm/arm-mve-builtins-base.h | 16 +- gcc/config/arm/arm-mve-builtins.cc | 4 + gcc/config/arm/arm_mve.h | 304 ----------------------- 5 files changed, 22 insertions(+), 310 deletions(-) diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc index 3ad8df304e8..e31095ae112 100644 --- a/gcc/config/arm/arm-mve-builtins-base.cc +++ b/gcc/config/arm/arm-mve-builtins-base.cc @@ -262,6 +262,10 @@ FUNCTION_WITH_RTX_M (vandq, AND, VANDQ) FUNCTION_ONLY_N (vbrsrq, VBRSRQ) FUNCTION (vcaddq_rot90, unspec_mve_function_exact_insn_rot, (UNSPEC_VCADD90, UNSPEC_VCADD90, UNSPEC_VCADD90, VCADDQ_ROT90_M_S, VCADDQ_ROT90_M_U, VCADDQ_ROT90_M_F)) FUNCTION (vcaddq_rot270, unspec_mve_function_exact_insn_rot, (UNSPEC_VCADD270, UNSPEC_VCADD270, UNSPEC_VCADD270, VCADDQ_ROT270_M_S, VCADDQ_ROT270_M_U, VCADDQ_ROT270_M_F)) +FUNCTION (vcmlaq, unspec_mve_function_exact_insn_rot, (-1, -1, UNSPEC_VCMLA, -1, -1, VCMLAQ_M_F)) +FUNCTION (vcmlaq_rot90, unspec_mve_function_exact_insn_rot, (-1, -1, UNSPEC_VCMLA90, -1, -1, VCMLAQ_ROT90_M_F)) +FUNCTION (vcmlaq_rot180, unspec_mve_function_exact_insn_rot, (-1, -1, UNSPEC_VCMLA180, -1, -1, VCMLAQ_ROT180_M_F)) +FUNCTION (vcmlaq_rot270, unspec_mve_function_exact_insn_rot, (-1, -1, UNSPEC_VCMLA270, -1, -1, VCMLAQ_ROT270_M_F)) FUNCTION (vcmulq, unspec_mve_function_exact_insn_rot, (-1, -1, UNSPEC_VCMUL, -1, -1, VCMULQ_M_F)) FUNCTION (vcmulq_rot90, unspec_mve_function_exact_insn_rot, (-1, -1, UNSPEC_VCMUL90, -1, -1, VCMULQ_ROT90_M_F)) FUNCTION (vcmulq_rot180, unspec_mve_function_exact_insn_rot, (-1, -1, UNSPEC_VCMUL180, -1, -1, VCMULQ_ROT180_M_F)) diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def index cbcf0d296cd..e7d466f2efd 100644 --- a/gcc/config/arm/arm-mve-builtins-base.def +++ b/gcc/config/arm/arm-mve-builtins-base.def @@ -158,6 +158,10 @@ DEF_MVE_FUNCTION (vandq, binary, 
all_float, mx_or_none) DEF_MVE_FUNCTION (vbrsrq, binary_imm32, all_float, mx_or_none) DEF_MVE_FUNCTION (vcaddq_rot90, binary, all_float, mx_or_none) DEF_MVE_FUNCTION (vcaddq_rot270, binary, all_float, mx_or_none) +DEF_MVE_FUNCTION (vcmlaq, ternary, all_float, m_or_none) +DEF_MVE_FUNCTION (vcmlaq_rot90, ternary, all_float, m_or_none) +DEF_MVE_FUNCTION (vcmlaq_rot180, ternary, all_float, m_or_none) +DEF_MVE_FUNCTION (vcmlaq_rot270, ternary, all_float, m_or_none) DEF_MVE_FUNCTION (vcmulq, binary, all_float, mx_or_none) DEF_MVE_FUNCTION (vcmulq_rot90, binary, all_float, mx_or_none) DEF_MVE_FUNCTION (vcmulq_rot180, binary, all_float, mx_or_none) diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h index 875b333ebef..be3698b4f4c 100644 --- a/gcc/config/arm/arm-mve-builtins-base.h +++ b/gcc/config/arm/arm-mve-builtins-base.h @@ -33,14 +33,14 @@ extern const function_base *const vaddvaq; extern const function_base *const vaddvq; extern const function_base *const vandq; extern const function_base *const vbrsrq; -extern const function_base *const vcaddq_rot90; extern const function_base *const vcaddq_rot270; -extern const function_base *const vcmulq; -extern const function_base *const vcmulq_rot90; -extern const function_base *const vcmulq_rot180; -extern const function_base *const vcmulq_rot270; +extern const function_base *const vcaddq_rot90; extern const function_base *const vclsq; extern const function_base *const vclzq; +extern const function_base *const vcmlaq; +extern const function_base *const vcmlaq_rot180; +extern const function_base *const vcmlaq_rot270; +extern const function_base *const vcmlaq_rot90; extern const function_base *const vcmpcsq; extern const function_base *const vcmpeqq; extern const function_base *const vcmpgeq; @@ -49,6 +49,10 @@ extern const function_base *const vcmphiq; extern const function_base *const vcmpleq; extern const function_base *const vcmpltq; extern const function_base *const vcmpneq; +extern 
const function_base *const vcmulq; +extern const function_base *const vcmulq_rot180; +extern const function_base *const vcmulq_rot270; +extern const function_base *const vcmulq_rot90; extern const function_base *const vcreateq; extern const function_base *const vdupq; extern const function_base *const veorq; @@ -56,8 +60,8 @@ extern const function_base *const vfmaq; extern const function_base *const vfmasq; extern const function_base *const vfmsq; extern const function_base *const vhaddq; -extern const function_base *const vhcaddq_rot90; extern const function_base *const vhcaddq_rot270; +extern const function_base *const vhcaddq_rot90; extern const function_base *const vhsubq; extern const function_base *const vmaxaq; extern const function_base *const vmaxavq; diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc index 7033e41a571..3272ece6326 100644 --- a/gcc/config/arm/arm-mve-builtins.cc +++ b/gcc/config/arm/arm-mve-builtins.cc @@ -670,6 +670,10 @@ function_instance::has_inactive_argument () const return false; if (mode_suffix_id == MODE_r + || base == functions::vcmlaq + || base == functions::vcmlaq_rot90 + || base == functions::vcmlaq_rot180 + || base == functions::vcmlaq_rot270 || base == functions::vcmpeqq || base == functions::vcmpneq || base == functions::vcmpgeq diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index b9d3a876369..88b2e77ffd9 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -159,18 +159,10 @@ #define vcvtq_m(__inactive, __a, __p) __arm_vcvtq_m(__inactive, __a, __p) #define vcvtbq_m(__a, __b, __p) __arm_vcvtbq_m(__a, __b, __p) #define vcvttq_m(__a, __b, __p) __arm_vcvttq_m(__a, __b, __p) -#define vcmlaq(__a, __b, __c) __arm_vcmlaq(__a, __b, __c) -#define vcmlaq_rot180(__a, __b, __c) __arm_vcmlaq_rot180(__a, __b, __c) -#define vcmlaq_rot270(__a, __b, __c) __arm_vcmlaq_rot270(__a, __b, __c) -#define vcmlaq_rot90(__a, __b, __c) __arm_vcmlaq_rot90(__a, __b, __c) #define 
vcvtmq_m(__inactive, __a, __p) __arm_vcvtmq_m(__inactive, __a, __p) #define vcvtnq_m(__inactive, __a, __p) __arm_vcvtnq_m(__inactive, __a, __p) #define vcvtpq_m(__inactive, __a, __p) __arm_vcvtpq_m(__inactive, __a, __p) #define vcvtq_m_n(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n(__inactive, __a, __imm6, __p) -#define vcmlaq_m(__a, __b, __c, __p) __arm_vcmlaq_m(__a, __b, __c, __p) -#define vcmlaq_rot180_m(__a, __b, __c, __p) __arm_vcmlaq_rot180_m(__a, __b, __c, __p) -#define vcmlaq_rot270_m(__a, __b, __c, __p) __arm_vcmlaq_rot270_m(__a, __b, __c, __p) -#define vcmlaq_rot90_m(__a, __b, __c, __p) __arm_vcmlaq_rot90_m(__a, __b, __c, __p) #define vcvtq_x(__a, __p) __arm_vcvtq_x(__a, __p) #define vcvtq_x_n(__a, __imm6, __p) __arm_vcvtq_x_n(__a, __imm6, __p) @@ -286,10 +278,6 @@ #define vcvtbq_m_f32_f16(__inactive, __a, __p) __arm_vcvtbq_m_f32_f16(__inactive, __a, __p) #define vcvttq_m_f16_f32(__a, __b, __p) __arm_vcvttq_m_f16_f32(__a, __b, __p) #define vcvttq_m_f32_f16(__inactive, __a, __p) __arm_vcvttq_m_f32_f16(__inactive, __a, __p) -#define vcmlaq_f16(__a, __b, __c) __arm_vcmlaq_f16(__a, __b, __c) -#define vcmlaq_rot180_f16(__a, __b, __c) __arm_vcmlaq_rot180_f16(__a, __b, __c) -#define vcmlaq_rot270_f16(__a, __b, __c) __arm_vcmlaq_rot270_f16(__a, __b, __c) -#define vcmlaq_rot90_f16(__a, __b, __c) __arm_vcmlaq_rot90_f16(__a, __b, __c) #define vcvtmq_m_s16_f16(__inactive, __a, __p) __arm_vcvtmq_m_s16_f16(__inactive, __a, __p) #define vcvtnq_m_s16_f16(__inactive, __a, __p) __arm_vcvtnq_m_s16_f16(__inactive, __a, __p) #define vcvtpq_m_s16_f16(__inactive, __a, __p) __arm_vcvtpq_m_s16_f16(__inactive, __a, __p) @@ -298,10 +286,6 @@ #define vcvtnq_m_u16_f16(__inactive, __a, __p) __arm_vcvtnq_m_u16_f16(__inactive, __a, __p) #define vcvtpq_m_u16_f16(__inactive, __a, __p) __arm_vcvtpq_m_u16_f16(__inactive, __a, __p) #define vcvtq_m_u16_f16(__inactive, __a, __p) __arm_vcvtq_m_u16_f16(__inactive, __a, __p) -#define vcmlaq_f32(__a, __b, __c) __arm_vcmlaq_f32(__a, __b, __c) 
-#define vcmlaq_rot180_f32(__a, __b, __c) __arm_vcmlaq_rot180_f32(__a, __b, __c) -#define vcmlaq_rot270_f32(__a, __b, __c) __arm_vcmlaq_rot270_f32(__a, __b, __c) -#define vcmlaq_rot90_f32(__a, __b, __c) __arm_vcmlaq_rot90_f32(__a, __b, __c) #define vcvtmq_m_s32_f32(__inactive, __a, __p) __arm_vcvtmq_m_s32_f32(__inactive, __a, __p) #define vcvtnq_m_s32_f32(__inactive, __a, __p) __arm_vcvtnq_m_s32_f32(__inactive, __a, __p) #define vcvtpq_m_s32_f32(__inactive, __a, __p) __arm_vcvtpq_m_s32_f32(__inactive, __a, __p) @@ -344,14 +328,6 @@ #define vmulltq_poly_m_p16(__inactive, __a, __b, __p) __arm_vmulltq_poly_m_p16(__inactive, __a, __b, __p) #define vbicq_m_f32(__inactive, __a, __b, __p) __arm_vbicq_m_f32(__inactive, __a, __b, __p) #define vbicq_m_f16(__inactive, __a, __b, __p) __arm_vbicq_m_f16(__inactive, __a, __b, __p) -#define vcmlaq_m_f32(__a, __b, __c, __p) __arm_vcmlaq_m_f32(__a, __b, __c, __p) -#define vcmlaq_m_f16(__a, __b, __c, __p) __arm_vcmlaq_m_f16(__a, __b, __c, __p) -#define vcmlaq_rot180_m_f32(__a, __b, __c, __p) __arm_vcmlaq_rot180_m_f32(__a, __b, __c, __p) -#define vcmlaq_rot180_m_f16(__a, __b, __c, __p) __arm_vcmlaq_rot180_m_f16(__a, __b, __c, __p) -#define vcmlaq_rot270_m_f32(__a, __b, __c, __p) __arm_vcmlaq_rot270_m_f32(__a, __b, __c, __p) -#define vcmlaq_rot270_m_f16(__a, __b, __c, __p) __arm_vcmlaq_rot270_m_f16(__a, __b, __c, __p) -#define vcmlaq_rot90_m_f32(__a, __b, __c, __p) __arm_vcmlaq_rot90_m_f32(__a, __b, __c, __p) -#define vcmlaq_rot90_m_f16(__a, __b, __c, __p) __arm_vcmlaq_rot90_m_f16(__a, __b, __c, __p) #define vcvtq_m_n_s32_f32(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_s32_f32(__inactive, __a, __imm6, __p) #define vcvtq_m_n_s16_f16(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_s16_f16(__inactive, __a, __imm6, __p) #define vcvtq_m_n_u32_f32(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_u32_f32(__inactive, __a, __imm6, __p) @@ -4645,34 +4621,6 @@ __arm_vcvttq_m_f32_f16 (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __ return 
__builtin_mve_vcvttq_m_f32_f16v4sf (__inactive, __a, __p); } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) -{ - return __builtin_mve_vcmlaqv8hf (__a, __b, __c); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot180_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) -{ - return __builtin_mve_vcmlaq_rot180v8hf (__a, __b, __c); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot270_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) -{ - return __builtin_mve_vcmlaq_rot270v8hf (__a, __b, __c); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot90_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) -{ - return __builtin_mve_vcmlaq_rot90v8hf (__a, __b, __c); -} - __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcvtmq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p) @@ -4729,34 +4677,6 @@ __arm_vcvtq_m_u16_f16 (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p) return __builtin_mve_vcvtq_m_from_f_uv8hi (__inactive, __a, __p); } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) -{ - return __builtin_mve_vcmlaqv4sf (__a, __b, __c); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot180_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) -{ - return __builtin_mve_vcmlaq_rot180v4sf (__a, __b, __c); -} - -__extension__ extern __inline float32x4_t -__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot270_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) -{ - return __builtin_mve_vcmlaq_rot270v4sf (__a, __b, __c); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot90_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) -{ - return __builtin_mve_vcmlaq_rot90v4sf (__a, __b, __c); -} - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcvtmq_m_s32_f32 (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p) @@ -4855,62 +4775,6 @@ __arm_vbicq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve return __builtin_mve_vbicq_m_fv8hf (__inactive, __a, __b, __p); } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p) -{ - return __builtin_mve_vcmlaq_m_fv4sf (__a, __b, __c, __p); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p) -{ - return __builtin_mve_vcmlaq_m_fv8hf (__a, __b, __c, __p); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot180_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p) -{ - return __builtin_mve_vcmlaq_rot180_m_fv4sf (__a, __b, __c, __p); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot180_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p) -{ - return __builtin_mve_vcmlaq_rot180_m_fv8hf (__a, __b, __c, __p); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -__arm_vcmlaq_rot270_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p) -{ - return __builtin_mve_vcmlaq_rot270_m_fv4sf (__a, __b, __c, __p); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot270_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p) -{ - return __builtin_mve_vcmlaq_rot270_m_fv8hf (__a, __b, __c, __p); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot90_m_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p) -{ - return __builtin_mve_vcmlaq_rot90_m_fv4sf (__a, __b, __c, __p); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot90_m_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p) -{ - return __builtin_mve_vcmlaq_rot90_m_fv8hf (__a, __b, __c, __p); -} - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcvtq_m_n_s32_f32 (int32x4_t __inactive, float32x4_t __a, const int __imm6, mve_pred16_t __p) @@ -8481,34 +8345,6 @@ __arm_vcvttq_m (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p) return __arm_vcvttq_m_f32_f16 (__inactive, __a, __p); } -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq (float16x8_t __a, float16x8_t __b, float16x8_t __c) -{ - return __arm_vcmlaq_f16 (__a, __b, __c); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot180 (float16x8_t __a, float16x8_t __b, float16x8_t __c) -{ - return __arm_vcmlaq_rot180_f16 (__a, __b, __c); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-__arm_vcmlaq_rot270 (float16x8_t __a, float16x8_t __b, float16x8_t __c) -{ - return __arm_vcmlaq_rot270_f16 (__a, __b, __c); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot90 (float16x8_t __a, float16x8_t __b, float16x8_t __c) -{ - return __arm_vcmlaq_rot90_f16 (__a, __b, __c); -} - __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcvtmq_m (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p) @@ -8565,34 +8401,6 @@ __arm_vcvtq_m (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p) return __arm_vcvtq_m_u16_f16 (__inactive, __a, __p); } -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq (float32x4_t __a, float32x4_t __b, float32x4_t __c) -{ - return __arm_vcmlaq_f32 (__a, __b, __c); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot180 (float32x4_t __a, float32x4_t __b, float32x4_t __c) -{ - return __arm_vcmlaq_rot180_f32 (__a, __b, __c); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot270 (float32x4_t __a, float32x4_t __b, float32x4_t __c) -{ - return __arm_vcmlaq_rot270_f32 (__a, __b, __c); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot90 (float32x4_t __a, float32x4_t __b, float32x4_t __c) -{ - return __arm_vcmlaq_rot90_f32 (__a, __b, __c); -} - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcvtmq_m (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p) @@ -8691,62 +8499,6 @@ __arm_vbicq_m (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pre return __arm_vbicq_m_f16 (__inactive, __a, __b, __p); 
} -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_m (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p) -{ - return __arm_vcmlaq_m_f32 (__a, __b, __c, __p); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_m (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p) -{ - return __arm_vcmlaq_m_f16 (__a, __b, __c, __p); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot180_m (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p) -{ - return __arm_vcmlaq_rot180_m_f32 (__a, __b, __c, __p); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot180_m (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p) -{ - return __arm_vcmlaq_rot180_m_f16 (__a, __b, __c, __p); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot270_m (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p) -{ - return __arm_vcmlaq_rot270_m_f32 (__a, __b, __c, __p); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot270_m (float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p) -{ - return __arm_vcmlaq_rot270_m_f16 (__a, __b, __c, __p); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot90_m (float32x4_t __a, float32x4_t __b, float32x4_t __c, mve_pred16_t __p) -{ - return __arm_vcmlaq_rot90_m_f32 (__a, __b, __c, __p); -} - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vcmlaq_rot90_m 
(float16x8_t __a, float16x8_t __b, float16x8_t __c, mve_pred16_t __p) -{ - return __arm_vcmlaq_rot90_m_f16 (__a, __b, __c, __p); -} - __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcvtq_m_n (int32x4_t __inactive, float32x4_t __a, const int __imm6, mve_pred16_t __p) @@ -9620,34 +9372,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcvtq_m_n_f16_u16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcvtq_m_n_f32_u32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));}) -#define __arm_vcmlaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t)));}) - -#define __arm_vcmlaq_rot180(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot180_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot180_f32 
(__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t)));}) - -#define __arm_vcmlaq_rot270(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot270_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot270_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t)));}) - -#define __arm_vcmlaq_rot90(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot90_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot90_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t)));}) - #define __arm_vcvtbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ @@ -9697,34 +9421,6 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vbicq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ int 
(*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vbicq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) -#define __arm_vcmlaq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) - -#define __arm_vcmlaq_rot180_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot180_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot180_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) - -#define __arm_vcmlaq_rot270_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot270_m_f16 
(__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot270_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) - -#define __arm_vcmlaq_rot90_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - __typeof(p2) __p2 = (p2); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmlaq_rot90_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmlaq_rot90_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));}) - #define __arm_vornq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ -- 2.34.1