> -----Original Message-----
> From: Christophe Lyon <christophe.l...@linaro.org>
> Sent: Thursday, November 16, 2023 3:26 PM
> To: gcc-patches@gcc.gnu.org; Richard Sandiford
> <richard.sandif...@arm.com>; Richard Earnshaw
> <richard.earns...@arm.com>; Kyrylo Tkachov <kyrylo.tkac...@arm.com>
> Cc: Christophe Lyon <christophe.l...@linaro.org>
> Subject: [PATCH 6/6] arm: [MVE intrinsics] rework vld1q vst1q
>
> Implement vld1q, vst1q using the new MVE builtins framework.
Ok. Nice to see more MVE intrinsics getting the good treatment.
Thanks,
Kyrill
>
> 2023-11-16 Christophe Lyon <christophe.l...@linaro.org>
>
> gcc/
> * config/arm/arm-mve-builtins-base.cc (vld1_impl, vld1q)
> (vst1_impl, vst1q): New.
> * config/arm/arm-mve-builtins-base.def (vld1q, vst1q): New.
> * config/arm/arm-mve-builtins-base.h (vld1q, vst1q): New.
> * config/arm/arm_mve.h
> (vld1q): Delete.
> (vst1q): Delete.
> (vld1q_s8): Delete.
> (vld1q_s32): Delete.
> (vld1q_s16): Delete.
> (vld1q_u8): Delete.
> (vld1q_u32): Delete.
> (vld1q_u16): Delete.
> (vld1q_f32): Delete.
> (vld1q_f16): Delete.
> (vst1q_f32): Delete.
> (vst1q_f16): Delete.
> (vst1q_s8): Delete.
> (vst1q_s32): Delete.
> (vst1q_s16): Delete.
> (vst1q_u8): Delete.
> (vst1q_u32): Delete.
> (vst1q_u16): Delete.
> (__arm_vld1q_s8): Delete.
> (__arm_vld1q_s32): Delete.
> (__arm_vld1q_s16): Delete.
> (__arm_vld1q_u8): Delete.
> (__arm_vld1q_u32): Delete.
> (__arm_vld1q_u16): Delete.
> (__arm_vst1q_s8): Delete.
> (__arm_vst1q_s32): Delete.
> (__arm_vst1q_s16): Delete.
> (__arm_vst1q_u8): Delete.
> (__arm_vst1q_u32): Delete.
> (__arm_vst1q_u16): Delete.
> (__arm_vld1q_f32): Delete.
> (__arm_vld1q_f16): Delete.
> (__arm_vst1q_f32): Delete.
> (__arm_vst1q_f16): Delete.
> (__arm_vld1q): Delete.
> (__arm_vst1q): Delete.
> * config/arm/mve.md (mve_vld1q_f<mode>): Rename into ...
> (@mve_vld1q_f<mode>): ... this.
> (mve_vld1q_<supf><mode>): Rename into ...
> (@mve_vld1q_<supf><mode>): ... this.
> (mve_vst1q_f<mode>): Rename into ...
> (@mve_vst1q_f<mode>): ... this.
> (mve_vst1q_<supf><mode>): Rename into ...
> (@mve_vst1q_<supf><mode>): ... this.
> ---
> gcc/config/arm/arm-mve-builtins-base.cc | 58 +++++
> gcc/config/arm/arm-mve-builtins-base.def | 4 +
> gcc/config/arm/arm-mve-builtins-base.h | 4 +-
> gcc/config/arm/arm_mve.h | 282 -----------------------
> gcc/config/arm/mve.md | 8 +-
> 5 files changed, 69 insertions(+), 287 deletions(-)
>
> diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-
> mve-builtins-base.cc
> index 5478cac8aeb..cfe1b954a29 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.cc
> +++ b/gcc/config/arm/arm-mve-builtins-base.cc
> @@ -83,6 +83,62 @@ class vuninitializedq_impl : public
> quiet<function_base>
> }
> };
>
> +class vld1_impl : public full_width_access
> +{
> +public:
> + unsigned int
> + call_properties (const function_instance &) const override
> + {
> + return CP_READ_MEMORY;
> + }
> +
> + rtx
> + expand (function_expander &e) const override
> + {
> + insn_code icode;
> + if (e.type_suffix (0).float_p)
> + icode = code_for_mve_vld1q_f(e.vector_mode (0));
> + else
> + {
> + if (e.type_suffix (0).unsigned_p)
> + icode = code_for_mve_vld1q(VLD1Q_U,
> + e.vector_mode (0));
> + else
> + icode = code_for_mve_vld1q(VLD1Q_S,
> + e.vector_mode (0));
> + }
> + return e.use_contiguous_load_insn (icode);
> + }
> +};
> +
> +class vst1_impl : public full_width_access
> +{
> +public:
> + unsigned int
> + call_properties (const function_instance &) const override
> + {
> + return CP_WRITE_MEMORY;
> + }
> +
> + rtx
> + expand (function_expander &e) const override
> + {
> + insn_code icode;
> + if (e.type_suffix (0).float_p)
> + icode = code_for_mve_vst1q_f(e.vector_mode (0));
> + else
> + {
> + if (e.type_suffix (0).unsigned_p)
> + icode = code_for_mve_vst1q(VST1Q_U,
> + e.vector_mode (0));
> + else
> + icode = code_for_mve_vst1q(VST1Q_S,
> + e.vector_mode (0));
> + }
> + return e.use_contiguous_store_insn (icode);
> + }
> +};
> +
> } /* end anonymous namespace */
>
> namespace arm_mve {
> @@ -290,6 +346,7 @@ FUNCTION (vfmasq,
> unspec_mve_function_exact_insn, (-1, -1, -1, -1, -1, VFMASQ_N_
> FUNCTION (vfmsq, unspec_mve_function_exact_insn, (-1, -1, VFMSQ_F, -1, -
> 1, -1, -1, -1, VFMSQ_M_F, -1, -1, -1))
> FUNCTION_WITH_M_N_NO_F (vhaddq, VHADDQ)
> FUNCTION_WITH_M_N_NO_F (vhsubq, VHSUBQ)
> +FUNCTION (vld1q, vld1_impl,)
> FUNCTION_PRED_P_S (vmaxavq, VMAXAVQ)
> FUNCTION_WITHOUT_N_NO_U_F (vmaxaq, VMAXAQ)
> FUNCTION_ONLY_F (vmaxnmaq, VMAXNMAQ)
> @@ -405,6 +462,7 @@ FUNCTION_ONLY_N_NO_F (vshrntq, VSHRNTQ)
> FUNCTION_ONLY_N_NO_F (vshrq, VSHRQ)
> FUNCTION_ONLY_N_NO_F (vsliq, VSLIQ)
> FUNCTION_ONLY_N_NO_F (vsriq, VSRIQ)
> +FUNCTION (vst1q, vst1_impl,)
> FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ)
> FUNCTION (vuninitializedq, vuninitializedq_impl,)
>
> diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-
> mve-builtins-base.def
> index 01dfbdef8a3..16879246237 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.def
> +++ b/gcc/config/arm/arm-mve-builtins-base.def
> @@ -47,6 +47,7 @@ DEF_MVE_FUNCTION (vhaddq, binary_opt_n,
> all_integer, mx_or_none)
> DEF_MVE_FUNCTION (vhcaddq_rot90, binary, all_signed, mx_or_none)
> DEF_MVE_FUNCTION (vhcaddq_rot270, binary, all_signed, mx_or_none)
> DEF_MVE_FUNCTION (vhsubq, binary_opt_n, all_integer, mx_or_none)
> +DEF_MVE_FUNCTION (vld1q, load, all_integer, none)
> DEF_MVE_FUNCTION (vmaxaq, binary_maxamina, all_signed, m_or_none)
> DEF_MVE_FUNCTION (vmaxavq, binary_maxavminav, all_signed, p_or_none)
> DEF_MVE_FUNCTION (vmaxq, binary, all_integer, mx_or_none)
> @@ -150,6 +151,7 @@ DEF_MVE_FUNCTION (vshrntq, binary_rshift_narrow,
> integer_16_32, m_or_none)
> DEF_MVE_FUNCTION (vshrq, binary_rshift, all_integer, mx_or_none)
> DEF_MVE_FUNCTION (vsliq, ternary_lshift, all_integer, m_or_none)
> DEF_MVE_FUNCTION (vsriq, ternary_rshift, all_integer, m_or_none)
> +DEF_MVE_FUNCTION (vst1q, store, all_integer, none)
> DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none)
> DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
> #undef REQUIRES_FLOAT
> @@ -182,6 +184,7 @@ DEF_MVE_FUNCTION (veorq, binary, all_float,
> mx_or_none)
> DEF_MVE_FUNCTION (vfmaq, ternary_opt_n, all_float, m_or_none)
> DEF_MVE_FUNCTION (vfmasq, ternary_n, all_float, m_or_none)
> DEF_MVE_FUNCTION (vfmsq, ternary, all_float, m_or_none)
> +DEF_MVE_FUNCTION (vld1q, load, all_float, none)
> DEF_MVE_FUNCTION (vmaxnmaq, binary, all_float, m_or_none)
> DEF_MVE_FUNCTION (vmaxnmavq, binary_maxvminv, all_float, p_or_none)
> DEF_MVE_FUNCTION (vmaxnmq, binary, all_float, mx_or_none)
> @@ -203,6 +206,7 @@ DEF_MVE_FUNCTION (vrndnq, unary, all_float,
> mx_or_none)
> DEF_MVE_FUNCTION (vrndpq, unary, all_float, mx_or_none)
> DEF_MVE_FUNCTION (vrndq, unary, all_float, mx_or_none)
> DEF_MVE_FUNCTION (vrndxq, unary, all_float, mx_or_none)
> +DEF_MVE_FUNCTION (vst1q, store, all_float, none)
> DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_float, mx_or_none)
> DEF_MVE_FUNCTION (vuninitializedq, inherent, all_float, none)
> #undef REQUIRES_FLOAT
> diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-
> mve-builtins-base.h
> index c574c32ac53..8c7e5fe5c3e 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.h
> +++ b/gcc/config/arm/arm-mve-builtins-base.h
> @@ -63,6 +63,7 @@ extern const function_base *const vhaddq;
> extern const function_base *const vhcaddq_rot270;
> extern const function_base *const vhcaddq_rot90;
> extern const function_base *const vhsubq;
> +extern const function_base *const vld1q;
> extern const function_base *const vmaxaq;
> extern const function_base *const vmaxavq;
> extern const function_base *const vmaxnmaq;
> @@ -103,8 +104,8 @@ extern const function_base *const vmovnbq;
> extern const function_base *const vmovntq;
> extern const function_base *const vmulhq;
> extern const function_base *const vmullbq_int;
> -extern const function_base *const vmulltq_int;
> extern const function_base *const vmullbq_poly;
> +extern const function_base *const vmulltq_int;
> extern const function_base *const vmulltq_poly;
> extern const function_base *const vmulq;
> extern const function_base *const vmvnq;
> @@ -178,6 +179,7 @@ extern const function_base *const vshrntq;
> extern const function_base *const vshrq;
> extern const function_base *const vsliq;
> extern const function_base *const vsriq;
> +extern const function_base *const vst1q;
> extern const function_base *const vsubq;
> extern const function_base *const vuninitializedq;
>
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index b82d94e59bd..cc027f9cbb5 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -56,7 +56,6 @@
> #define vstrbq_scatter_offset_p(__base, __offset, __value, __p)
> __arm_vstrbq_scatter_offset_p(__base, __offset, __value, __p)
> #define vstrwq_scatter_base_p(__addr, __offset, __value, __p)
> __arm_vstrwq_scatter_base_p(__addr, __offset, __value, __p)
> #define vldrbq_gather_offset_z(__base, __offset, __p)
> __arm_vldrbq_gather_offset_z(__base, __offset, __p)
> -#define vld1q(__base) __arm_vld1q(__base)
> #define vldrhq_gather_offset(__base, __offset)
> __arm_vldrhq_gather_offset(__base, __offset)
> #define vldrhq_gather_offset_z(__base, __offset, __p)
> __arm_vldrhq_gather_offset_z(__base, __offset, __p)
> #define vldrhq_gather_shifted_offset(__base, __offset)
> __arm_vldrhq_gather_shifted_offset(__base, __offset)
> @@ -69,7 +68,6 @@
> #define vldrwq_gather_offset_z(__base, __offset, __p)
> __arm_vldrwq_gather_offset_z(__base, __offset, __p)
> #define vldrwq_gather_shifted_offset(__base, __offset)
> __arm_vldrwq_gather_shifted_offset(__base, __offset)
> #define vldrwq_gather_shifted_offset_z(__base, __offset, __p)
> __arm_vldrwq_gather_shifted_offset_z(__base, __offset, __p)
> -#define vst1q(__addr, __value) __arm_vst1q(__addr, __value)
> #define vstrhq_scatter_offset(__base, __offset, __value)
> __arm_vstrhq_scatter_offset(__base, __offset, __value)
> #define vstrhq_scatter_offset_p(__base, __offset, __value, __p)
> __arm_vstrhq_scatter_offset_p(__base, __offset, __value, __p)
> #define vstrhq_scatter_shifted_offset(__base, __offset, __value)
> __arm_vstrhq_scatter_shifted_offset(__base, __offset, __value)
> @@ -346,12 +344,6 @@
> #define vldrbq_z_u32(__base, __p) __arm_vldrbq_z_u32(__base, __p)
> #define vldrwq_gather_base_z_u32(__addr, __offset, __p)
> __arm_vldrwq_gather_base_z_u32(__addr, __offset, __p)
> #define vldrwq_gather_base_z_s32(__addr, __offset, __p)
> __arm_vldrwq_gather_base_z_s32(__addr, __offset, __p)
> -#define vld1q_s8(__base) __arm_vld1q_s8(__base)
> -#define vld1q_s32(__base) __arm_vld1q_s32(__base)
> -#define vld1q_s16(__base) __arm_vld1q_s16(__base)
> -#define vld1q_u8(__base) __arm_vld1q_u8(__base)
> -#define vld1q_u32(__base) __arm_vld1q_u32(__base)
> -#define vld1q_u16(__base) __arm_vld1q_u16(__base)
> #define vldrhq_gather_offset_s32(__base, __offset)
> __arm_vldrhq_gather_offset_s32(__base, __offset)
> #define vldrhq_gather_offset_s16(__base, __offset)
> __arm_vldrhq_gather_offset_s16(__base, __offset)
> #define vldrhq_gather_offset_u32(__base, __offset)
> __arm_vldrhq_gather_offset_u32(__base, __offset)
> @@ -380,8 +372,6 @@
> #define vldrwq_u32(__base) __arm_vldrwq_u32(__base)
> #define vldrwq_z_s32(__base, __p) __arm_vldrwq_z_s32(__base, __p)
> #define vldrwq_z_u32(__base, __p) __arm_vldrwq_z_u32(__base, __p)
> -#define vld1q_f32(__base) __arm_vld1q_f32(__base)
> -#define vld1q_f16(__base) __arm_vld1q_f16(__base)
> #define vldrhq_f16(__base) __arm_vldrhq_f16(__base)
> #define vldrhq_z_f16(__base, __p) __arm_vldrhq_z_f16(__base, __p)
> #define vldrwq_f32(__base) __arm_vldrwq_f32(__base)
> @@ -416,14 +406,6 @@
> #define vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p)
> __arm_vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p)
> #define vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p)
> __arm_vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p)
> #define vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p)
> __arm_vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p)
> -#define vst1q_f32(__addr, __value) __arm_vst1q_f32(__addr, __value)
> -#define vst1q_f16(__addr, __value) __arm_vst1q_f16(__addr, __value)
> -#define vst1q_s8(__addr, __value) __arm_vst1q_s8(__addr, __value)
> -#define vst1q_s32(__addr, __value) __arm_vst1q_s32(__addr, __value)
> -#define vst1q_s16(__addr, __value) __arm_vst1q_s16(__addr, __value)
> -#define vst1q_u8(__addr, __value) __arm_vst1q_u8(__addr, __value)
> -#define vst1q_u32(__addr, __value) __arm_vst1q_u32(__addr, __value)
> -#define vst1q_u16(__addr, __value) __arm_vst1q_u16(__addr, __value)
> #define vstrhq_f16(__addr, __value) __arm_vstrhq_f16(__addr, __value)
> #define vstrhq_scatter_offset_s32( __base, __offset, __value)
> __arm_vstrhq_scatter_offset_s32( __base, __offset, __value)
> #define vstrhq_scatter_offset_s16( __base, __offset, __value)
> __arm_vstrhq_scatter_offset_s16( __base, __offset, __value)
> @@ -1537,48 +1519,6 @@ __arm_vldrwq_gather_base_z_u32 (uint32x4_t
> __addr, const int __offset, mve_pred1
> return __builtin_mve_vldrwq_gather_base_z_uv4si (__addr, __offset, __p);
> }
>
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_s8 (int8_t const * __base)
> -{
> - return __builtin_mve_vld1q_sv16qi ((__builtin_neon_qi *) __base);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_s32 (int32_t const * __base)
> -{
> - return __builtin_mve_vld1q_sv4si ((__builtin_neon_si *) __base);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_s16 (int16_t const * __base)
> -{
> - return __builtin_mve_vld1q_sv8hi ((__builtin_neon_hi *) __base);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_u8 (uint8_t const * __base)
> -{
> - return __builtin_mve_vld1q_uv16qi ((__builtin_neon_qi *) __base);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_u32 (uint32_t const * __base)
> -{
> - return __builtin_mve_vld1q_uv4si ((__builtin_neon_si *) __base);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_u16 (uint16_t const * __base)
> -{
> - return __builtin_mve_vld1q_uv8hi ((__builtin_neon_hi *) __base);
> -}
> -
> __extension__ extern __inline int32x4_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> __arm_vldrhq_gather_offset_s32 (int16_t const * __base, uint32x4_t
> __offset)
> @@ -1917,48 +1857,6 @@ __arm_vldrwq_gather_shifted_offset_z_u32
> (uint32_t const * __base, uint32x4_t __
> return __builtin_mve_vldrwq_gather_shifted_offset_z_uv4si
> ((__builtin_neon_si *) __base, __offset, __p);
> }
>
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_s8 (int8_t * __addr, int8x16_t __value)
> -{
> - __builtin_mve_vst1q_sv16qi ((__builtin_neon_qi *) __addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_s32 (int32_t * __addr, int32x4_t __value)
> -{
> - __builtin_mve_vst1q_sv4si ((__builtin_neon_si *) __addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_s16 (int16_t * __addr, int16x8_t __value)
> -{
> - __builtin_mve_vst1q_sv8hi ((__builtin_neon_hi *) __addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_u8 (uint8_t * __addr, uint8x16_t __value)
> -{
> - __builtin_mve_vst1q_uv16qi ((__builtin_neon_qi *) __addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_u32 (uint32_t * __addr, uint32x4_t __value)
> -{
> - __builtin_mve_vst1q_uv4si ((__builtin_neon_si *) __addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_u16 (uint16_t * __addr, uint16x8_t __value)
> -{
> - __builtin_mve_vst1q_uv8hi ((__builtin_neon_hi *) __addr, __value);
> -}
> -
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> __arm_vstrhq_scatter_offset_s32 (int16_t * __base, uint32x4_t __offset,
> int32x4_t __value)
> @@ -4421,20 +4319,6 @@ __arm_vornq_m_f16 (float16x8_t __inactive,
> float16x8_t __a, float16x8_t __b, mve
> return __builtin_mve_vornq_m_fv8hf (__inactive, __a, __b, __p);
> }
>
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_f32 (float32_t const * __base)
> -{
> - return __builtin_mve_vld1q_fv4sf((__builtin_neon_si *) __base);
> -}
> -
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q_f16 (float16_t const * __base)
> -{
> - return __builtin_mve_vld1q_fv8hf((__builtin_neon_hi *) __base);
> -}
> -
> __extension__ extern __inline float32x4_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> __arm_vldrwq_f32 (float32_t const * __base)
> @@ -4547,20 +4431,6 @@ __arm_vstrwq_f32 (float32_t * __addr,
> float32x4_t __value)
> __builtin_mve_vstrwq_fv4sf ((__builtin_neon_si *) __addr, __value);
> }
>
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_f32 (float32_t * __addr, float32x4_t __value)
> -{
> - __builtin_mve_vst1q_fv4sf ((__builtin_neon_si *) __addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q_f16 (float16_t * __addr, float16x8_t __value)
> -{
> - __builtin_mve_vst1q_fv8hf ((__builtin_neon_hi *) __addr, __value);
> -}
> -
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> __arm_vstrhq_f16 (float16_t * __addr, float16x8_t __value)
> @@ -5651,48 +5521,6 @@ __arm_vldrbq_gather_offset_z (uint8_t const *
> __base, uint16x8_t __offset, mve_p
> return __arm_vldrbq_gather_offset_z_u16 (__base, __offset, __p);
> }
>
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (int8_t const * __base)
> -{
> - return __arm_vld1q_s8 (__base);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (int32_t const * __base)
> -{
> - return __arm_vld1q_s32 (__base);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (int16_t const * __base)
> -{
> - return __arm_vld1q_s16 (__base);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (uint8_t const * __base)
> -{
> - return __arm_vld1q_u8 (__base);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (uint32_t const * __base)
> -{
> - return __arm_vld1q_u32 (__base);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (uint16_t const * __base)
> -{
> - return __arm_vld1q_u16 (__base);
> -}
> -
> __extension__ extern __inline int32x4_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> __arm_vldrhq_gather_offset (int16_t const * __base, uint32x4_t __offset)
> @@ -5917,48 +5745,6 @@ __arm_vldrwq_gather_shifted_offset_z (uint32_t
> const * __base, uint32x4_t __offs
> return __arm_vldrwq_gather_shifted_offset_z_u32 (__base, __offset, __p);
> }
>
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (int8_t * __addr, int8x16_t __value)
> -{
> - __arm_vst1q_s8 (__addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (int32_t * __addr, int32x4_t __value)
> -{
> - __arm_vst1q_s32 (__addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (int16_t * __addr, int16x8_t __value)
> -{
> - __arm_vst1q_s16 (__addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (uint8_t * __addr, uint8x16_t __value)
> -{
> - __arm_vst1q_u8 (__addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (uint32_t * __addr, uint32x4_t __value)
> -{
> - __arm_vst1q_u32 (__addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (uint16_t * __addr, uint16x8_t __value)
> -{
> - __arm_vst1q_u16 (__addr, __value);
> -}
> -
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> __arm_vstrhq_scatter_offset (int16_t * __base, uint32x4_t __offset,
> int32x4_t __value)
> @@ -7809,20 +7595,6 @@ __arm_vornq_m (float16x8_t __inactive,
> float16x8_t __a, float16x8_t __b, mve_pre
> return __arm_vornq_m_f16 (__inactive, __a, __b, __p);
> }
>
> -__extension__ extern __inline float32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (float32_t const * __base)
> -{
> - return __arm_vld1q_f32 (__base);
> -}
> -
> -__extension__ extern __inline float16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vld1q (float16_t const * __base)
> -{
> - return __arm_vld1q_f16 (__base);
> -}
> -
> __extension__ extern __inline float16x8_t
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> __arm_vldrhq_gather_offset (float16_t const * __base, uint16x8_t __offset)
> @@ -7893,20 +7665,6 @@ __arm_vstrwq (float32_t * __addr, float32x4_t
> __value)
> __arm_vstrwq_f32 (__addr, __value);
> }
>
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (float32_t * __addr, float32x4_t __value)
> -{
> - __arm_vst1q_f32 (__addr, __value);
> -}
> -
> -__extension__ extern __inline void
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vst1q (float16_t * __addr, float16x8_t __value)
> -{
> - __arm_vst1q_f16 (__addr, __value);
> -}
> -
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> __arm_vstrhq (float16_t * __addr, float16x8_t __value)
> @@ -8670,17 +8428,6 @@ extern void *__ARM_undef;
> int
> (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_
> mve_type_float16x8_t]: __arm_vornq_m_f16 (__ARM_mve_coerce(__p0,
> float16x8_t), __ARM_mve_coerce(__p1, float16x8_t),
> __ARM_mve_coerce(__p2, float16x8_t), p3), \
> int
> (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_
> mve_type_float32x4_t]: __arm_vornq_m_f32 (__ARM_mve_coerce(__p0,
> float32x4_t), __ARM_mve_coerce(__p1, float32x4_t),
> __ARM_mve_coerce(__p2, float32x4_t), p3));})
>
> -#define __arm_vld1q(p0) (\
> - _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8
> (__ARM_mve_coerce_s8_ptr(p0, int8_t *)), \
> - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16
> (__ARM_mve_coerce_s16_ptr(p0, int16_t *)), \
> - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32
> (__ARM_mve_coerce_s32_ptr(p0, int32_t *)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8
> (__ARM_mve_coerce_u8_ptr(p0, uint8_t *)), \
> - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16
> (__ARM_mve_coerce_u16_ptr(p0, uint16_t *)), \
> - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32
> (__ARM_mve_coerce_u32_ptr(p0, uint32_t *)), \
> - int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_f16
> (__ARM_mve_coerce_f16_ptr(p0, float16_t *)), \
> - int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_f32
> (__ARM_mve_coerce_f32_ptr(p0, float32_t *))))
> -
> #define __arm_vld1q_z(p0,p1) ( \
> _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
> int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8
> (__ARM_mve_coerce_s8_ptr(p0, int8_t *), p1), \
> @@ -8792,17 +8539,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x2_t]:
> __arm_vst2q_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *),
> __ARM_mve_coerce(__p1, float16x8x2_t)), \
> int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x2_t]:
> __arm_vst2q_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *),
> __ARM_mve_coerce(__p1, float32x4x2_t)));})
>
> -#define __arm_vst1q(p0,p1) ({ __typeof(p1) __p1 = (p1); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]:
> __arm_vst1q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]:
> __arm_vst1q_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vst1q_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vst1q_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vst1q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vst1q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *),
> __ARM_mve_coerce(__p1, uint32x4_t)), \
> - int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8_t]:
> __arm_vst1q_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *),
> __ARM_mve_coerce(__p1, float16x8_t)), \
> - int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]:
> __arm_vst1q_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *),
> __ARM_mve_coerce(__p1, float32x4_t)));})
> -
> #define __arm_vstrhq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
> int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]:
> __arm_vstrhq_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> @@ -9149,15 +8885,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_p_s32
> (p0, p1, __ARM_mve_coerce(__p2, int32x4_t), p3), \
> int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_p_u32
> (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3));})
>
> -#define __arm_vld1q(p0) (\
> - _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_s8
> (__ARM_mve_coerce_s8_ptr(p0, int8_t *)), \
> - int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_s16
> (__ARM_mve_coerce_s16_ptr(p0, int16_t *)), \
> - int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_s32
> (__ARM_mve_coerce_s32_ptr(p0, int32_t *)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_u8
> (__ARM_mve_coerce_u8_ptr(p0, uint8_t *)), \
> - int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_u16
> (__ARM_mve_coerce_u16_ptr(p0, uint16_t *)), \
> - int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_u32
> (__ARM_mve_coerce_u32_ptr(p0, uint32_t *))))
> -
> #define __arm_vldrhq_gather_offset(p0,p1) ({ __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
> int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vldrhq_gather_offset_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t
> *), __ARM_mve_coerce(__p1, uint16x8_t)), \
> @@ -9206,15 +8933,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_int32_t_ptr]:
> __arm_vldrwq_gather_shifted_offset_z_s32
> (__ARM_mve_coerce_s32_ptr(__p0, int32_t *), p1, p2), \
> int (*)[__ARM_mve_type_uint32_t_ptr]:
> __arm_vldrwq_gather_shifted_offset_z_u32
> (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1, p2));})
>
> -#define __arm_vst1q(p0,p1) ({ __typeof(p1) __p1 = (p1); \
> - _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
> - int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]:
> __arm_vst1q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *),
> __ARM_mve_coerce(__p1, int8x16_t)), \
> - int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]:
> __arm_vst1q_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *),
> __ARM_mve_coerce(__p1, int16x8_t)), \
> - int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]:
> __arm_vst1q_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *),
> __ARM_mve_coerce(__p1, int32x4_t)), \
> - int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]:
> __arm_vst1q_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *),
> __ARM_mve_coerce(__p1, uint8x16_t)), \
> - int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]:
> __arm_vst1q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *),
> __ARM_mve_coerce(__p1, uint16x8_t)), \
> - int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]:
> __arm_vst1q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *),
> __ARM_mve_coerce(__p1, uint32x4_t)));})
> -
> #define __arm_vst1q_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
> int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]:
> __arm_vst1q_p_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *),
> __ARM_mve_coerce(__p1, int8x16_t), p2), \
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 366cec0812a..b0d3443da9c 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -3690,7 +3690,7 @@ (define_insn "mve_vldrwq_z_<supf>v4si"
> }
> [(set_attr "length" "8")])
>
> -(define_expand "mve_vld1q_f<mode>"
> +(define_expand "@mve_vld1q_f<mode>"
> [(match_operand:MVE_0 0 "s_register_operand")
> (unspec:MVE_0 [(match_operand:<MVE_CNVT> 1
> "mve_memory_operand")] VLD1Q_F)
> ]
> @@ -3700,7 +3700,7 @@ (define_expand "mve_vld1q_f<mode>"
> DONE;
> })
>
> -(define_expand "mve_vld1q_<supf><mode>"
> +(define_expand "@mve_vld1q_<supf><mode>"
> [(match_operand:MVE_2 0 "s_register_operand")
> (unspec:MVE_2 [(match_operand:MVE_2 1 "mve_memory_operand")]
> VLD1Q)
> ]
> @@ -4408,7 +4408,7 @@ (define_insn "mve_vstrwq_<supf>v4si"
> }
> [(set_attr "length" "4")])
>
> -(define_expand "mve_vst1q_f<mode>"
> +(define_expand "@mve_vst1q_f<mode>"
> [(match_operand:<MVE_CNVT> 0 "mve_memory_operand")
> (unspec:<MVE_CNVT> [(match_operand:MVE_0 1 "s_register_operand")]
> VST1Q_F)
> ]
> @@ -4418,7 +4418,7 @@ (define_expand "mve_vst1q_f<mode>"
> DONE;
> })
>
> -(define_expand "mve_vst1q_<supf><mode>"
> +(define_expand "@mve_vst1q_<supf><mode>"
> [(match_operand:MVE_2 0 "mve_memory_operand")
> (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand")] VST1Q)
> ]
> --
> 2.34.1