Hi All, This patch adds an implementation of the optabs for complex additions. With this, the following C code:
void f90 (float complex a[restrict N], float complex b[restrict N], float complex c[restrict N]) { for (int i=0; i < N; i++) c[i] = a[i] + (b[i] * I); } generates f90: add r3, r2, #1600 .L2: vld1.32 {q8}, [r0]! vld1.32 {q9}, [r1]! vcadd.f32 q8, q8, q9, #90 vst1.32 {q8}, [r2]! cmp r3, r2 bne .L2 bx lr instead of f90: add r3, r2, #1600 .L2: vld2.32 {d24-d27}, [r0]! vld2.32 {d20-d23}, [r1]! vsub.f32 q8, q12, q11 vadd.f32 q9, q13, q10 vst2.32 {d16-d19}, [r2]! cmp r3, r2 bne .L2 bx lr Bootstrapped and regtested on arm-none-linux-gnueabihf with no issues. Codegen tested for -march=armv8.1-m.main+mve.fp -mfloat-abi=hard -mfpu=auto with no issues. This is just a split of a previously approved patch, because the original depended on the AArch64 bits, which have been requested to be reworked. Will commit under the previous approval. Thanks, Tamar gcc/ChangeLog: * config/arm/arm_mve.h (__arm_vcaddq_rot90_u8, __arm_vcaddq_rot270_u8, __arm_vcaddq_rot90_s8, __arm_vcaddq_rot270_s8, __arm_vcaddq_rot90_u16, __arm_vcaddq_rot270_u16, __arm_vcaddq_rot90_s16, __arm_vcaddq_rot270_s16, __arm_vcaddq_rot90_u32, __arm_vcaddq_rot270_u32, __arm_vcaddq_rot90_s32, __arm_vcaddq_rot270_s32, __arm_vcaddq_rot90_f16, __arm_vcaddq_rot270_f16, __arm_vcaddq_rot90_f32, __arm_vcaddq_rot270_f32): Update builtin calls. * config/arm/arm_mve_builtins.def (vcaddq_rot90_u, vcaddq_rot270_u, vcaddq_rot90_s, vcaddq_rot270_s, vcaddq_rot90_f, vcaddq_rot270_f): Removed. (vcaddq_rot90, vcaddq_rot270): New. * config/arm/constraints.md (Dz): Include MVE. * config/arm/iterators.md (mve_rot): New. (supf): Remove VCADDQ_ROT270_S, VCADDQ_ROT270_U, VCADDQ_ROT90_S, VCADDQ_ROT90_U. (VCADDQ_ROT270, VCADDQ_ROT90): Removed. * config/arm/mve.md (mve_vcaddq_rot270_<supf><mode>, mve_vcaddq_rot90_<supf><mode>, mve_vcaddq_rot270_f<mode>, mve_vcaddq_rot90_f<mode>): Removed. (mve_vcaddq<mve_rot><mode>, mve_vcaddq<mve_rot><mode>, cadd<rot><mode>3): New.
* config/arm/unspecs.md (VCADDQ_ROT270_S, VCADDQ_ROT90_S, VCADDQ_ROT270_U, VCADDQ_ROT90_U, VCADDQ_ROT270_F, VCADDQ_ROT90_F): Removed. * config/arm/vec-common.md (cadd<rot><mode>3): New. --- inline copy of patch -- diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 6c0d1e2e634a32196eb31079166a7733dcd3a4b6..987495dd234ad96ba1163a1f482fe183a46ff437 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -3981,14 +3981,16 @@ __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_u8 (uint8x16_t __a, uint8x16_t __b) { - return __builtin_mve_vcaddq_rot90_uv16qi (__a, __b); + return (uint8x16_t) + __builtin_mve_vcaddq_rot90v16qi ((int8x16_t)__a, (int8x16_t)__b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_u8 (uint8x16_t __a, uint8x16_t __b) { - return __builtin_mve_vcaddq_rot270_uv16qi (__a, __b); + return (uint8x16_t) + __builtin_mve_vcaddq_rot270v16qi ((int8x16_t)__a, (int8x16_t)__b); } __extension__ extern __inline uint8x16_t @@ -4520,14 +4522,14 @@ __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_s8 (int8x16_t __a, int8x16_t __b) { - return __builtin_mve_vcaddq_rot90_sv16qi (__a, __b); + return __builtin_mve_vcaddq_rot90v16qi (__a, __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_s8 (int8x16_t __a, int8x16_t __b) { - return __builtin_mve_vcaddq_rot270_sv16qi (__a, __b); + return __builtin_mve_vcaddq_rot270v16qi (__a, __b); } __extension__ extern __inline int8x16_t @@ -4821,14 +4823,16 @@ __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_u16 (uint16x8_t __a, uint16x8_t __b) { - return __builtin_mve_vcaddq_rot90_uv8hi (__a, __b); + return 
(uint16x8_t) + __builtin_mve_vcaddq_rot90v8hi ((int16x8_t)__a, (int16x8_t)__b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_u16 (uint16x8_t __a, uint16x8_t __b) { - return __builtin_mve_vcaddq_rot270_uv8hi (__a, __b); + return (uint16x8_t) + __builtin_mve_vcaddq_rot270v8hi ((int16x8_t)__a, (int16x8_t)__b); } __extension__ extern __inline uint16x8_t @@ -5360,14 +5364,14 @@ __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_s16 (int16x8_t __a, int16x8_t __b) { - return __builtin_mve_vcaddq_rot90_sv8hi (__a, __b); + return __builtin_mve_vcaddq_rot90v8hi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_s16 (int16x8_t __a, int16x8_t __b) { - return __builtin_mve_vcaddq_rot270_sv8hi (__a, __b); + return __builtin_mve_vcaddq_rot270v8hi (__a, __b); } __extension__ extern __inline int16x8_t @@ -5661,14 +5665,16 @@ __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_u32 (uint32x4_t __a, uint32x4_t __b) { - return __builtin_mve_vcaddq_rot90_uv4si (__a, __b); + return (uint32x4_t) + __builtin_mve_vcaddq_rot90v4si ((int32x4_t)__a, (int32x4_t)__b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_u32 (uint32x4_t __a, uint32x4_t __b) { - return __builtin_mve_vcaddq_rot270_uv4si (__a, __b); + return (uint32x4_t) + __builtin_mve_vcaddq_rot270v4si ((int32x4_t)__a, (int32x4_t)__b); } __extension__ extern __inline uint32x4_t @@ -6200,14 +6206,14 @@ __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_s32 (int32x4_t __a, int32x4_t __b) { - return __builtin_mve_vcaddq_rot90_sv4si (__a, __b); + return 
__builtin_mve_vcaddq_rot90v4si (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_s32 (int32x4_t __a, int32x4_t __b) { - return __builtin_mve_vcaddq_rot270_sv4si (__a, __b); + return __builtin_mve_vcaddq_rot270v4si (__a, __b); } __extension__ extern __inline int32x4_t @@ -17370,14 +17376,14 @@ __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_mve_vcaddq_rot90_fv8hf (__a, __b); + return __builtin_mve_vcaddq_rot90v8hf (__a, __b); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_mve_vcaddq_rot270_fv8hf (__a, __b); + return __builtin_mve_vcaddq_rot270v8hf (__a, __b); } __extension__ extern __inline float16x8_t @@ -17622,14 +17628,14 @@ __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_mve_vcaddq_rot90_fv4sf (__a, __b); + return __builtin_mve_vcaddq_rot90v4sf (__a, __b); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_mve_vcaddq_rot270_fv4sf (__a, __b); + return __builtin_mve_vcaddq_rot270v4sf (__a, __b); } __extension__ extern __inline float32x4_t diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index f38926ffd8e44f63d25a8fb9bf8f7d8680570ef0..b86e1793e686b07367732c4ffd0603deb78830bd 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -125,8 +125,6 @@ VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, 
vcmpeqq_n_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot90_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot270_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vaddvq_p_u, v16qi, v8hi, v4si) @@ -202,8 +200,6 @@ VAR3 (BINOP_NONE_NONE_NONE, vhcaddq_rot270_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vhaddq_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vhaddq_n_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, veorq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot90_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot270_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vbrsrq_n_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vbicq_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vandq_s, v16qi, v8hi, v4si) @@ -268,8 +264,6 @@ VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot90_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot270_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot180_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vcmulq_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_NONE, vcaddq_rot90_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_NONE, vcaddq_rot270_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vbicq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vandq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vaddq_n_f, v8hf, v4sf) @@ -892,3 +886,7 @@ VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_vec_s, v16qi, v8hi, v4si) VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_carry_s, v16qi, v8hi, v4si) VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_vec_u, v16qi, v8hi, v4si) VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_carry_u, v16qi, v8hi, v4si) + +/* optabs without any suffixes. 
*/ +VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot90, v16qi, v8hi, v4si, v8hf, v4sf) +VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot270, v16qi, v8hi, v4si, v8hf, v4sf) diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index 789e3332abb7495b308509d03ed241d39498a8b6..6ebddb95b4f9c835f10f5265573f27a06ccbd11f 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -310,7 +310,7 @@ (define_constraint "Dz" "@internal In ARM/Thumb-2 state a vector of constant zeros." (and (match_code "const_vector") - (match_test "TARGET_NEON && op == CONST0_RTX (mode)"))) + (match_test "(TARGET_NEON || TARGET_HAVE_MVE) && op == CONST0_RTX (mode)"))) (define_constraint "Da" "@internal diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 036a939e2ee758abede76485cdd4946894993111..adf4c017735812fdb318f615ede1407298377519 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -1182,6 +1182,9 @@ (define_int_attr rot [(UNSPEC_VCADD90 "90") (UNSPEC_VCMLA180 "180") (UNSPEC_VCMLA270 "270")]) +(define_int_attr mve_rot [(UNSPEC_VCADD90 "_rot90") + (UNSPEC_VCADD270 "_rot270")]) + (define_int_attr simd32_op [(UNSPEC_QADD8 "qadd8") (UNSPEC_QSUB8 "qsub8") (UNSPEC_SHADD8 "shadd8") (UNSPEC_SHSUB8 "shsub8") (UNSPEC_UHADD8 "uhadd8") (UNSPEC_UHSUB8 "uhsub8") @@ -1232,10 +1235,8 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s") (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s") (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s") (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u") - (VADDVQ_P_S "s") (VADDVQ_P_U "u") - (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s") - (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s") - (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u") + (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VBRSRQ_N_S "s") + (VBRSRQ_N_U "u") (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u") (VCMPNEQ_N_S "s") (VCMPNEQ_N_U "u") (VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s") @@ -1500,8 +1501,6 @@ 
(define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U]) (define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U]) (define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S]) (define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S]) -(define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U]) -(define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S]) (define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S]) (define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U]) (define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index b4c5a1e27c41e2270e05f9f4da1c055457a20ad3..516d0a3172e9cbb9a7fe0e9a1cd45ba5d935344b 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -962,34 +962,28 @@ (define_insn "mve_vbrsrq_n_<supf><mode>" ]) ;; -;; [vcaddq_rot270_s, vcaddq_rot270_u]) +;; [vcaddq, vcaddq_rot90, vcadd_rot180, vcadd_rot270]) ;; -(define_insn "mve_vcaddq_rot270_<supf><mode>" +(define_insn "mve_vcaddq<mve_rot><mode>" [ (set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w")] - VCADDQ_ROT270)) + VCADD)) ] "TARGET_HAVE_MVE" - "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #270" + "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #<rot>" [(set_attr "type" "mve_move") ]) -;; -;; [vcaddq_rot90_u, vcaddq_rot90_s]) -;; -(define_insn "mve_vcaddq_rot90_<supf><mode>" - [ - (set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>") - (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w") - (match_operand:MVE_2 2 "s_register_operand" "w")] - VCADDQ_ROT90)) - ] - "TARGET_HAVE_MVE" - "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #90" - [(set_attr "type" "mve_move") -]) +;; Auto vectorizer pattern for int vcadd +(define_expand "cadd<rot><mode>3" + [(set (match_operand:MVE_2 0 "register_operand") + (unspec:MVE_2 [(match_operand:MVE_2 1 "register_operand") + (match_operand:MVE_2 2 "register_operand")] + VCADD))] 
+ "TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN" +) ;; ;; [vcmpcsq_n_u]) @@ -2102,32 +2096,17 @@ (define_insn "mve_vbicq_n_<supf><mode>" ]) ;; -;; [vcaddq_rot270_f]) -;; -(define_insn "mve_vcaddq_rot270_f<mode>" - [ - (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") - (match_operand:MVE_0 2 "s_register_operand" "w")] - VCADDQ_ROT270_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #270" - [(set_attr "type" "mve_move") -]) - -;; -;; [vcaddq_rot90_f]) +;; [vcaddq, vcaddq_rot90, vcadd_rot180, vcadd_rot270]) ;; -(define_insn "mve_vcaddq_rot90_f<mode>" +(define_insn "mve_vcaddq<mve_rot><mode>" [ (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:MVE_0 2 "s_register_operand" "w")] - VCADDQ_ROT90_F)) + VCADD)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #90" + "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #<rot>" [(set_attr "type" "mve_move") ]) diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index ef64989600dcad642af54d7bf0250728a9fb7502..3f9ebe7b1753045e53044324cc7302f51d0eed21 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -598,8 +598,6 @@ (define_c_enum "unspec" [ VADDVAQ_S VADDVQ_P_S VBRSRQ_N_S - VCADDQ_ROT270_S - VCADDQ_ROT90_S VCMPEQQ_S VCMPEQQ_N_S VCMPNEQ_N_S @@ -641,8 +639,6 @@ (define_c_enum "unspec" [ VADDVAQ_U VADDVQ_P_U VBRSRQ_N_U - VCADDQ_ROT270_U - VCADDQ_ROT90_U VCMPEQQ_U VCMPEQQ_N_U VCMPNEQ_N_U @@ -709,8 +705,6 @@ (define_c_enum "unspec" [ VABDQ_M_U VABDQ_F VADDQ_N_F - VCADDQ_ROT270_F - VCADDQ_ROT90_F VCMPEQQ_F VCMPEQQ_N_F VCMPGEQ_F diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 2d0932b95a1615e94aa6572a593fc9849c9b36db..ce1ea960161164098a6512cf58843bb64251fb05 100644 --- a/gcc/config/arm/vec-common.md +++ 
b/gcc/config/arm/vec-common.md @@ -205,3 +205,13 @@ (define_expand "neg<mode>2" (neg:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))] "ARM_HAVE_<MODE>_ARITH" ) + +(define_expand "cadd<rot><mode>3" + [(set (match_operand:VF 0 "register_operand") + (unspec:VF [(match_operand:VF 1 "register_operand") + (match_operand:VF 2 "register_operand")] + VCADD))] + "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT + && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN" +) + --
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 6c0d1e2e634a32196eb31079166a7733dcd3a4b6..987495dd234ad96ba1163a1f482fe183a46ff437 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -3981,14 +3981,16 @@ __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_u8 (uint8x16_t __a, uint8x16_t __b) { - return __builtin_mve_vcaddq_rot90_uv16qi (__a, __b); + return (uint8x16_t) + __builtin_mve_vcaddq_rot90v16qi ((int8x16_t)__a, (int8x16_t)__b); } __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_u8 (uint8x16_t __a, uint8x16_t __b) { - return __builtin_mve_vcaddq_rot270_uv16qi (__a, __b); + return (uint8x16_t) + __builtin_mve_vcaddq_rot270v16qi ((int8x16_t)__a, (int8x16_t)__b); } __extension__ extern __inline uint8x16_t @@ -4520,14 +4522,14 @@ __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_s8 (int8x16_t __a, int8x16_t __b) { - return __builtin_mve_vcaddq_rot90_sv16qi (__a, __b); + return __builtin_mve_vcaddq_rot90v16qi (__a, __b); } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_s8 (int8x16_t __a, int8x16_t __b) { - return __builtin_mve_vcaddq_rot270_sv16qi (__a, __b); + return __builtin_mve_vcaddq_rot270v16qi (__a, __b); } __extension__ extern __inline int8x16_t @@ -4821,14 +4823,16 @@ __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_u16 (uint16x8_t __a, uint16x8_t __b) { - return __builtin_mve_vcaddq_rot90_uv8hi (__a, __b); + return (uint16x8_t) + __builtin_mve_vcaddq_rot90v8hi ((int16x8_t)__a, (int16x8_t)__b); } __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_u16 
(uint16x8_t __a, uint16x8_t __b) { - return __builtin_mve_vcaddq_rot270_uv8hi (__a, __b); + return (uint16x8_t) + __builtin_mve_vcaddq_rot270v8hi ((int16x8_t)__a, (int16x8_t)__b); } __extension__ extern __inline uint16x8_t @@ -5360,14 +5364,14 @@ __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_s16 (int16x8_t __a, int16x8_t __b) { - return __builtin_mve_vcaddq_rot90_sv8hi (__a, __b); + return __builtin_mve_vcaddq_rot90v8hi (__a, __b); } __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_s16 (int16x8_t __a, int16x8_t __b) { - return __builtin_mve_vcaddq_rot270_sv8hi (__a, __b); + return __builtin_mve_vcaddq_rot270v8hi (__a, __b); } __extension__ extern __inline int16x8_t @@ -5661,14 +5665,16 @@ __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_u32 (uint32x4_t __a, uint32x4_t __b) { - return __builtin_mve_vcaddq_rot90_uv4si (__a, __b); + return (uint32x4_t) + __builtin_mve_vcaddq_rot90v4si ((int32x4_t)__a, (int32x4_t)__b); } __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_u32 (uint32x4_t __a, uint32x4_t __b) { - return __builtin_mve_vcaddq_rot270_uv4si (__a, __b); + return (uint32x4_t) + __builtin_mve_vcaddq_rot270v4si ((int32x4_t)__a, (int32x4_t)__b); } __extension__ extern __inline uint32x4_t @@ -6200,14 +6206,14 @@ __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_s32 (int32x4_t __a, int32x4_t __b) { - return __builtin_mve_vcaddq_rot90_sv4si (__a, __b); + return __builtin_mve_vcaddq_rot90v4si (__a, __b); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_s32 (int32x4_t __a, int32x4_t __b) { - return 
__builtin_mve_vcaddq_rot270_sv4si (__a, __b); + return __builtin_mve_vcaddq_rot270v4si (__a, __b); } __extension__ extern __inline int32x4_t @@ -17370,14 +17376,14 @@ __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_mve_vcaddq_rot90_fv8hf (__a, __b); + return __builtin_mve_vcaddq_rot90v8hf (__a, __b); } __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_f16 (float16x8_t __a, float16x8_t __b) { - return __builtin_mve_vcaddq_rot270_fv8hf (__a, __b); + return __builtin_mve_vcaddq_rot270v8hf (__a, __b); } __extension__ extern __inline float16x8_t @@ -17622,14 +17628,14 @@ __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot90_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_mve_vcaddq_rot90_fv4sf (__a, __b); + return __builtin_mve_vcaddq_rot90v4sf (__a, __b); } __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcaddq_rot270_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_mve_vcaddq_rot270_fv4sf (__a, __b); + return __builtin_mve_vcaddq_rot270v4sf (__a, __b); } __extension__ extern __inline float32x4_t diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index f38926ffd8e44f63d25a8fb9bf8f7d8680570ef0..b86e1793e686b07367732c4ffd0603deb78830bd 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -125,8 +125,6 @@ VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpeqq_n_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot90_u, v16qi, v8hi, v4si) 
-VAR3 (BINOP_UNONE_UNONE_UNONE, vcaddq_rot270_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vaddvq_p_u, v16qi, v8hi, v4si) @@ -202,8 +200,6 @@ VAR3 (BINOP_NONE_NONE_NONE, vhcaddq_rot270_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vhaddq_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vhaddq_n_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, veorq_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot90_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_NONE, vcaddq_rot270_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vbrsrq_n_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vbicq_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vandq_s, v16qi, v8hi, v4si) @@ -268,8 +264,6 @@ VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot90_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot270_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vcmulq_rot180_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vcmulq_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_NONE, vcaddq_rot90_f, v8hf, v4sf) -VAR2 (BINOP_NONE_NONE_NONE, vcaddq_rot270_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vbicq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vandq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vaddq_n_f, v8hf, v4sf) @@ -892,3 +886,7 @@ VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_vec_s, v16qi, v8hi, v4si) VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_carry_s, v16qi, v8hi, v4si) VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_vec_u, v16qi, v8hi, v4si) VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_carry_u, v16qi, v8hi, v4si) + +/* optabs without any suffixes. 
*/ +VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot90, v16qi, v8hi, v4si, v8hf, v4sf) +VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot270, v16qi, v8hi, v4si, v8hf, v4sf) diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index 789e3332abb7495b308509d03ed241d39498a8b6..6ebddb95b4f9c835f10f5265573f27a06ccbd11f 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -310,7 +310,7 @@ (define_constraint "Dz" "@internal In ARM/Thumb-2 state a vector of constant zeros." (and (match_code "const_vector") - (match_test "TARGET_NEON && op == CONST0_RTX (mode)"))) + (match_test "(TARGET_NEON || TARGET_HAVE_MVE) && op == CONST0_RTX (mode)"))) (define_constraint "Da" "@internal diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 036a939e2ee758abede76485cdd4946894993111..adf4c017735812fdb318f615ede1407298377519 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -1182,6 +1182,9 @@ (define_int_attr rot [(UNSPEC_VCADD90 "90") (UNSPEC_VCMLA180 "180") (UNSPEC_VCMLA270 "270")]) +(define_int_attr mve_rot [(UNSPEC_VCADD90 "_rot90") + (UNSPEC_VCADD270 "_rot270")]) + (define_int_attr simd32_op [(UNSPEC_QADD8 "qadd8") (UNSPEC_QSUB8 "qsub8") (UNSPEC_SHADD8 "shadd8") (UNSPEC_SHSUB8 "shsub8") (UNSPEC_UHADD8 "uhadd8") (UNSPEC_UHSUB8 "uhsub8") @@ -1232,10 +1235,8 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s") (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s") (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s") (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u") - (VADDVQ_P_S "s") (VADDVQ_P_U "u") - (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s") - (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s") - (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u") + (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VBRSRQ_N_S "s") + (VBRSRQ_N_U "u") (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u") (VCMPNEQ_N_S "s") (VCMPNEQ_N_U "u") (VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s") @@ -1500,8 +1501,6 @@ 
(define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U]) (define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U]) (define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S]) (define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S]) -(define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U]) -(define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S]) (define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S]) (define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U]) (define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index b4c5a1e27c41e2270e05f9f4da1c055457a20ad3..516d0a3172e9cbb9a7fe0e9a1cd45ba5d935344b 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -962,34 +962,28 @@ (define_insn "mve_vbrsrq_n_<supf><mode>" ]) ;; -;; [vcaddq_rot270_s, vcaddq_rot270_u]) +;; [vcaddq, vcaddq_rot90, vcadd_rot180, vcadd_rot270]) ;; -(define_insn "mve_vcaddq_rot270_<supf><mode>" +(define_insn "mve_vcaddq<mve_rot><mode>" [ (set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w")] - VCADDQ_ROT270)) + VCADD)) ] "TARGET_HAVE_MVE" - "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #270" + "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #<rot>" [(set_attr "type" "mve_move") ]) -;; -;; [vcaddq_rot90_u, vcaddq_rot90_s]) -;; -(define_insn "mve_vcaddq_rot90_<supf><mode>" - [ - (set (match_operand:MVE_2 0 "s_register_operand" "<earlyclobber_32>") - (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w") - (match_operand:MVE_2 2 "s_register_operand" "w")] - VCADDQ_ROT90)) - ] - "TARGET_HAVE_MVE" - "vcadd.i%#<V_sz_elem> %q0, %q1, %q2, #90" - [(set_attr "type" "mve_move") -]) +;; Auto vectorizer pattern for int vcadd +(define_expand "cadd<rot><mode>3" + [(set (match_operand:MVE_2 0 "register_operand") + (unspec:MVE_2 [(match_operand:MVE_2 1 "register_operand") + (match_operand:MVE_2 2 "register_operand")] + VCADD))] 
+ "TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN" +) ;; ;; [vcmpcsq_n_u]) @@ -2102,32 +2096,17 @@ (define_insn "mve_vbicq_n_<supf><mode>" ]) ;; -;; [vcaddq_rot270_f]) -;; -(define_insn "mve_vcaddq_rot270_f<mode>" - [ - (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") - (match_operand:MVE_0 2 "s_register_operand" "w")] - VCADDQ_ROT270_F)) - ] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #270" - [(set_attr "type" "mve_move") -]) - -;; -;; [vcaddq_rot90_f]) +;; [vcaddq, vcaddq_rot90, vcadd_rot180, vcadd_rot270]) ;; -(define_insn "mve_vcaddq_rot90_f<mode>" +(define_insn "mve_vcaddq<mve_rot><mode>" [ (set (match_operand:MVE_0 0 "s_register_operand" "<earlyclobber_32>") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:MVE_0 2 "s_register_operand" "w")] - VCADDQ_ROT90_F)) + VCADD)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #90" + "vcadd.f%#<V_sz_elem> %q0, %q1, %q2, #<rot>" [(set_attr "type" "mve_move") ]) diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index ef64989600dcad642af54d7bf0250728a9fb7502..3f9ebe7b1753045e53044324cc7302f51d0eed21 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -598,8 +598,6 @@ (define_c_enum "unspec" [ VADDVAQ_S VADDVQ_P_S VBRSRQ_N_S - VCADDQ_ROT270_S - VCADDQ_ROT90_S VCMPEQQ_S VCMPEQQ_N_S VCMPNEQ_N_S @@ -641,8 +639,6 @@ (define_c_enum "unspec" [ VADDVAQ_U VADDVQ_P_U VBRSRQ_N_U - VCADDQ_ROT270_U - VCADDQ_ROT90_U VCMPEQQ_U VCMPEQQ_N_U VCMPNEQ_N_U @@ -709,8 +705,6 @@ (define_c_enum "unspec" [ VABDQ_M_U VABDQ_F VADDQ_N_F - VCADDQ_ROT270_F - VCADDQ_ROT90_F VCMPEQQ_F VCMPEQQ_N_F VCMPGEQ_F diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 2d0932b95a1615e94aa6572a593fc9849c9b36db..ce1ea960161164098a6512cf58843bb64251fb05 100644 --- a/gcc/config/arm/vec-common.md +++ 
b/gcc/config/arm/vec-common.md @@ -205,3 +205,13 @@ (define_expand "neg<mode>2" (neg:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))] "ARM_HAVE_<MODE>_ARITH" ) + +(define_expand "cadd<rot><mode>3" + [(set (match_operand:VF 0 "register_operand") + (unspec:VF [(match_operand:VF 1 "register_operand") + (match_operand:VF 2 "register_operand")] + VCADD))] + "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT + && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN" +) +