https://gcc.gnu.org/g:027205879733933ec991c230795da6c01ac50029
commit r16-4440-g027205879733933ec991c230795da6c01ac50029 Author: Christophe Lyon <[email protected]> Date: Thu Oct 2 13:52:22 2025 +0000 arm: [MVE] Fix carry-in support for vadcq / vsbcq [PR122189] The vadcq and vsbcq patterns had two problems: - the adc / sbc part of the pattern did not mention the use of vfpcc - the carry calcultation part should use a different unspec code In addtion, the get_fpscr_nzcvqc and set_fpscr_nzcvqc were over-cautious by using unspec_volatile when unspec is really what they need. Making them unspec enables to remove redundant accesses to FPSCR_nzcvqc. With unspec_volatile, we used to generate: test_2: @ args = 0, pretend = 0, frame = 8 @ frame_needed = 0, uses_anonymous_args = 0 vmov.i32 q0, #0x1 @ v4si push {lr} sub sp, sp, #12 vmrs r3, FPSCR_nzcvqc ;; [1] bic r3, r3, #536870912 vmsr FPSCR_nzcvqc, r3 vadc.i32 q3, q0, q0 vmrs r3, FPSCR_nzcvqc ;; [2] vmrs r3, FPSCR_nzcvqc orr r3, r3, #536870912 vmsr FPSCR_nzcvqc, r3 vadc.i32 q0, q0, q0 vmrs r3, FPSCR_nzcvqc ldr r0, .L8 ubfx r3, r3, #29, #1 str r3, [sp, #4] bl print_uint32x4_t add sp, sp, #12 @ sp needed pop {pc} .L9: .align 2 .L8: .word .LC1 with unspec, we generate: test_2: @ args = 0, pretend = 0, frame = 8 @ frame_needed = 0, uses_anonymous_args = 0 vmrs r3, FPSCR_nzcvqc ;; [1] bic r3, r3, #536870912 ;; [3] vmov.i32 q0, #0x1 @ v4si vmsr FPSCR_nzcvqc, r3 vadc.i32 q3, q0, q0 vmrs r3, FPSCR_nzcvqc orr r3, r3, #536870912 vmsr FPSCR_nzcvqc, r3 vadc.i32 q0, q0, q0 vmrs r3, FPSCR_nzcvqc push {lr} ubfx r3, r3, #29, #1 sub sp, sp, #12 ldr r0, .L8 str r3, [sp, #4] bl print_uint32x4_t add sp, sp, #12 @ sp needed pop {pc} .L9: .align 2 .L8: .word .LC1 That is, unspec in get_fpscr_nzcvqc enables to: - move [1] earlier - delete redundant [2] and unspec in set_fpscr_nzcvqc enables to move push {lr} and stack manipulation later. gcc/ChangeLog: PR target/122189 * config/arm/iterators.md (VxCIQ_carry, VxCIQ_M_carry, VxCQ_carry) (VxCQ_M_carry): New iterators. * config/arm/mve.md (get_fpscr_nzcvqc, set_fpscr_nzcvqc): Use unspec instead of unspec_volatile. (vadciq, vadciq_m, vadcq, vadcq_m): Use vfpcc in operation. Use a different unspec code for carry calcultation. * config/arm/unspecs.md (VADCQ_U_carry, VADCQ_M_U_carry) (VADCQ_S_carry, VADCQ_M_S_carry, VSBCIQ_U_carry ,VSBCIQ_S_carry ,VSBCIQ_M_U_carry ,VSBCIQ_M_S_carry ,VSBCQ_U_carry ,VSBCQ_S_carry ,VSBCQ_M_U_carry ,VSBCQ_M_S_carry ,VADCIQ_U_carry ,VADCIQ_M_U_carry ,VADCIQ_S_carry ,VADCIQ_M_S_carry): New unspec codes. gcc/testsuite/ChangeLog: PR target/122189 * gcc.target/arm/mve/intrinsics/vadcq-check-carry.c: New test. * gcc.target/arm/mve/intrinsics/vadcq_m_s32.c: Adjust instructions order. * gcc.target/arm/mve/intrinsics/vadcq_m_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c: Likewise. Diff: --- gcc/config/arm/iterators.md | 17 ++++++++ gcc/config/arm/mve.md | 36 ++++++++++------ gcc/config/arm/unspecs.md | 16 ++++++++ .../arm/mve/intrinsics/vadcq-check-carry.c | 48 ++++++++++++++++++++++ .../gcc.target/arm/mve/intrinsics/vadcq_m_s32.c | 8 ++-- .../gcc.target/arm/mve/intrinsics/vadcq_m_u32.c | 8 ++-- .../gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c | 8 ++-- .../gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c | 8 ++-- 8 files changed, 121 insertions(+), 28 deletions(-) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 0c163ed47820..eb519e7b5f8c 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -3014,3 +3014,20 @@ ;; Define iterators for VCMLA operations as MUL (define_int_iterator VCMUL_OP [UNSPEC_VCMUL UNSPEC_VCMUL_CONJ]) + +(define_int_attr VxCIQ_carry [(VADCIQ_U "VADCIQ_U_carry") + (VADCIQ_S "VADCIQ_S_carry") + (VSBCIQ_U "VSBCIQ_U_carry") + (VSBCIQ_S "VSBCIQ_S_carry")]) +(define_int_attr VxCIQ_M_carry [(VADCIQ_M_U "VADCIQ_M_U_carry") + (VADCIQ_M_S "VADCIQ_M_S_carry") + (VSBCIQ_M_U "VSBCIQ_M_U_carry") + (VSBCIQ_M_S "VSBCIQ_M_S_carry")]) +(define_int_attr VxCQ_carry [(VADCQ_U "VADCQ_U_carry") + (VADCQ_S "VADCQ_S_carry") + (VSBCQ_U "VSBCQ_U_carry") + (VSBCQ_S "VSBCQ_S_carry")]) +(define_int_attr VxCQ_M_carry [(VADCQ_M_U "VADCQ_M_U_carry") + (VADCQ_M_S "VADCQ_M_S_carry") + (VSBCQ_M_U "VSBCQ_M_U_carry") + (VSBCQ_M_S "VSBCQ_M_S_carry")]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index bd3db24b0c23..87b45b2e41c2 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -3965,14 +3965,14 @@ (define_insn "get_fpscr_nzcvqc" [(set (match_operand:SI 0 "register_operand" "=r") - (unspec_volatile:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR_NZCVQC))] + (unspec:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR_NZCVQC))] "TARGET_HAVE_MVE" "vmrs\\t%0, FPSCR_nzcvqc" [(set_attr "type" "mve_move")]) (define_insn "set_fpscr_nzcvqc" [(set (reg:SI VFPCC_REGNUM) - (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] + (unspec:SI [(match_operand:SI 0 "register_operand" "r")] VUNSPEC_SET_FPSCR_NZCVQC))] "TARGET_HAVE_MVE" "vmsr\\tFPSCR_nzcvqc, %0" @@ -3988,8 +3988,9 @@ (match_operand:V4SI 2 "s_register_operand" "w")] VxCIQ)) (set (reg:SI VFPCC_REGNUM) - (unspec:SI [(const_int 0)] - VxCIQ)) + (unspec:SI [(match_dup 1) + (match_dup 2)] + <VxCIQ_carry>)) ] "TARGET_HAVE_MVE" "<mve_insn>.i32\t%q0, %q1, %q2" @@ -4009,8 +4010,11 @@ (match_operand:V4BI 4 "vpr_register_operand" "Up")] VxCIQ_M)) (set (reg:SI VFPCC_REGNUM) - (unspec:SI [(const_int 0)] - VxCIQ_M)) + (unspec:SI [(match_dup 1) + (match_dup 2) + (match_dup 3) + (match_dup 4)] + <VxCIQ_M_carry>)) ] "TARGET_HAVE_MVE" "vpst\;<mve_insn>t.i32\t%q0, %q2, %q3" @@ -4025,11 +4029,14 @@ (define_insn "@mve_<mve_insn>q_<supf>v4si" [(set (match_operand:V4SI 0 "s_register_operand" "=w") (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:V4SI 2 "s_register_operand" "w")] + (match_operand:V4SI 2 "s_register_operand" "w") + (reg:SI VFPCC_REGNUM)] VxCQ)) (set (reg:SI VFPCC_REGNUM) - (unspec:SI [(reg:SI VFPCC_REGNUM)] - VxCQ)) + (unspec:SI [(match_dup 1) + (match_dup 2) + (reg:SI VFPCC_REGNUM)] + <VxCQ_carry>)) ] "TARGET_HAVE_MVE" "<mve_insn>.i32\t%q0, %q1, %q2" @@ -4047,11 +4054,16 @@ (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "0") (match_operand:V4SI 2 "s_register_operand" "w") (match_operand:V4SI 3 "s_register_operand" "w") - (match_operand:V4BI 4 "vpr_register_operand" "Up")] + (match_operand:V4BI 4 "vpr_register_operand" "Up") + (reg:SI VFPCC_REGNUM)] VxCQ_M)) (set (reg:SI VFPCC_REGNUM) - (unspec:SI [(reg:SI VFPCC_REGNUM)] - VxCQ_M)) + (unspec:SI [(match_dup 1) + (match_dup 2) + (match_dup 3) + (match_dup 4) + (reg:SI VFPCC_REGNUM)] + <VxCQ_M_carry>)) ] "TARGET_HAVE_MVE" "vpst\;<mve_insn>t.i32\t%q0, %q2, %q3" diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index c1ee97248e0d..17af152ad32a 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -1160,21 +1160,37 @@ VLDRGBWBQ VLDRGBWBQ_Z VADCQ_U + VADCQ_U_carry VADCQ_M_U + VADCQ_M_U_carry VADCQ_S + VADCQ_S_carry VADCQ_M_S + VADCQ_M_S_carry VSBCIQ_U + VSBCIQ_U_carry VSBCIQ_S + VSBCIQ_S_carry VSBCIQ_M_U + VSBCIQ_M_U_carry VSBCIQ_M_S + VSBCIQ_M_S_carry VSBCQ_U + VSBCQ_U_carry VSBCQ_S + VSBCQ_S_carry VSBCQ_M_U + VSBCQ_M_U_carry VSBCQ_M_S + VSBCQ_M_S_carry VADCIQ_U + VADCIQ_U_carry VADCIQ_M_U + VADCIQ_M_U_carry VADCIQ_S + VADCIQ_S_carry VADCIQ_M_S + VADCIQ_M_S_carry VLD2Q VLD4Q VST2Q diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq-check-carry.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq-check-carry.c new file mode 100644 index 000000000000..3a9b8debf982 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq-check-carry.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-require-effective-target arm_mve_hw } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_v8_1m_mve } */ + +#include "arm_mve.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <inttypes.h> +#include <stdio.h> + +__attribute((noinline)) void print_uint32x4_t(const char *name, uint32x4_t val) +{ + printf("%s: %u, %u, %u, %u\n", + name, + vgetq_lane_u32(val, 0), + vgetq_lane_u32(val, 1), + vgetq_lane_u32(val, 2), + vgetq_lane_u32(val, 3)); +} + +void __attribute__ ((noinline)) test_2(void) +{ + uint32x4_t v12, v18, v108; + unsigned v17 = 0; + v12 = vdupq_n_u32(1); + v18 = vadcq_u32(v12, v12, &v17); + v17 = 1; + v108 = vadcq_u32(v12, v12, &v17); + print_uint32x4_t("v108", v108); +} + +int main() +{ + test_2(); + return 0; +} + +#ifdef __cplusplus +} +#endif + +/* { dg-output "v108: 3, 2, 2, 2" } */ +/* { dg-final { scan-assembler-times {\tvmrs\t(?:ip|fp|r[0-9]+), FPSCR_nzcvqc} 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_s32.c index 0d4cb7792549..c5a58782cdef 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_s32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_s32.c @@ -14,12 +14,12 @@ extern "C" { ** ... ** vmrs (?:ip|fp|r[0-9]+), FPSCR_nzcvqc(?: @.*|) ** ... +** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) +** ... ** bfi (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #29, #1(?: @.*|) ** ... ** vmsr FPSCR_nzcvqc, (?:ip|fp|r[0-9]+)(?: @.*|) ** ... -** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) -** ... ** vpst(?: @.*|) ** ... ** vadct.i32 q[0-9]+, q[0-9]+, q[0-9]+(?: @.*|) @@ -41,12 +41,12 @@ foo (int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry, mve_pred16_t ** ... ** vmrs (?:ip|fp|r[0-9]+), FPSCR_nzcvqc(?: @.*|) ** ... +** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) +** ... ** bfi (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #29, #1(?: @.*|) ** ... ** vmsr FPSCR_nzcvqc, (?:ip|fp|r[0-9]+)(?: @.*|) ** ... -** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) -** ... ** vpst(?: @.*|) ** ... ** vadct.i32 q[0-9]+, q[0-9]+, q[0-9]+(?: @.*|) diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_u32.c index a0ba6825d8c9..23908a4273db 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_u32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_u32.c @@ -14,12 +14,12 @@ extern "C" { ** ... ** vmrs (?:ip|fp|r[0-9]+), FPSCR_nzcvqc(?: @.*|) ** ... +** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) +** ... ** bfi (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #29, #1(?: @.*|) ** ... ** vmsr FPSCR_nzcvqc, (?:ip|fp|r[0-9]+)(?: @.*|) ** ... -** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) -** ... ** vpst(?: @.*|) ** ... ** vadct.i32 q[0-9]+, q[0-9]+, q[0-9]+(?: @.*|) @@ -41,12 +41,12 @@ foo (uint32x4_t inactive, uint32x4_t a, uint32x4_t b, unsigned *carry, mve_pred1 ** ... ** vmrs (?:ip|fp|r[0-9]+), FPSCR_nzcvqc(?: @.*|) ** ... +** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) +** ... ** bfi (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #29, #1(?: @.*|) ** ... ** vmsr FPSCR_nzcvqc, (?:ip|fp|r[0-9]+)(?: @.*|) ** ... -** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) -** ... ** vpst(?: @.*|) ** ... ** vadct.i32 q[0-9]+, q[0-9]+, q[0-9]+(?: @.*|) diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c index 7a332610c693..940e2edfcc5e 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c @@ -14,12 +14,12 @@ extern "C" { ** ... ** vmrs (?:ip|fp|r[0-9]+), FPSCR_nzcvqc(?: @.*|) ** ... +** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) +** ... ** bfi (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #29, #1(?: @.*|) ** ... ** vmsr FPSCR_nzcvqc, (?:ip|fp|r[0-9]+)(?: @.*|) ** ... -** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) -** ... ** vpst(?: @.*|) ** ... ** vsbct.i32 q[0-9]+, q[0-9]+, q[0-9]+(?: @.*|) @@ -41,12 +41,12 @@ foo (int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry, mve_pred16_t ** ... ** vmrs (?:ip|fp|r[0-9]+), FPSCR_nzcvqc(?: @.*|) ** ... +** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) +** ... ** bfi (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #29, #1(?: @.*|) ** ... ** vmsr FPSCR_nzcvqc, (?:ip|fp|r[0-9]+)(?: @.*|) ** ... -** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) -** ... ** vpst(?: @.*|) ** ... ** vsbct.i32 q[0-9]+, q[0-9]+, q[0-9]+(?: @.*|) diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c index 609021965022..478b938f8f74 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c @@ -14,12 +14,12 @@ extern "C" { ** ... ** vmrs (?:ip|fp|r[0-9]+), FPSCR_nzcvqc(?: @.*|) ** ... +** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) +** ... ** bfi (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #29, #1(?: @.*|) ** ... ** vmsr FPSCR_nzcvqc, (?:ip|fp|r[0-9]+)(?: @.*|) ** ... -** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) -** ... ** vpst(?: @.*|) ** ... ** vsbct.i32 q[0-9]+, q[0-9]+, q[0-9]+(?: @.*|) @@ -41,12 +41,12 @@ foo (uint32x4_t inactive, uint32x4_t a, uint32x4_t b, unsigned *carry, mve_pred1 ** ... ** vmrs (?:ip|fp|r[0-9]+), FPSCR_nzcvqc(?: @.*|) ** ... +** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) +** ... ** bfi (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #29, #1(?: @.*|) ** ... ** vmsr FPSCR_nzcvqc, (?:ip|fp|r[0-9]+)(?: @.*|) ** ... -** vmsr p0, (?:ip|fp|r[0-9]+)(?: @.*|) -** ... ** vpst(?: @.*|) ** ... ** vsbct.i32 q[0-9]+, q[0-9]+, q[0-9]+(?: @.*|)
