https://gcc.gnu.org/g:4960fba5455498f5253c8da9bceb3466c1ca6b9a
commit r17-978-g4960fba5455498f5253c8da9bceb3466c1ca6b9a Author: Artemiy Volkov <[email protected]> Date: Wed Dec 17 13:27:21 2025 +0000 aarch64: implement FIRSTP and LASTP SVE instructions This commit implements patterns and intrinsics for these two instructions new in SVE2.2 (or in streaming mode, SME2.2): - FIRSTP (Scalar index of first true predicate element (predicated)) - LASTP (Scalar index of last true predicate element (predicated)) The new intrinsics are documented in the ACLE manual [0] and have the following signatures: int64_t svfirstp_b{8,16,32,64} (svbool_t pg, svbool_t pn); int64_t svlastp_b{8,16,32,64} (svbool_t pg, svbool_t pn); The intrinsics are implemented in the usual way; the new svfirst_lastp_impl base class is used for both families. The ->fold () method implements constant folding except for LASTP under -msve-vector-bits=scalable. On the .md side, the patterns for both new instructions are implemented using UNSPECs as they can't be expressed in terms of standard RTL. Included are standard asm tests (which are heavily based on cntp_* tests from the sve directory), as well as some general C tests demonstrating aforementioned optimizations when PG and/or PN are constant vectors. [0] https://github.com/ARM-software/acle gcc/ChangeLog: * config/aarch64/aarch64-sve-builtins-sve2.cc (class svfirst_lastp_impl): Define new SVE function base class. (svfirstp): Define new SVE function base. (svlastp): Likewise. * config/aarch64/aarch64-sve-builtins-sve2.def (svfirstp): Define new SVE function. (svlastp): Likewise. * config/aarch64/aarch64-sve-builtins-sve2.h (svfirstp): Declare new SVE function base. (svlastp): Likewise. * config/aarch64/aarch64-sve2.md (@aarch64_pred_firstp<mode>): New insn pattern. (@aarch64_pred_lastp<mode>): Likewise. * config/aarch64/iterators.md (UNSPEC_FIRSTP): New UNSPEC. (UNSPEC_LASTP): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve2/acle/asm/firstp_b16.c: New test. * gcc.target/aarch64/sve2/acle/asm/firstp_b32.c: Likewise. 
* gcc.target/aarch64/sve2/acle/asm/firstp_b64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/firstp_b8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/lastp_b16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/lastp_b32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/lastp_b64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/lastp_b8.c: Likewise. * gcc.target/aarch64/sve2/acle/general/firstp.c: Likewise. * gcc.target/aarch64/sve2/acle/general/lastp.c: Likewise. Diff: --- gcc/config/aarch64/aarch64-sve-builtins-sve2.cc | 61 ++++++ gcc/config/aarch64/aarch64-sve-builtins-sve2.def | 2 + gcc/config/aarch64/aarch64-sve-builtins-sve2.h | 2 + gcc/config/aarch64/aarch64-sve2.md | 33 ++++ gcc/config/aarch64/iterators.md | 2 + .../gcc.target/aarch64/sve2/acle/asm/firstp_b16.c | 192 +++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/firstp_b32.c | 192 +++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/firstp_b64.c | 192 +++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/firstp_b8.c | 192 +++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/lastp_b16.c | 192 +++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/lastp_b32.c | 192 +++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/lastp_b64.c | 192 +++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/lastp_b8.c | 192 +++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/general/firstp.c | 212 +++++++++++++++++++++ .../gcc.target/aarch64/sve2/acle/general/lastp.c | 212 +++++++++++++++++++++ 15 files changed, 2060 insertions(+) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc index d45012e79362..5ea08056ae3d 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc @@ -291,6 +291,65 @@ public: } }; +class svfirst_lastp_impl : public function_base +{ +public: + CONSTEXPR svfirst_lastp_impl (bool first) + : m_first (first) + {} + + gimple * + fold (gimple_folder &f) const override + 
{ + tree pg = gimple_call_arg (f.call, 0); + tree pn = gimple_call_arg (f.call, 1); + + gcc_assert (TYPE_MODE (TREE_TYPE (pg)) == TYPE_MODE (TREE_TYPE (pn))); + + if (is_pfalse (pg) || is_pfalse (pn)) + return f.fold_call_to (build_minus_one_cst (TREE_TYPE (f.lhs))); + + if (TREE_CODE (pg) != VECTOR_CST + || TREE_CODE (pn) != VECTOR_CST) + return NULL; + + HOST_WIDE_INT nelts_full_vector = aarch64_fold_sve_cnt_pat (AARCH64_SV_ALL, + f.elements_per_vq (0)); + if (!m_first && nelts_full_vector < 0) + return NULL; + + tree pa = fold_build2 (BIT_AND_EXPR, TREE_TYPE (pg), pg, pn); + gcc_assert (TREE_CODE (pa) == VECTOR_CST); + + int elt_size = f.type_suffix (0).element_bytes; + unsigned int nelts = vector_cst_encoded_nelts (pa); + for (unsigned int i = 0; i < nelts; i++) + { + unsigned int idx = m_first ? i : nelts - 1 - i; + if (tree_to_shwi (VECTOR_CST_ENCODED_ELT (pa, idx)) != 0) + return f.fold_call_to (build_int_cst (TREE_TYPE (f.lhs), + m_first + ? i / elt_size + : (nelts_full_vector - 1 + - i / elt_size))); + } + + return f.fold_call_to (build_minus_one_cst (TREE_TYPE (f.lhs))); + } + + rtx + expand (function_expander &e) const override + { + machine_mode mode = e.vector_mode (0); + return e.use_exact_insn (m_first ? code_for_aarch64_pred_firstp (mode) + : code_for_aarch64_pred_lastp (mode)); + } + +private: + /* True for svfirstp, false for svlastp. 
*/ + bool m_first; +}; + class svld1q_gather_impl : public full_width_access { public: @@ -1023,12 +1082,14 @@ FUNCTION (sveorbt, unspec_based_function, (UNSPEC_EORBT, UNSPEC_EORBT, -1)) FUNCTION (sveorqv, reduction, (UNSPEC_EORQV, UNSPEC_EORQV, -1)) FUNCTION (sveortb, unspec_based_function, (UNSPEC_EORTB, UNSPEC_EORTB, -1)) FUNCTION (svextq, svextq_impl,) +FUNCTION (svfirstp, svfirst_lastp_impl, (true)) FUNCTION (svhadd, unspec_based_function, (UNSPEC_SHADD, UNSPEC_UHADD, -1)) FUNCTION (svhsub, unspec_based_function, (UNSPEC_SHSUB, UNSPEC_UHSUB, -1)) FUNCTION (svhistcnt, CODE_FOR_MODE0 (aarch64_sve2_histcnt),) FUNCTION (svhistseg, CODE_FOR_MODE0 (aarch64_sve2_histseg),) FUNCTION (svhsubr, unspec_based_function_rotated, (UNSPEC_SHSUB, UNSPEC_UHSUB, -1)) +FUNCTION (svlastp, svfirst_lastp_impl, (false)) FUNCTION (svld1q_gather, svld1q_gather_impl,) FUNCTION (svld1udq, svld1uxq_impl, (VNx1DImode)) FUNCTION (svld1uwq, svld1uxq_impl, (VNx1SImode)) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def index f93c26e7d848..dcb968c664a4 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def @@ -304,6 +304,8 @@ DEF_SVE_FUNCTION (svcvtlt, unary_convert, cvt_long, z) DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_narrow, z) DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, z) DEF_SVE_FUNCTION (svcvtxnt, unary_convert_narrowt, cvt_narrow_s, z) +DEF_SVE_FUNCTION (svfirstp, count_pred, all_pred, implicit) +DEF_SVE_FUNCTION (svlastp, count_pred, all_pred, implicit) DEF_SVE_FUNCTION (svrint32x, unary, sd_float, mxz) DEF_SVE_FUNCTION (svrint32z, unary, sd_float, mxz) DEF_SVE_FUNCTION (svrint64x, unary, sd_float, mxz) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h index 8b1581f8568b..b2f2698b8802 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h +++ 
b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h @@ -80,11 +80,13 @@ namespace aarch64_sve extern const function_base *const sveorqv; extern const function_base *const sveortb; extern const function_base *const svextq; + extern const function_base *const svfirstp; extern const function_base *const svhadd; extern const function_base *const svhistcnt; extern const function_base *const svhistseg; extern const function_base *const svhsub; extern const function_base *const svhsubr; + extern const function_base *const svlastp; extern const function_base *const svld1q_gather; extern const function_base *const svld1udq; extern const function_base *const svld1uwq; diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index b56b909741cd..d9ad7689ff2f 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -46,6 +46,7 @@ ;; ---- [PRED] Predicate extraction ;; ---- [PRED] Predicate selection ;; ---- [PRED] Predicate count +;; ---- [PRED] Predicate first/last true element ;; ;; == Uniform unary arithmnetic ;; ---- [FP] General unary arithmetic that maps to unspecs @@ -721,6 +722,38 @@ [(set_attr "sve_type" "sve_pred_cnt_scalar")] ) +;; ------------------------------------------------------------------------- +;; ---- [PRED] Predicate first/last true element +;; ------------------------------------------------------------------------- +;; Includes +;; - FIRSTP (predicate first true element) (SVE2p2, SME2p2) +;; - LASTP (predicate last true element) (SVE2p2, SME2p2) +;; ------------------------------------------------------------------------- + +;; Set operand 0 to the index of the first element that is true in both +;; operand 1 and operand 2, or to -1 if there is no such element. 
+(define_insn "@aarch64_pred_firstp<mode>" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:PRED_ALL 1 "register_operand" "Upl") + (match_operand:PRED_ALL 2 "register_operand" "Upa")] + UNSPEC_FIRSTP))] + "TARGET_SVE2p2_OR_SME2p2" + "firstp\t%x0, %1, %2.<Vetype>" + [(set_attr "sve_type" "sve_pred_cnt_scalar")] +) + +;; Set operand 0 to the index of the last element that is true in both +;; operand 1 and operand 2, or to -1 if there is no such element. +(define_insn "@aarch64_pred_lastp<mode>" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:PRED_ALL 1 "register_operand" "Upl") + (match_operand:PRED_ALL 2 "register_operand" "Upa")] + UNSPEC_LASTP))] + "TARGET_SVE2p2_OR_SME2p2" + "lastp\t%x0, %1, %2.<Vetype>" + [(set_attr "sve_type" "sve_pred_cnt_scalar")] +) + ;; ========================================================================= ;; == Uniform unary arithmnetic ;; ========================================================================= diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 41410095ba39..37f819ce6493 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1132,6 +1132,7 @@ UNSPEC_FCVT ; Used in aarch64-sve2.md. UNSPEC_FCVTNB ; Used in aarch64-sve2.md. UNSPEC_FCVTNT ; Used in aarch64-sve2.md. + UNSPEC_FIRSTP ; Used in aarch64-sve2.md. UNSPEC_FMAXNMP ; Used in aarch64-sve2.md. UNSPEC_FMAXP ; Used in aarch64-sve2.md. UNSPEC_FMINNMP ; Used in aarch64-sve2.md. @@ -1149,6 +1150,7 @@ UNSPEC_FP8FCVTN ; Used in aarch64-sve2.md. UNSPEC_HISTCNT ; Used in aarch64-sve2.md. UNSPEC_HISTSEG ; Used in aarch64-sve2.md. + UNSPEC_LASTP ; Used in aarch64-sve2.md. UNSPEC_LD1_COUNT ; Used in aarch64-sve2.md. UNSPEC_LDNT1_COUNT ; Used in aarch64-sve2.md. UNSPEC_MATCH ; Used in aarch64-sve2.md. 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b16.c new file mode 100644 index 000000000000..06ea1e1b9ef5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b16.c @@ -0,0 +1,192 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */ +/* { dg-additional-options "-msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" +#include <stdbool.h> + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** firstp_b16_32: +** firstp x0, p0, p1\.h +** ret +*/ +TEST_PTEST (firstp_b16_32, uint32_t, + x0 = svfirstp_b16 (p0, p1)); + +/* +** firstp_b16_64: +** firstp x0, p0, p1\.h +** ret +*/ +TEST_PTEST (firstp_b16_64, uint64_t, + x0 = svfirstp_b16 (p0, p1)); + +/* +** firstp_inc_b16_32_general_x0: +** firstp x([0-9]+), p0, p1\.h +** add w0, (w0, w\1|w\1, w0) +** ret +*/ +TEST_PTEST (firstp_inc_b16_32_general_x0, uint32_t, + x0 += svfirstp_b16 (p0, p1)); + +/* +** firstp_inc_b16_32_general_x1: +** firstp x([0-9]+), p0, p1\.h +** add w0, (w1, w\1|w\1, w1) +** ret +*/ +TEST_PTEST (firstp_inc_b16_32_general_x1, uint32_t, + x0 = x1 + svfirstp_b16 (p0, p1)); + +/* +** firstp_inc_b16_64_general_x0: +** firstp (x[0-9]+), p0, p1\.h +** add x0, (x0, \1|\1, x0) +** ret +*/ +TEST_PTEST (firstp_inc_b16_64_general_x0, uint64_t, + x0 += svfirstp_b16 (p0, p1)); + +/* +** firstp_inc_b16_64_general_x1: +** firstp (x[0-9]+), p0, p1\.h +** add x0, (x1, \1|\1, x1) +** ret +*/ +TEST_PTEST (firstp_inc_b16_64_general_x1, uint64_t, + x0 = x1 + svfirstp_b16 (p0, p1)); + +/* +** firstp_dec_b16_32_general_x0: +** firstp x([0-9]+), p0, p1\.h +** sub w0, w0, w\1 +** ret +*/ +TEST_PTEST (firstp_dec_b16_32_general_x0, uint32_t, + x0 -= svfirstp_b16 (p0, p1)); + +/* +** firstp_dec_b16_32_general_x1: +** firstp 
x([0-9]+), p0, p1\.h +** sub w0, w1, w\1 +** ret +*/ +TEST_PTEST (firstp_dec_b16_32_general_x1, uint32_t, + x0 = x1 - svfirstp_b16 (p0, p1)); + +/* +** firstp_dec_b16_64_general_x0: +** firstp (x[0-9]+), p0, p1\.h +** sub x0, x0, \1 +** ret +*/ +TEST_PTEST (firstp_dec_b16_64_general_x0, uint64_t, + x0 -= svfirstp_b16 (p0, p1)); + +/* +** firstp_dec_b16_64_general_x1: +** firstp (x[0-9]+), p0, p1\.h +** sub x0, x1, \1 +** ret +*/ +TEST_PTEST (firstp_dec_b16_64_general_x1, uint64_t, + x0 = x1 - svfirstp_b16 (p0, p1)); + +/* +** firstp_inc_b16_u16_general_z0: +** firstp x([0-9]+), p0, p1\.h +** mov (z[0-9]+\.h), w\1 +** add z0\.h, (z0\.h, \2|\2, z0\.h) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b16_u16_general_z0, svuint16_t, + z0 = svadd_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)), + z0 = svadd_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1))); + +/* +** firstp_inc_b16_u16_general_z1: +** firstp x([0-9]+), p0, p1\.h +** mov (z[0-9]+\.h), w\1 +** add z0\.h, (z1\.h, \2|\2, z1\.h) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b16_u16_general_z1, svuint16_t, + z0 = svadd_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)), + z0 = svadd_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1))); + +/* +** firstp_inc_b16_u16_ptrue_z0: +** ptrue (p[0-7])\.b, all +** firstp x([0-9]+), \1, p0\.h +** mov (z[0-9]+\.h), w\2 +** add z0\.h, (z0\.h, \3|\3, z0\.h) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b16_u16_ptrue_z0, svuint16_t, + z0 = svadd_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)), + z0 = svadd_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0))); + +/* +** firstp_inc_b16_u16_ptrue_z1: +** ptrue (p[0-7])\.b, all +** firstp x([0-9]+), \1, p0\.h +** mov (z[0-9]+\.h), w\2 +** add z0\.h, (z1\.h, \3|\3, z1\.h) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b16_u16_ptrue_z1, svuint16_t, + z0 = svadd_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)), + z0 = svadd_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0))); + +/* +** firstp_dec_b16_u16_general_z0: 
+** firstp x([0-9]+), p0, p1\.h +** mov (z[0-9]+\.h), w\1 +** sub z0\.h, z0\.h, \2 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b16_u16_general_z0, svuint16_t, + z0 = svsub_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1)), + z0 = svsub_x (svptrue_b16 (), z0, svfirstp_b16 (p0, p1))); + +/* +** firstp_dec_b16_u16_general_z1: +** firstp x([0-9]+), p0, p1\.h +** mov (z[0-9]+\.h), w\1 +** sub z0\.h, z1\.h, \2 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b16_u16_general_z1, svuint16_t, + z0 = svsub_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1)), + z0 = svsub_x (svptrue_b16 (), z1, svfirstp_b16 (p0, p1))); + +/* +** firstp_dec_b16_u16_ptrue_z0: +** ptrue (p[0-7])\.b, all +** firstp x([0-9]+), \1, p0\.h +** mov (z[0-9]+\.h), w\2 +** sub z0\.h, z0\.h, \3 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b16_u16_ptrue_z0, svuint16_t, + z0 = svsub_n_u16_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0)), + z0 = svsub_x (svptrue_b16 (), z0, svfirstp_b16 (svptrue_b16 (), p0))); + +/* +** firstp_dec_b16_u16_ptrue_z1: +** ptrue (p[0-7])\.b, all +** firstp x([0-9]+), \1, p0\.h +** mov (z[0-9]+\.h), w\2 +** sub z0\.h, z1\.h, \3 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b16_u16_ptrue_z1, svuint16_t, + z0 = svsub_n_u16_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0)), + z0 = svsub_x (svptrue_b16 (), z1, svfirstp_b16 (svptrue_b16 (), p0))); diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b32.c new file mode 100644 index 000000000000..668920bba167 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b32.c @@ -0,0 +1,192 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-additional-options "-msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" +#include <stdbool.h> + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** firstp_b32_32: +** firstp x0, p0, p1\.s +** ret +*/ +TEST_PTEST (firstp_b32_32, uint32_t, + x0 = svfirstp_b32 (p0, p1)); + +/* +** firstp_b32_64: +** firstp x0, p0, p1\.s +** ret +*/ +TEST_PTEST (firstp_b32_64, uint64_t, + x0 = svfirstp_b32 (p0, p1)); + +/* +** firstp_inc_b32_32_general_x0: +** firstp x([0-9]+), p0, p1\.s +** add w0, (w0, w\1|w\1, w0) +** ret +*/ +TEST_PTEST (firstp_inc_b32_32_general_x0, uint32_t, + x0 += svfirstp_b32 (p0, p1)); + +/* +** firstp_inc_b32_32_general_x1: +** firstp x([0-9]+), p0, p1\.s +** add w0, (w1, w\1|w\1, w1) +** ret +*/ +TEST_PTEST (firstp_inc_b32_32_general_x1, uint32_t, + x0 = x1 + svfirstp_b32 (p0, p1)); + +/* +** firstp_inc_b32_64_general_x0: +** firstp (x[0-9]+), p0, p1\.s +** add x0, (x0, \1|\1, x0) +** ret +*/ +TEST_PTEST (firstp_inc_b32_64_general_x0, uint64_t, + x0 += svfirstp_b32 (p0, p1)); + +/* +** firstp_inc_b32_64_general_x1: +** firstp (x[0-9]+), p0, p1\.s +** add x0, (x1, \1|\1, x1) +** ret +*/ +TEST_PTEST (firstp_inc_b32_64_general_x1, uint64_t, + x0 = x1 + svfirstp_b32 (p0, p1)); + +/* +** firstp_dec_b32_32_general_x0: +** firstp x([0-9]+), p0, p1\.s +** sub w0, w0, w\1 +** ret +*/ +TEST_PTEST (firstp_dec_b32_32_general_x0, uint32_t, + x0 -= svfirstp_b32 (p0, p1)); + +/* +** firstp_dec_b32_32_general_x1: +** firstp x([0-9]+), p0, p1\.s +** sub w0, w1, w\1 +** ret +*/ +TEST_PTEST (firstp_dec_b32_32_general_x1, uint32_t, + x0 = x1 - svfirstp_b32 (p0, p1)); + +/* +** firstp_dec_b32_64_general_x0: +** firstp (x[0-9]+), p0, p1\.s +** sub x0, x0, \1 +** ret +*/ +TEST_PTEST (firstp_dec_b32_64_general_x0, uint64_t, + x0 -= svfirstp_b32 (p0, p1)); + +/* +** firstp_dec_b32_64_general_x1: +** firstp 
(x[0-9]+), p0, p1\.s +** sub x0, x1, \1 +** ret +*/ +TEST_PTEST (firstp_dec_b32_64_general_x1, uint64_t, + x0 = x1 - svfirstp_b32 (p0, p1)); + +/* +** firstp_inc_b32_u32_general_z0: +** firstp x([0-9]+), p0, p1\.s +** mov (z[0-9]+\.s), w\1 +** add z0\.s, (z0\.s, \2|\2, z0\.s) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b32_u32_general_z0, svuint32_t, + z0 = svadd_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)), + z0 = svadd_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1))); + +/* +** firstp_inc_b32_u32_general_z1: +** firstp x([0-9]+), p0, p1\.s +** mov (z[0-9]+\.s), w\1 +** add z0\.s, (z1\.s, \2|\2, z1\.s) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b32_u32_general_z1, svuint32_t, + z0 = svadd_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)), + z0 = svadd_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1))); + +/* +** firstp_inc_b32_u32_ptrue_z0: +** ptrue (p[0-7])\.b, all +** firstp x([0-9]+), \1, p0\.s +** mov (z[0-9]+\.s), w\2 +** add z0\.s, (z0\.s, \3|\3, z0\.s) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b32_u32_ptrue_z0, svuint32_t, + z0 = svadd_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)), + z0 = svadd_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0))); + +/* +** firstp_inc_b32_u32_ptrue_z1: +** ptrue (p[0-7])\.b, all +** firstp x([0-9]+), \1, p0\.s +** mov (z[0-9]+\.s), w\2 +** add z0\.s, (z1\.s, \3|\3, z1\.s) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b32_u32_ptrue_z1, svuint32_t, + z0 = svadd_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)), + z0 = svadd_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0))); + +/* +** firstp_dec_b32_u32_general_z0: +** firstp x([0-9]+), p0, p1\.s +** mov (z[0-9]+\.s), w\1 +** sub z0\.s, z0\.s, \2 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b32_u32_general_z0, svuint32_t, + z0 = svsub_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1)), + z0 = svsub_x (svptrue_b32 (), z0, svfirstp_b32 (p0, p1))); + +/* +** firstp_dec_b32_u32_general_z1: +** firstp x([0-9]+), p0, p1\.s +** mov (z[0-9]+\.s), w\1 
+** sub z0\.s, z1\.s, \2 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b32_u32_general_z1, svuint32_t, + z0 = svsub_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1)), + z0 = svsub_x (svptrue_b32 (), z1, svfirstp_b32 (p0, p1))); + +/* +** firstp_dec_b32_u32_ptrue_z0: +** ptrue (p[0-7])\.b, all +** firstp x([0-9]+), \1, p0\.s +** mov (z[0-9]+\.s), w\2 +** sub z0\.s, z0\.s, \3 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b32_u32_ptrue_z0, svuint32_t, + z0 = svsub_n_u32_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0)), + z0 = svsub_x (svptrue_b32 (), z0, svfirstp_b32 (svptrue_b32 (), p0))); + +/* +** firstp_dec_b32_u32_ptrue_z1: +** ptrue (p[0-7])\.b, all +** firstp x([0-9]+), \1, p0\.s +** mov (z[0-9]+\.s), w\2 +** sub z0\.s, z1\.s, \3 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b32_u32_ptrue_z1, svuint32_t, + z0 = svsub_n_u32_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0)), + z0 = svsub_x (svptrue_b32 (), z1, svfirstp_b32 (svptrue_b32 (), p0))); diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b64.c new file mode 100644 index 000000000000..330b0b047689 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b64.c @@ -0,0 +1,192 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-additional-options "-msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" +#include <stdbool.h> + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** firstp_b64_32: +** firstp x0, p0, p1\.d +** ret +*/ +TEST_PTEST (firstp_b64_32, uint32_t, + x0 = svfirstp_b64 (p0, p1)); + +/* +** firstp_b64_64: +** firstp x0, p0, p1\.d +** ret +*/ +TEST_PTEST (firstp_b64_64, uint64_t, + x0 = svfirstp_b64 (p0, p1)); + +/* +** firstp_inc_b64_32_general_x0: +** firstp x([0-9]+), p0, p1\.d +** add w0, (w0, w\1|w\1, w0) +** ret +*/ +TEST_PTEST (firstp_inc_b64_32_general_x0, uint32_t, + x0 += svfirstp_b64 (p0, p1)); + +/* +** firstp_inc_b64_32_general_x1: +** firstp x([0-9]+), p0, p1\.d +** add w0, (w1, w\1|w\1, w1) +** ret +*/ +TEST_PTEST (firstp_inc_b64_32_general_x1, uint32_t, + x0 = x1 + svfirstp_b64 (p0, p1)); + +/* +** firstp_inc_b64_64_general_x0: +** firstp (x[0-9]+), p0, p1\.d +** add x0, (x0, \1|\1, x0) +** ret +*/ +TEST_PTEST (firstp_inc_b64_64_general_x0, uint64_t, + x0 += svfirstp_b64 (p0, p1)); + +/* +** firstp_inc_b64_64_general_x1: +** firstp (x[0-9]+), p0, p1\.d +** add x0, (x1, \1|\1, x1) +** ret +*/ +TEST_PTEST (firstp_inc_b64_64_general_x1, uint64_t, + x0 = x1 + svfirstp_b64 (p0, p1)); + +/* +** firstp_dec_b64_32_general_x0: +** firstp x([0-9]+), p0, p1\.d +** sub w0, w0, w\1 +** ret +*/ +TEST_PTEST (firstp_dec_b64_32_general_x0, uint32_t, + x0 -= svfirstp_b64 (p0, p1)); + +/* +** firstp_dec_b64_32_general_x1: +** firstp x([0-9]+), p0, p1\.d +** sub w0, w1, w\1 +** ret +*/ +TEST_PTEST (firstp_dec_b64_32_general_x1, uint32_t, + x0 = x1 - svfirstp_b64 (p0, p1)); + +/* +** firstp_dec_b64_64_general_x0: +** firstp (x[0-9]+), p0, p1\.d +** sub x0, x0, \1 +** ret +*/ +TEST_PTEST (firstp_dec_b64_64_general_x0, uint64_t, + x0 -= svfirstp_b64 (p0, p1)); + +/* +** firstp_dec_b64_64_general_x1: +** firstp 
(x[0-9]+), p0, p1\.d +** sub x0, x1, \1 +** ret +*/ +TEST_PTEST (firstp_dec_b64_64_general_x1, uint64_t, + x0 = x1 - svfirstp_b64 (p0, p1)); + +/* +** firstp_inc_b64_u64_general_z0: +** firstp (x[0-9]+), p0, p1\.d +** mov (z[0-9]+\.d), \1 +** add z0\.d, (z0\.d, \2|\2, z0\.d) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b64_u64_general_z0, svuint64_t, + z0 = svadd_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)), + z0 = svadd_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1))); + +/* +** firstp_inc_b64_u64_general_z1: +** firstp (x[0-9]+), p0, p1\.d +** mov (z[0-9]+\.d), \1 +** add z0\.d, (z1\.d, \2|\2, z1\.d) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b64_u64_general_z1, svuint64_t, + z0 = svadd_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)), + z0 = svadd_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1))); + +/* +** firstp_inc_b64_u64_ptrue_z0: +** ptrue (p[0-7])\.b, all +** firstp (x[0-9]+), \1, p0\.d +** mov (z[0-9]+\.d), \2 +** add z0\.d, (z0\.d, \3|\3, z0\.d) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b64_u64_ptrue_z0, svuint64_t, + z0 = svadd_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)), + z0 = svadd_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0))); + +/* +** firstp_inc_b64_u64_ptrue_z1: +** ptrue (p[0-7])\.b, all +** firstp (x[0-9]+), \1, p0\.d +** mov (z[0-9]+\.d), \2 +** add z0\.d, (z1\.d, \3|\3, z1\.d) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b64_u64_ptrue_z1, svuint64_t, + z0 = svadd_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)), + z0 = svadd_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0))); + +/* +** firstp_dec_b64_u64_general_z0: +** firstp (x[0-9]+), p0, p1\.d +** mov (z[0-9]+\.d), \1 +** sub z0\.d, z0\.d, \2 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b64_u64_general_z0, svuint64_t, + z0 = svsub_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1)), + z0 = svsub_x (svptrue_b64 (), z0, svfirstp_b64 (p0, p1))); + +/* +** firstp_dec_b64_u64_general_z1: +** firstp (x[0-9]+), p0, p1\.d +** mov (z[0-9]+\.d), \1 +** sub 
z0\.d, z1\.d, \2 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b64_u64_general_z1, svuint64_t, + z0 = svsub_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1)), + z0 = svsub_x (svptrue_b64 (), z1, svfirstp_b64 (p0, p1))); + +/* +** firstp_dec_b64_u64_ptrue_z0: +** ptrue (p[0-7])\.b, all +** firstp (x[0-9]+), \1, p0\.d +** mov (z[0-9]+\.d), \2 +** sub z0\.d, z0\.d, \3 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b64_u64_ptrue_z0, svuint64_t, + z0 = svsub_n_u64_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0)), + z0 = svsub_x (svptrue_b64 (), z0, svfirstp_b64 (svptrue_b64 (), p0))); + +/* +** firstp_dec_b64_u64_ptrue_z1: +** ptrue (p[0-7])\.b, all +** firstp (x[0-9]+), \1, p0\.d +** mov (z[0-9]+\.d), \2 +** sub z0\.d, z1\.d, \3 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b64_u64_ptrue_z1, svuint64_t, + z0 = svsub_n_u64_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0)), + z0 = svsub_x (svptrue_b64 (), z1, svfirstp_b64 (svptrue_b64 (), p0))); diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b8.c new file mode 100644 index 000000000000..653d903577ac --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/firstp_b8.c @@ -0,0 +1,192 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-additional-options "-msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" +#include <stdbool.h> + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** firstp_b8_32: +** firstp x0, p0, p1\.b +** ret +*/ +TEST_PTEST (firstp_b8_32, uint32_t, + x0 = svfirstp_b8 (p0, p1)); + +/* +** firstp_b8_64: +** firstp x0, p0, p1\.b +** ret +*/ +TEST_PTEST (firstp_b8_64, uint64_t, + x0 = svfirstp_b8 (p0, p1)); + +/* +** firstp_inc_b8_32_general_x0: +** firstp x([0-9]+), p0, p1\.b +** add w0, (w0, w\1|w\1, w0) +** ret +*/ +TEST_PTEST (firstp_inc_b8_32_general_x0, uint32_t, + x0 += svfirstp_b8 (p0, p1)); + +/* +** firstp_inc_b8_32_general_x1: +** firstp x([0-9]+), p0, p1\.b +** add w0, (w1, w\1|w\1, w1) +** ret +*/ +TEST_PTEST (firstp_inc_b8_32_general_x1, uint32_t, + x0 = x1 + svfirstp_b8 (p0, p1)); + +/* +** firstp_inc_b8_64_general_x0: +** firstp (x[0-9]+), p0, p1\.b +** add x0, (x0, \1|\1, x0) +** ret +*/ +TEST_PTEST (firstp_inc_b8_64_general_x0, uint64_t, + x0 += svfirstp_b8 (p0, p1)); + +/* +** firstp_inc_b8_64_general_x1: +** firstp (x[0-9]+), p0, p1\.b +** add x0, (x1, \1|\1, x1) +** ret +*/ +TEST_PTEST (firstp_inc_b8_64_general_x1, uint64_t, + x0 = x1 + svfirstp_b8 (p0, p1)); + +/* +** firstp_dec_b8_32_general_x0: +** firstp x([0-9]+), p0, p1\.b +** sub w0, w0, w\1 +** ret +*/ +TEST_PTEST (firstp_dec_b8_32_general_x0, uint32_t, + x0 -= svfirstp_b8 (p0, p1)); + +/* +** firstp_dec_b8_32_general_x1: +** firstp x([0-9]+), p0, p1\.b +** sub w0, w1, w\1 +** ret +*/ +TEST_PTEST (firstp_dec_b8_32_general_x1, uint32_t, + x0 = x1 - svfirstp_b8 (p0, p1)); + +/* +** firstp_dec_b8_64_general_x0: +** firstp (x[0-9]+), p0, p1\.b +** sub x0, x0, \1 +** ret +*/ +TEST_PTEST (firstp_dec_b8_64_general_x0, uint64_t, + x0 -= svfirstp_b8 (p0, p1)); + +/* +** firstp_dec_b8_64_general_x1: +** firstp (x[0-9]+), p0, p1\.b +** sub x0, 
x1, \1 +** ret +*/ +TEST_PTEST (firstp_dec_b8_64_general_x1, uint64_t, + x0 = x1 - svfirstp_b8 (p0, p1)); + +/* +** firstp_inc_b8_u8_general_z0: +** firstp x([0-9]+), p0, p1\.b +** mov (z[0-9]+\.b), w\1 +** add z0\.b, (z0\.b, \2|\2, z0\.b) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b8_u8_general_z0, svuint8_t, + z0 = svadd_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)), + z0 = svadd_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1))); + +/* +** firstp_inc_b8_u8_general_z1: +** firstp x([0-9]+), p0, p1\.b +** mov (z[0-9]+\.b), w\1 +** add z0\.b, (z1\.b, \2|\2, z1\.b) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b8_u8_general_z1, svuint8_t, + z0 = svadd_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)), + z0 = svadd_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1))); + +/* +** firstp_inc_b8_u8_ptrue_z0: +** ptrue (p[0-7])\.b, all +** firstp x([0-9]+), \1, p0\.b +** mov (z[0-9]+\.b), w\2 +** add z0\.b, (z0\.b, \3|\3, z0\.b) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b8_u8_ptrue_z0, svuint8_t, + z0 = svadd_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)), + z0 = svadd_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0))); + +/* +** firstp_inc_b8_u8_ptrue_z1: +** ptrue (p[0-7])\.b, all +** firstp x([0-9]+), \1, p0\.b +** mov (z[0-9]+\.b), w\2 +** add z0\.b, (z1\.b, \3|\3, z1\.b) +** ret +*/ +TEST_UNIFORM_Z (firstp_inc_b8_u8_ptrue_z1, svuint8_t, + z0 = svadd_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)), + z0 = svadd_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0))); + +/* +** firstp_dec_b8_u8_general_z0: +** firstp x([0-9]+), p0, p1\.b +** mov (z[0-9]+\.b), w\1 +** sub z0\.b, z0\.b, \2 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b8_u8_general_z0, svuint8_t, + z0 = svsub_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1)), + z0 = svsub_x (svptrue_b8 (), z0, svfirstp_b8 (p0, p1))); + +/* +** firstp_dec_b8_u8_general_z1: +** firstp x([0-9]+), p0, p1\.b +** mov (z[0-9]+\.b), w\1 +** sub z0\.b, z1\.b, \2 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b8_u8_general_z1, svuint8_t, 
+ z0 = svsub_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1)), + z0 = svsub_x (svptrue_b8 (), z1, svfirstp_b8 (p0, p1))); + +/* +** firstp_dec_b8_u8_ptrue_z0: +** ptrue (p[0-7])\.b, all +** firstp x([0-9]+), \1, p0\.b +** mov (z[0-9]+\.b), w\2 +** sub z0\.b, z0\.b, \3 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b8_u8_ptrue_z0, svuint8_t, + z0 = svsub_n_u8_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0)), + z0 = svsub_x (svptrue_b8 (), z0, svfirstp_b8 (svptrue_b8 (), p0))); + +/* +** firstp_dec_b8_u8_ptrue_z1: +** ptrue (p[0-7])\.b, all +** firstp x([0-9]+), \1, p0\.b +** mov (z[0-9]+\.b), w\2 +** sub z0\.b, z1\.b, \3 +** ret +*/ +TEST_UNIFORM_Z (firstp_dec_b8_u8_ptrue_z1, svuint8_t, + z0 = svsub_n_u8_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0)), + z0 = svsub_x (svptrue_b8 (), z1, svfirstp_b8 (svptrue_b8 (), p0))); diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b16.c new file mode 100644 index 000000000000..e70df211cf99 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b16.c @@ -0,0 +1,192 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-additional-options "-msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" +#include <stdbool.h> + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** lastp_b16_32: +** lastp x0, p0, p1\.h +** ret +*/ +TEST_PTEST (lastp_b16_32, uint32_t, + x0 = svlastp_b16 (p0, p1)); + +/* +** lastp_b16_64: +** lastp x0, p0, p1\.h +** ret +*/ +TEST_PTEST (lastp_b16_64, uint64_t, + x0 = svlastp_b16 (p0, p1)); + +/* +** lastp_inc_b16_32_general_x0: +** lastp x([0-9]+), p0, p1\.h +** add w0, (w0, w\1|w\1, w0) +** ret +*/ +TEST_PTEST (lastp_inc_b16_32_general_x0, uint32_t, + x0 += svlastp_b16 (p0, p1)); + +/* +** lastp_inc_b16_32_general_x1: +** lastp x([0-9]+), p0, p1\.h +** add w0, (w1, w\1|w\1, w1) +** ret +*/ +TEST_PTEST (lastp_inc_b16_32_general_x1, uint32_t, + x0 = x1 + svlastp_b16 (p0, p1)); + +/* +** lastp_inc_b16_64_general_x0: +** lastp (x[0-9]+), p0, p1\.h +** add x0, (x0, \1|\1, x0) +** ret +*/ +TEST_PTEST (lastp_inc_b16_64_general_x0, uint64_t, + x0 += svlastp_b16 (p0, p1)); + +/* +** lastp_inc_b16_64_general_x1: +** lastp (x[0-9]+), p0, p1\.h +** add x0, (x1, \1|\1, x1) +** ret +*/ +TEST_PTEST (lastp_inc_b16_64_general_x1, uint64_t, + x0 = x1 + svlastp_b16 (p0, p1)); + +/* +** lastp_dec_b16_32_general_x0: +** lastp x([0-9]+), p0, p1\.h +** sub w0, w0, w\1 +** ret +*/ +TEST_PTEST (lastp_dec_b16_32_general_x0, uint32_t, + x0 -= svlastp_b16 (p0, p1)); + +/* +** lastp_dec_b16_32_general_x1: +** lastp x([0-9]+), p0, p1\.h +** sub w0, w1, w\1 +** ret +*/ +TEST_PTEST (lastp_dec_b16_32_general_x1, uint32_t, + x0 = x1 - svlastp_b16 (p0, p1)); + +/* +** lastp_dec_b16_64_general_x0: +** lastp (x[0-9]+), p0, p1\.h +** sub x0, x0, \1 +** ret +*/ +TEST_PTEST (lastp_dec_b16_64_general_x0, uint64_t, + x0 -= svlastp_b16 (p0, p1)); + +/* +** lastp_dec_b16_64_general_x1: +** lastp (x[0-9]+), p0, p1\.h +** sub x0, x1, \1 
+** ret +*/ +TEST_PTEST (lastp_dec_b16_64_general_x1, uint64_t, + x0 = x1 - svlastp_b16 (p0, p1)); + +/* +** lastp_inc_b16_u16_general_z0: +** lastp x([0-9]+), p0, p1\.h +** mov (z[0-9]+\.h), w\1 +** add z0\.h, (z0\.h, \2|\2, z0\.h) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b16_u16_general_z0, svuint16_t, + z0 = svadd_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)), + z0 = svadd_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1))); + +/* +** lastp_inc_b16_u16_general_z1: +** lastp x([0-9]+), p0, p1\.h +** mov (z[0-9]+\.h), w\1 +** add z0\.h, (z1\.h, \2|\2, z1\.h) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b16_u16_general_z1, svuint16_t, + z0 = svadd_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)), + z0 = svadd_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1))); + +/* +** lastp_inc_b16_u16_ptrue_z0: +** ptrue (p[0-7])\.b, all +** lastp x([0-9]+), \1, p0\.h +** mov (z[0-9]+\.h), w\2 +** add z0\.h, (z0\.h, \3|\3, z0\.h) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b16_u16_ptrue_z0, svuint16_t, + z0 = svadd_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)), + z0 = svadd_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0))); + +/* +** lastp_inc_b16_u16_ptrue_z1: +** ptrue (p[0-7])\.b, all +** lastp x([0-9]+), \1, p0\.h +** mov (z[0-9]+\.h), w\2 +** add z0\.h, (z1\.h, \3|\3, z1\.h) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b16_u16_ptrue_z1, svuint16_t, + z0 = svadd_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)), + z0 = svadd_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0))); + +/* +** lastp_dec_b16_u16_general_z0: +** lastp x([0-9]+), p0, p1\.h +** mov (z[0-9]+\.h), w\1 +** sub z0\.h, z0\.h, \2 +** ret +*/ +TEST_UNIFORM_Z (lastp_dec_b16_u16_general_z0, svuint16_t, + z0 = svsub_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1)), + z0 = svsub_x (svptrue_b16 (), z0, svlastp_b16 (p0, p1))); + +/* +** lastp_dec_b16_u16_general_z1: +** lastp x([0-9]+), p0, p1\.h +** mov (z[0-9]+\.h), w\1 +** sub z0\.h, z1\.h, \2 +** ret +*/ +TEST_UNIFORM_Z 
(lastp_dec_b16_u16_general_z1, svuint16_t, + z0 = svsub_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1)), + z0 = svsub_x (svptrue_b16 (), z1, svlastp_b16 (p0, p1))); + +/* +** lastp_dec_b16_u16_ptrue_z0: +** ptrue (p[0-7])\.b, all +** lastp x([0-9]+), \1, p0\.h +** mov (z[0-9]+\.h), w\2 +** sub z0\.h, z0\.h, \3 +** ret +*/ +TEST_UNIFORM_Z (lastp_dec_b16_u16_ptrue_z0, svuint16_t, + z0 = svsub_n_u16_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0)), + z0 = svsub_x (svptrue_b16 (), z0, svlastp_b16 (svptrue_b16 (), p0))); + +/* +** lastp_dec_b16_u16_ptrue_z1: +** ptrue (p[0-7])\.b, all +** lastp x([0-9]+), \1, p0\.h +** mov (z[0-9]+\.h), w\2 +** sub z0\.h, z1\.h, \3 +** ret +*/ +TEST_UNIFORM_Z (lastp_dec_b16_u16_ptrue_z1, svuint16_t, + z0 = svsub_n_u16_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0)), + z0 = svsub_x (svptrue_b16 (), z1, svlastp_b16 (svptrue_b16 (), p0))); diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b32.c new file mode 100644 index 000000000000..b5b64407f7ba --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b32.c @@ -0,0 +1,192 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-additional-options "-msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" +#include <stdbool.h> + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** lastp_b32_32: +** lastp x0, p0, p1\.s +** ret +*/ +TEST_PTEST (lastp_b32_32, uint32_t, + x0 = svlastp_b32 (p0, p1)); + +/* +** lastp_b32_64: +** lastp x0, p0, p1\.s +** ret +*/ +TEST_PTEST (lastp_b32_64, uint64_t, + x0 = svlastp_b32 (p0, p1)); + +/* +** lastp_inc_b32_32_general_x0: +** lastp x([0-9]+), p0, p1\.s +** add w0, (w0, w\1|w\1, w0) +** ret +*/ +TEST_PTEST (lastp_inc_b32_32_general_x0, uint32_t, + x0 += svlastp_b32 (p0, p1)); + +/* +** lastp_inc_b32_32_general_x1: +** lastp x([0-9]+), p0, p1\.s +** add w0, (w1, w\1|w\1, w1) +** ret +*/ +TEST_PTEST (lastp_inc_b32_32_general_x1, uint32_t, + x0 = x1 + svlastp_b32 (p0, p1)); + +/* +** lastp_inc_b32_64_general_x0: +** lastp (x[0-9]+), p0, p1\.s +** add x0, (x0, \1|\1, x0) +** ret +*/ +TEST_PTEST (lastp_inc_b32_64_general_x0, uint64_t, + x0 += svlastp_b32 (p0, p1)); + +/* +** lastp_inc_b32_64_general_x1: +** lastp (x[0-9]+), p0, p1\.s +** add x0, (x1, \1|\1, x1) +** ret +*/ +TEST_PTEST (lastp_inc_b32_64_general_x1, uint64_t, + x0 = x1 + svlastp_b32 (p0, p1)); + +/* +** lastp_dec_b32_32_general_x0: +** lastp x([0-9]+), p0, p1\.s +** sub w0, w0, w\1 +** ret +*/ +TEST_PTEST (lastp_dec_b32_32_general_x0, uint32_t, + x0 -= svlastp_b32 (p0, p1)); + +/* +** lastp_dec_b32_32_general_x1: +** lastp x([0-9]+), p0, p1\.s +** sub w0, w1, w\1 +** ret +*/ +TEST_PTEST (lastp_dec_b32_32_general_x1, uint32_t, + x0 = x1 - svlastp_b32 (p0, p1)); + +/* +** lastp_dec_b32_64_general_x0: +** lastp (x[0-9]+), p0, p1\.s +** sub x0, x0, \1 +** ret +*/ +TEST_PTEST (lastp_dec_b32_64_general_x0, uint64_t, + x0 -= svlastp_b32 (p0, p1)); + +/* +** lastp_dec_b32_64_general_x1: +** lastp (x[0-9]+), p0, p1\.s +** sub x0, x1, \1 
+** ret +*/ +TEST_PTEST (lastp_dec_b32_64_general_x1, uint64_t, + x0 = x1 - svlastp_b32 (p0, p1)); + +/* +** lastp_inc_b32_u32_general_z0: +** lastp x([0-9]+), p0, p1\.s +** mov (z[0-9]+\.s), w\1 +** add z0\.s, (z0\.s, \2|\2, z0\.s) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b32_u32_general_z0, svuint32_t, + z0 = svadd_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)), + z0 = svadd_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1))); + +/* +** lastp_inc_b32_u32_general_z1: +** lastp x([0-9]+), p0, p1\.s +** mov (z[0-9]+\.s), w\1 +** add z0\.s, (z1\.s, \2|\2, z1\.s) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b32_u32_general_z1, svuint32_t, + z0 = svadd_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)), + z0 = svadd_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1))); + +/* +** lastp_inc_b32_u32_ptrue_z0: +** ptrue (p[0-7])\.b, all +** lastp x([0-9]+), \1, p0\.s +** mov (z[0-9]+\.s), w\2 +** add z0\.s, (z0\.s, \3|\3, z0\.s) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b32_u32_ptrue_z0, svuint32_t, + z0 = svadd_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)), + z0 = svadd_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0))); + +/* +** lastp_inc_b32_u32_ptrue_z1: +** ptrue (p[0-7])\.b, all +** lastp x([0-9]+), \1, p0\.s +** mov (z[0-9]+\.s), w\2 +** add z0\.s, (z1\.s, \3|\3, z1\.s) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b32_u32_ptrue_z1, svuint32_t, + z0 = svadd_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)), + z0 = svadd_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0))); + +/* +** lastp_dec_b32_u32_general_z0: +** lastp x([0-9]+), p0, p1\.s +** mov (z[0-9]+\.s), w\1 +** sub z0\.s, z0\.s, \2 +** ret +*/ +TEST_UNIFORM_Z (lastp_dec_b32_u32_general_z0, svuint32_t, + z0 = svsub_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1)), + z0 = svsub_x (svptrue_b32 (), z0, svlastp_b32 (p0, p1))); + +/* +** lastp_dec_b32_u32_general_z1: +** lastp x([0-9]+), p0, p1\.s +** mov (z[0-9]+\.s), w\1 +** sub z0\.s, z1\.s, \2 +** ret +*/ +TEST_UNIFORM_Z 
(lastp_dec_b32_u32_general_z1, svuint32_t, + z0 = svsub_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1)), + z0 = svsub_x (svptrue_b32 (), z1, svlastp_b32 (p0, p1))); + +/* +** lastp_dec_b32_u32_ptrue_z0: +** ptrue (p[0-7])\.b, all +** lastp x([0-9]+), \1, p0\.s +** mov (z[0-9]+\.s), w\2 +** sub z0\.s, z0\.s, \3 +** ret +*/ +TEST_UNIFORM_Z (lastp_dec_b32_u32_ptrue_z0, svuint32_t, + z0 = svsub_n_u32_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0)), + z0 = svsub_x (svptrue_b32 (), z0, svlastp_b32 (svptrue_b32 (), p0))); + +/* +** lastp_dec_b32_u32_ptrue_z1: +** ptrue (p[0-7])\.b, all +** lastp x([0-9]+), \1, p0\.s +** mov (z[0-9]+\.s), w\2 +** sub z0\.s, z1\.s, \3 +** ret +*/ +TEST_UNIFORM_Z (lastp_dec_b32_u32_ptrue_z1, svuint32_t, + z0 = svsub_n_u32_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0)), + z0 = svsub_x (svptrue_b32 (), z1, svlastp_b32 (svptrue_b32 (), p0))); diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b64.c new file mode 100644 index 000000000000..343be3da9f85 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b64.c @@ -0,0 +1,192 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-additional-options "-msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" +#include <stdbool.h> + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** lastp_b64_32: +** lastp x0, p0, p1\.d +** ret +*/ +TEST_PTEST (lastp_b64_32, uint32_t, + x0 = svlastp_b64 (p0, p1)); + +/* +** lastp_b64_64: +** lastp x0, p0, p1\.d +** ret +*/ +TEST_PTEST (lastp_b64_64, uint64_t, + x0 = svlastp_b64 (p0, p1)); + +/* +** lastp_inc_b64_32_general_x0: +** lastp x([0-9]+), p0, p1\.d +** add w0, (w0, w\1|w\1, w0) +** ret +*/ +TEST_PTEST (lastp_inc_b64_32_general_x0, uint32_t, + x0 += svlastp_b64 (p0, p1)); + +/* +** lastp_inc_b64_32_general_x1: +** lastp x([0-9]+), p0, p1\.d +** add w0, (w1, w\1|w\1, w1) +** ret +*/ +TEST_PTEST (lastp_inc_b64_32_general_x1, uint32_t, + x0 = x1 + svlastp_b64 (p0, p1)); + +/* +** lastp_inc_b64_64_general_x0: +** lastp (x[0-9]+), p0, p1\.d +** add x0, (x0, \1|\1, x0) +** ret +*/ +TEST_PTEST (lastp_inc_b64_64_general_x0, uint64_t, + x0 += svlastp_b64 (p0, p1)); + +/* +** lastp_inc_b64_64_general_x1: +** lastp (x[0-9]+), p0, p1\.d +** add x0, (x1, \1|\1, x1) +** ret +*/ +TEST_PTEST (lastp_inc_b64_64_general_x1, uint64_t, + x0 = x1 + svlastp_b64 (p0, p1)); + +/* +** lastp_dec_b64_32_general_x0: +** lastp x([0-9]+), p0, p1\.d +** sub w0, w0, w\1 +** ret +*/ +TEST_PTEST (lastp_dec_b64_32_general_x0, uint32_t, + x0 -= svlastp_b64 (p0, p1)); + +/* +** lastp_dec_b64_32_general_x1: +** lastp x([0-9]+), p0, p1\.d +** sub w0, w1, w\1 +** ret +*/ +TEST_PTEST (lastp_dec_b64_32_general_x1, uint32_t, + x0 = x1 - svlastp_b64 (p0, p1)); + +/* +** lastp_dec_b64_64_general_x0: +** lastp (x[0-9]+), p0, p1\.d +** sub x0, x0, \1 +** ret +*/ +TEST_PTEST (lastp_dec_b64_64_general_x0, uint64_t, + x0 -= svlastp_b64 (p0, p1)); + +/* +** lastp_dec_b64_64_general_x1: +** lastp (x[0-9]+), p0, p1\.d +** sub x0, x1, \1 
+** ret +*/ +TEST_PTEST (lastp_dec_b64_64_general_x1, uint64_t, + x0 = x1 - svlastp_b64 (p0, p1)); + +/* +** lastp_inc_b64_u64_general_z0: +** lastp (x[0-9]+), p0, p1\.d +** mov (z[0-9]+\.d), \1 +** add z0\.d, (z0\.d, \2|\2, z0\.d) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b64_u64_general_z0, svuint64_t, + z0 = svadd_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)), + z0 = svadd_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1))); + +/* +** lastp_inc_b64_u64_general_z1: +** lastp (x[0-9]+), p0, p1\.d +** mov (z[0-9]+\.d), \1 +** add z0\.d, (z1\.d, \2|\2, z1\.d) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b64_u64_general_z1, svuint64_t, + z0 = svadd_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)), + z0 = svadd_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1))); + +/* +** lastp_inc_b64_u64_ptrue_z0: +** ptrue (p[0-7])\.b, all +** lastp (x[0-9]+), \1, p0\.d +** mov (z[0-9]+\.d), \2 +** add z0\.d, (z0\.d, \3|\3, z0\.d) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b64_u64_ptrue_z0, svuint64_t, + z0 = svadd_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)), + z0 = svadd_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0))); + +/* +** lastp_inc_b64_u64_ptrue_z1: +** ptrue (p[0-7])\.b, all +** lastp (x[0-9]+), \1, p0\.d +** mov (z[0-9]+\.d), \2 +** add z0\.d, (z1\.d, \3|\3, z1\.d) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b64_u64_ptrue_z1, svuint64_t, + z0 = svadd_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)), + z0 = svadd_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0))); + +/* +** lastp_dec_b64_u64_general_z0: +** lastp (x[0-9]+), p0, p1\.d +** mov (z[0-9]+\.d), \1 +** sub z0\.d, z0\.d, \2 +** ret +*/ +TEST_UNIFORM_Z (lastp_dec_b64_u64_general_z0, svuint64_t, + z0 = svsub_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1)), + z0 = svsub_x (svptrue_b64 (), z0, svlastp_b64 (p0, p1))); + +/* +** lastp_dec_b64_u64_general_z1: +** lastp (x[0-9]+), p0, p1\.d +** mov (z[0-9]+\.d), \1 +** sub z0\.d, z1\.d, \2 +** ret +*/ +TEST_UNIFORM_Z 
(lastp_dec_b64_u64_general_z1, svuint64_t, + z0 = svsub_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1)), + z0 = svsub_x (svptrue_b64 (), z1, svlastp_b64 (p0, p1))); + +/* +** lastp_dec_b64_u64_ptrue_z0: +** ptrue (p[0-7])\.b, all +** lastp (x[0-9]+), \1, p0\.d +** mov (z[0-9]+\.d), \2 +** sub z0\.d, z0\.d, \3 +** ret +*/ +TEST_UNIFORM_Z (lastp_dec_b64_u64_ptrue_z0, svuint64_t, + z0 = svsub_n_u64_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0)), + z0 = svsub_x (svptrue_b64 (), z0, svlastp_b64 (svptrue_b64 (), p0))); + +/* +** lastp_dec_b64_u64_ptrue_z1: +** ptrue (p[0-7])\.b, all +** lastp (x[0-9]+), \1, p0\.d +** mov (z[0-9]+\.d), \2 +** sub z0\.d, z1\.d, \3 +** ret +*/ +TEST_UNIFORM_Z (lastp_dec_b64_u64_ptrue_z1, svuint64_t, + z0 = svsub_n_u64_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0)), + z0 = svsub_x (svptrue_b64 (), z1, svlastp_b64 (svptrue_b64 (), p0))); diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b8.c new file mode 100644 index 000000000000..5fa0f26f5b9b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/lastp_b8.c @@ -0,0 +1,192 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-additional-options "-msve-vector-bits=scalable" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" +#include <stdbool.h> + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** lastp_b8_32: +** lastp x0, p0, p1\.b +** ret +*/ +TEST_PTEST (lastp_b8_32, uint32_t, + x0 = svlastp_b8 (p0, p1)); + +/* +** lastp_b8_64: +** lastp x0, p0, p1\.b +** ret +*/ +TEST_PTEST (lastp_b8_64, uint64_t, + x0 = svlastp_b8 (p0, p1)); + +/* +** lastp_inc_b8_32_general_x0: +** lastp x([0-9]+), p0, p1\.b +** add w0, (w0, w\1|w\1, w0) +** ret +*/ +TEST_PTEST (lastp_inc_b8_32_general_x0, uint32_t, + x0 += svlastp_b8 (p0, p1)); + +/* +** lastp_inc_b8_32_general_x1: +** lastp x([0-9]+), p0, p1\.b +** add w0, (w1, w\1|w\1, w1) +** ret +*/ +TEST_PTEST (lastp_inc_b8_32_general_x1, uint32_t, + x0 = x1 + svlastp_b8 (p0, p1)); + +/* +** lastp_inc_b8_64_general_x0: +** lastp (x[0-9]+), p0, p1\.b +** add x0, (x0, \1|\1, x0) +** ret +*/ +TEST_PTEST (lastp_inc_b8_64_general_x0, uint64_t, + x0 += svlastp_b8 (p0, p1)); + +/* +** lastp_inc_b8_64_general_x1: +** lastp (x[0-9]+), p0, p1\.b +** add x0, (x1, \1|\1, x1) +** ret +*/ +TEST_PTEST (lastp_inc_b8_64_general_x1, uint64_t, + x0 = x1 + svlastp_b8 (p0, p1)); + +/* +** lastp_dec_b8_32_general_x0: +** lastp x([0-9]+), p0, p1\.b +** sub w0, w0, w\1 +** ret +*/ +TEST_PTEST (lastp_dec_b8_32_general_x0, uint32_t, + x0 -= svlastp_b8 (p0, p1)); + +/* +** lastp_dec_b8_32_general_x1: +** lastp x([0-9]+), p0, p1\.b +** sub w0, w1, w\1 +** ret +*/ +TEST_PTEST (lastp_dec_b8_32_general_x1, uint32_t, + x0 = x1 - svlastp_b8 (p0, p1)); + +/* +** lastp_dec_b8_64_general_x0: +** lastp (x[0-9]+), p0, p1\.b +** sub x0, x0, \1 +** ret +*/ +TEST_PTEST (lastp_dec_b8_64_general_x0, uint64_t, + x0 -= svlastp_b8 (p0, p1)); + +/* +** lastp_dec_b8_64_general_x1: +** lastp (x[0-9]+), p0, p1\.b +** sub x0, x1, \1 +** ret +*/ +TEST_PTEST 
(lastp_dec_b8_64_general_x1, uint64_t, + x0 = x1 - svlastp_b8 (p0, p1)); + +/* +** lastp_inc_b8_u8_general_z0: +** lastp x([0-9]+), p0, p1\.b +** mov (z[0-9]+\.b), w\1 +** add z0\.b, (z0\.b, \2|\2, z0\.b) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b8_u8_general_z0, svuint8_t, + z0 = svadd_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)), + z0 = svadd_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1))); + +/* +** lastp_inc_b8_u8_general_z1: +** lastp x([0-9]+), p0, p1\.b +** mov (z[0-9]+\.b), w\1 +** add z0\.b, (z1\.b, \2|\2, z1\.b) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b8_u8_general_z1, svuint8_t, + z0 = svadd_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1)), + z0 = svadd_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1))); + +/* +** lastp_inc_b8_u8_ptrue_z0: +** ptrue (p[0-7])\.b, all +** lastp x([0-9]+), \1, p0\.b +** mov (z[0-9]+\.b), w\2 +** add z0\.b, (z0\.b, \3|\3, z0\.b) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b8_u8_ptrue_z0, svuint8_t, + z0 = svadd_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)), + z0 = svadd_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0))); + +/* +** lastp_inc_b8_u8_ptrue_z1: +** ptrue (p[0-7])\.b, all +** lastp x([0-9]+), \1, p0\.b +** mov (z[0-9]+\.b), w\2 +** add z0\.b, (z1\.b, \3|\3, z1\.b) +** ret +*/ +TEST_UNIFORM_Z (lastp_inc_b8_u8_ptrue_z1, svuint8_t, + z0 = svadd_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)), + z0 = svadd_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0))); + +/* +** lastp_dec_b8_u8_general_z0: +** lastp x([0-9]+), p0, p1\.b +** mov (z[0-9]+\.b), w\1 +** sub z0\.b, z0\.b, \2 +** ret +*/ +TEST_UNIFORM_Z (lastp_dec_b8_u8_general_z0, svuint8_t, + z0 = svsub_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1)), + z0 = svsub_x (svptrue_b8 (), z0, svlastp_b8 (p0, p1))); + +/* +** lastp_dec_b8_u8_general_z1: +** lastp x([0-9]+), p0, p1\.b +** mov (z[0-9]+\.b), w\1 +** sub z0\.b, z1\.b, \2 +** ret +*/ +TEST_UNIFORM_Z (lastp_dec_b8_u8_general_z1, svuint8_t, + z0 = svsub_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (p0, 
p1)), + z0 = svsub_x (svptrue_b8 (), z1, svlastp_b8 (p0, p1))); + +/* +** lastp_dec_b8_u8_ptrue_z0: +** ptrue (p[0-7])\.b, all +** lastp x([0-9]+), \1, p0\.b +** mov (z[0-9]+\.b), w\2 +** sub z0\.b, z0\.b, \3 +** ret +*/ +TEST_UNIFORM_Z (lastp_dec_b8_u8_ptrue_z0, svuint8_t, + z0 = svsub_n_u8_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0)), + z0 = svsub_x (svptrue_b8 (), z0, svlastp_b8 (svptrue_b8 (), p0))); + +/* +** lastp_dec_b8_u8_ptrue_z1: +** ptrue (p[0-7])\.b, all +** lastp x([0-9]+), \1, p0\.b +** mov (z[0-9]+\.b), w\2 +** sub z0\.b, z1\.b, \3 +** ret +*/ +TEST_UNIFORM_Z (lastp_dec_b8_u8_ptrue_z1, svuint8_t, + z0 = svsub_n_u8_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0)), + z0 = svsub_x (svptrue_b8 (), z1, svlastp_b8 (svptrue_b8 (), p0))); diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/firstp.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/firstp.c new file mode 100644 index 000000000000..c61a308bc89e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/firstp.c @@ -0,0 +1,212 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-options "-O2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <arm_sve.h> + +#pragma GCC target "+sve2p2" + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** test1: +** mov x0, 0 +** ret +*/ +uint64_t +test1 () +{ + return svfirstp_b8 (svptrue_b8 (), + svptrue_b8 ()); +} + +/* +** test2: +** mov x0, -1 +** ret +*/ +uint64_t +test2 () +{ + return svfirstp_b8 (svpfalse_b (), + svptrue_b8 ()); +} + +/* +** test3: +** mov x0, -1 +** ret +*/ +uint64_t +test3 () +{ + return svfirstp_b8 (svptrue_b8 (), + svpfalse_b ()); +} + +/* +** test4: +** mov x0, 15 +** ret +*/ +uint64_t +test4 () +{ + return svfirstp_b8 (svdupq_n_b8 (false, false, false, false, + false, false, false, false, + false, false, false, false, + false, false, false, true), + svptrue_b8 ()); +} + +/* +** test5: +** mov x0, 0 +** ret +*/ +uint64_t +test5 () +{ + return svfirstp_b16 (svptrue_b16 (), + svptrue_b16 ()); +} + +/* +** test6: +** mov x0, -1 +** ret +*/ +uint64_t +test6 () +{ + return svfirstp_b16 (svpfalse_b (), + svptrue_b16 ()); +} + +/* +** test7: +** mov x0, -1 +** ret +*/ +uint64_t +test7 () +{ + return svfirstp_b16 (svptrue_b16 (), + svpfalse_b ()); +} + +/* +** test8: +** mov x0, 7 +** ret +*/ +uint64_t +test8 () +{ + return svfirstp_b16 (svdupq_n_b16 (false, false, false, false, + false, false, false, true), + svptrue_b16 ()); +} + +/* +** test9: +** mov x0, 0 +** ret +*/ +uint64_t +test9 () +{ + return svfirstp_b32 (svptrue_b32 (), + svptrue_b32 ()); +} + +/* +** test10: +** mov x0, -1 +** ret +*/ +uint64_t +test10 () +{ + return svfirstp_b32 (svpfalse_b (), + svptrue_b32 ()); +} + +/* +** test11: +** mov x0, -1 +** ret +*/ +uint64_t +test11 () +{ + return svfirstp_b32 (svptrue_b32 (), + svpfalse_b ()); +} + +/* +** test12: +** mov x0, 3 +** ret +*/ +uint64_t +test12 () +{ + return svfirstp_b32 (svdupq_n_b32 (false, false, false, true), + svptrue_b32 ()); +} + +/* +** test13: +** mov x0, 0 +** ret +*/ +uint64_t +test13 () +{ 
+ return svfirstp_b64 (svptrue_b64 (), + svptrue_b64 ()); +} + +/* +** test14: +** mov x0, -1 +** ret +*/ +uint64_t +test14 () +{ + return svfirstp_b64 (svpfalse_b (), + svptrue_b64 ()); +} + +/* +** test15: +** mov x0, -1 +** ret +*/ +uint64_t +test15 () +{ + return svfirstp_b64 (svptrue_b64 (), + svpfalse_b ()); +} + +/* +** test16: +** mov x0, 1 +** ret +*/ +uint64_t +test16 () +{ + return svfirstp_b64 (svdupq_n_b64 (false, true), + svptrue_b64 ()); +} + +#ifdef __cplusplus +} +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/lastp.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/lastp.c new file mode 100644 index 000000000000..2dbb65d798d5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/general/lastp.c @@ -0,0 +1,212 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */ +/* { dg-options "-O2 -msve-vector-bits=256" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <arm_sve.h> + +#pragma GCC target "+sve2p2" + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** test1: +** mov x0, 31 +** ret +*/ +uint64_t +test1 () +{ + return svlastp_b8 (svptrue_b8 (), + svptrue_b8 ()); +} + +/* +** test2: +** mov x0, -1 +** ret +*/ +uint64_t +test2 () +{ + return svlastp_b8 (svpfalse_b (), + svptrue_b8 ()); +} + +/* +** test3: +** mov x0, -1 +** ret +*/ +uint64_t +test3 () +{ + return svlastp_b8 (svptrue_b8 (), + svpfalse_b ()); +} + +/* +** test4: +** mov x0, 31 +** ret +*/ +uint64_t +test4 () +{ + return svlastp_b8 (svdupq_n_b8 (false, false, false, false, + false, false, false, false, + false, false, false, false, + false, false, false, true), + svptrue_b8 ()); +} + +/* +** test5: +** mov x0, 15 +** ret +*/ +uint64_t +test5 () +{ + return svlastp_b16 (svptrue_b16 (), + svptrue_b16 ()); +} + +/* +** test6: +** mov x0, -1 +** ret +*/ +uint64_t +test6 () +{ + return svlastp_b16 (svpfalse_b (), + svptrue_b16 ()); +} + +/* +** test7: +** mov x0, 
-1 +** ret +*/ +uint64_t +test7 () +{ + return svlastp_b16 (svptrue_b16 (), + svpfalse_b ()); +} + +/* +** test8: +** mov x0, 15 +** ret +*/ +uint64_t +test8 () +{ + return svlastp_b16 (svdupq_n_b16 (false, false, false, false, + false, false, false, true), + svptrue_b16 ()); +} + +/* +** test9: +** mov x0, 7 +** ret +*/ +uint64_t +test9 () +{ + return svlastp_b32 (svptrue_b32 (), + svptrue_b32 ()); +} + +/* +** test10: +** mov x0, -1 +** ret +*/ +uint64_t +test10 () +{ + return svlastp_b32 (svpfalse_b (), + svptrue_b32 ()); +} + +/* +** test11: +** mov x0, -1 +** ret +*/ +uint64_t +test11 () +{ + return svlastp_b32 (svptrue_b32 (), + svpfalse_b ()); +} + +/* +** test12: +** mov x0, 7 +** ret +*/ +uint64_t +test12 () +{ + return svlastp_b32 (svdupq_n_b32 (false, false, false, true), + svptrue_b32 ()); +} + +/* +** test13: +** mov x0, 3 +** ret +*/ +uint64_t +test13 () +{ + return svlastp_b64 (svptrue_b64 (), + svptrue_b64 ()); +} + +/* +** test14: +** mov x0, -1 +** ret +*/ +uint64_t +test14 () +{ + return svlastp_b64 (svpfalse_b (), + svptrue_b64 ()); +} + +/* +** test15: +** mov x0, -1 +** ret +*/ +uint64_t +test15 () +{ + return svlastp_b64 (svptrue_b64 (), + svpfalse_b ()); +} + +/* +** test16: +** mov x0, 3 +** ret +*/ +uint64_t +test16 () +{ + return svlastp_b64 (svdupq_n_b64 (false, true), + svptrue_b64 ()); +} + +#ifdef __cplusplus +} +#endif
