https://gcc.gnu.org/g:2b371286fd58c8b59b9de2f062ae51e29a2d4e8d
commit r17-976-g2b371286fd58c8b59b9de2f062ae51e29a2d4e8d Author: Artemiy Volkov <[email protected]> Date: Sat Jan 10 07:40:59 2026 +0000 aarch64: add zeroing forms for predicated SVE top FP conversions SVE2.2 (or in streaming mode, SME2.2) adds support for zeroing predication for the following SVE FP conversion instructions: SVE1: - BFCVTNT (Single-precision convert to BFloat16 (top, predicated)) SVE2: - FCVTLT (Floating-point widening convert (top, predicated)) - FCVTNT (Floating-point narrowing convert (top, predicated)) - FCVTXNT (Double-precision convert to single-precision, rounding to odd (top, predicated)) Additionally, this patch implements corresponding intrinsics documented in the ACLE manual [0] with the following signatures: svfloat{32,64}_t svcvtlt_{f32[_f16],_f64[_f32]}_z (svbool_t pg, svfloat{16,32}_t op); sv{bfloat16,float16,float32}_t svcvtnt_{f16[_f32],_f32[_f64],_bf16[_f32]}_z (sv{bfloat16,float16,float32}_t even, svbool_t pg, svfloat{32,64}_t op); svfloat32_t svcvtxnt_f32[_f64]_z (svfloat32_t even, svbool_t pg, svfloat64_t op); This patch adds an alternative that emits a single zeroing-predication form of the instructions mentioned above (as long as the sve2p2_or_sme2p2 condition holds) to corresponding RTL patterns. For narrowing conversions ([B]FCVTNT and FCVTXNT), since an additional merge operand controlling the values of inactive lanes is required, the intrinsics have been changed to use the new narrowing_top_convert SVE function base class; this new class injects a const_vector selector operand at expand time. Depending on the value of this operand, either the destination vector or a constant zero vector is used to supply values for inactive lanes. The new tests all have "_z" in their names since they only cover the zeroing-predication versions of their respective intrinsics. [0] https://github.com/ARM-software/acle gcc/ChangeLog: * config/aarch64/aarch64-sve-builtins-base.cc (class svcvtnt_impl): Remove. 
(svcvtnt): Redefine using narrowing_top_convert. * config/aarch64/aarch64-sve-builtins-functions.h (class narrowing_top_convert): New SVE function base class. (NARROWING_TOP_CONVERT0): New function-like macro for specializing narrowing_top_convert. (NARROWING_TOP_CONVERT1): Likewise. * config/aarch64/aarch64-sve-builtins-sve2.cc (class svcvtxnt_impl): Remove. (svcvtxnt): Redefine using narrowing_top_convert. * config/aarch64/aarch64-sve-builtins-sve2.def (svcvtlt): Allow zeroing predication. (svcvtnt): Likewise. (svcvtxnt): Likewise. * config/aarch64/aarch64-sve.md (@aarch64_sve_cvtnt<mode>): Convert to compact syntax. Add operand 4 for values of inactive lanes. New alternative for zeroing predication. * config/aarch64/aarch64-sve2.md (*cond_<sve_fp_op><mode>_relaxed): Convert to compact syntax. New alternative for zeroing predication. (*cond_<sve_fp_op><mode>_strict): Likewise. (@aarch64_sve_cvtnt<mode>): Convert to compact syntax. Add operand 4 for values of inactive lanes. New alternative for zeroing predication. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve2/acle/asm/cvtlt_f32_z.c: New test. * gcc.target/aarch64/sve2/acle/asm/cvtlt_f64_z.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/cvtnt_bf16_z.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/cvtnt_f16_z.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/cvtnt_f32_z.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/cvtxnt_f32_z.c: Likewise. 
Diff: --- gcc/config/aarch64/aarch64-sve-builtins-base.cc | 14 +----- .../aarch64/aarch64-sve-builtins-functions.h | 41 ++++++++++++++++ gcc/config/aarch64/aarch64-sve-builtins-sve2.cc | 14 +----- gcc/config/aarch64/aarch64-sve-builtins-sve2.def | 7 +++ gcc/config/aarch64/aarch64-sve.md | 16 ++++--- gcc/config/aarch64/aarch64-sve2.md | 55 ++++++++++++++-------- .../gcc.target/aarch64/sve2/acle/asm/cvtlt_f32_z.c | 28 +++++++++++ .../gcc.target/aarch64/sve2/acle/asm/cvtlt_f64_z.c | 28 +++++++++++ .../aarch64/sve2/acle/asm/cvtnt_bf16_z.c | 34 +++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/cvtnt_f16_z.c | 34 +++++++++++++ .../gcc.target/aarch64/sve2/acle/asm/cvtnt_f32_z.c | 34 +++++++++++++ .../aarch64/sve2/acle/asm/cvtxnt_f32_z.c | 34 +++++++++++++ 12 files changed, 287 insertions(+), 52 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 9661a031fa01..4edb4f89c165 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -855,18 +855,6 @@ public: } }; -class svcvtnt_impl : public CODE_FOR_MODE0 (aarch64_sve_cvtnt) -{ -public: - gimple * - fold (gimple_folder &f) const override - { - if (f.pred == PRED_x && is_pfalse (gimple_call_arg (f.call, 1))) - f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs))); - return NULL; - } -}; - class svdiv_impl : public rtx_code_function { public: @@ -3594,7 +3582,7 @@ FUNCTION (svcreate2, svcreate_impl, (2)) FUNCTION (svcreate3, svcreate_impl, (3)) FUNCTION (svcreate4, svcreate_impl, (4)) FUNCTION (svcvt, svcvt_impl,) -FUNCTION (svcvtnt, svcvtnt_impl,) +FUNCTION (svcvtnt, NARROWING_TOP_CONVERT0 (aarch64_sve_cvtnt),) FUNCTION (svdiv, svdiv_impl,) FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV)) FUNCTION (svdot, svdot_impl,) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h index 
df5e44a294fd..629408668e73 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h @@ -870,6 +870,47 @@ public: int m_unspec_for_uint; }; +template<insn_code (*CODE_FOR_MODE) (machine_mode), unsigned int N> +class narrowing_top_convert : public code_for_mode_function <CODE_FOR_MODE, N> +{ + using base = code_for_mode_function <CODE_FOR_MODE, N>; + +public: + gimple * + fold (gimple_folder &f) const override + { + if (f.pred == PRED_x && is_pfalse (gimple_call_arg (f.call, 1))) + return f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs))); + return NULL; + } + + rtx + expand (function_expander &e) const override + { + /* If the instruction is predicated, Add a selector argument for the + values of inactive lanes, which is equal to all ones for merging + predication and to all zeros for zeroing predication. */ + if (e.pred == PRED_none) + ; + else if (e.pred == PRED_z) + { + e.args.quick_push (CONST0_RTX (e.result_mode ())); + } + else + { + gcc_assert (e.pred == PRED_m || e.pred == PRED_x); + e.args.quick_push (CONST1_RTX (e.result_mode ())); + } + + return base::expand (e); + } +}; + +#define NARROWING_TOP_CONVERT0(PATTERN)\ + narrowing_top_convert<code_for_##PATTERN, 0> +#define NARROWING_TOP_CONVERT1(PATTERN)\ + narrowing_top_convert<code_for_##PATTERN, 1> + } /* Declare the global function base NAME, creating it from an instance diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc index c4c8bae86b8e..86ea2efe5aa5 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc @@ -252,18 +252,6 @@ public: } }; -class svcvtxnt_impl : public CODE_FOR_MODE1 (aarch64_sve2_cvtxnt) -{ -public: - gimple * - fold (gimple_folder &f) const override - { - if (f.pred == PRED_x && is_pfalse (gimple_call_arg (f.call, 1))) - return f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs))); - return NULL; - } -}; 
- class svdup_laneq_impl : public function_base { public: @@ -1028,7 +1016,7 @@ FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT)) FUNCTION (svcvtn, svcvtn_impl,) FUNCTION (svcvtnb, fixed_insn_function, (CODE_FOR_aarch64_sve2_fp8_cvtnbvnx16qi)) FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX)) -FUNCTION (svcvtxnt, svcvtxnt_impl,) +FUNCTION (svcvtxnt, NARROWING_TOP_CONVERT1 (aarch64_sve2_cvtxnt),) FUNCTION (svdup_laneq, svdup_laneq_impl,) FUNCTION (sveor3, CODE_FOR_MODE0 (aarch64_sve2_eor3),) FUNCTION (sveorbt, unspec_based_function, (UNSPEC_EORBT, UNSPEC_EORBT, -1)) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def index 62a80a7b3203..82714361275a 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def @@ -299,6 +299,13 @@ DEF_SVE_FUNCTION (svst1q_scatter, store_scatter64_index, hsd_data, implicit) DEF_SVE_FUNCTION (svst1wq, store, s_data, implicit) #undef REQUIRED_EXTENSIONS +#define REQUIRED_EXTENSIONS sve_and_sme (AARCH64_FL_SVE2p2, AARCH64_FL_SME2p2) +DEF_SVE_FUNCTION (svcvtlt, unary_convert, cvt_long, z) +DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_narrow, z) +DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, z) +DEF_SVE_FUNCTION (svcvtxnt, unary_convert_narrowt, cvt_narrow_s, z) +#undef REQUIRED_EXTENSIONS + #define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2) DEF_SVE_FUNCTION_GS (svadd, binary_single, all_integer, x24, none) DEF_SVE_FUNCTION_GS (svclamp, clamp, all_arith, x24, none) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index c7fe14973f95..e36ff9d6468b 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -11374,15 +11374,19 @@ ;; ;; This instructions does not take MOVPRFX. 
(define_insn "@aarch64_sve_cvtnt<mode>" - [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w") + [(set (match_operand:VNx8BF_ONLY 0 "register_operand") (unspec:VNx8BF_ONLY - [(match_operand:VNx4BI 2 "register_operand" "Upl") + [(match_operand:VNx4BI 2 "register_operand") (const_int SVE_STRICT_GP) - (match_operand:VNx8BF_ONLY 1 "register_operand" "0") - (match_operand:VNx4SF 3 "register_operand" "w")] + (match_operand:VNx8BF_ONLY 1 "register_operand") + (match_operand:VNx8BF_ONLY 4 "aarch64_constant_vector_operand") + (match_operand:VNx4SF 3 "register_operand")] UNSPEC_COND_FCVTNT))] - "TARGET_SVE_BF16" - "bfcvtnt\t%0.h, %2/m, %3.s" + "TARGET_SVE_BF16 || TARGET_SVE2p2_OR_SME2p2" + {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: arch ] + [ w , 0 , Upl , w , vs1 ; * ] bfcvtnt\t%0.h, %2/m, %3.s + [ w , 0 , Upl , w , Dz ; sve2p2_or_sme2p2 ] bfcvtnt\t%0.h, %2/z, %3.s + } [(set_attr "sve_type" "sve_fp_cvt")] ) diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 995b08f084cf..97beccbe51f7 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -3479,18 +3479,21 @@ ;; These instructions do not take MOVPRFX. 
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_relaxed" - [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w") + [(set (match_operand:SVE_FULL_SDF 0 "register_operand") (unspec:SVE_FULL_SDF - [(match_operand:<VPRED> 1 "register_operand" "Upl") + [(match_operand:<VPRED> 1 "register_operand") (unspec:SVE_FULL_SDF [(match_operand 4) (const_int SVE_RELAXED_GP) - (match_operand:<VNARROW> 2 "register_operand" "w")] + (match_operand:<VNARROW> 2 "register_operand")] SVE2_COND_FP_UNARY_LONG) - (match_operand:SVE_FULL_SDF 3 "register_operand" "0")] + (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_direct_zero")] UNSPEC_SEL))] "TARGET_SVE2" - "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: arch ] + [ w , Upl , w , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype> + [ w , Upl , w , Dz ; sve2p2_or_sme2p2 ] <sve_fp_op>\t%0.<Vetype>, %1/z, %2.<Ventype> + } "&& !rtx_equal_p (operands[1], operands[4])" { operands[4] = copy_rtx (operands[1]); @@ -3499,18 +3502,21 @@ ) (define_insn "*cond_<sve_fp_op><mode>_strict" - [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w") + [(set (match_operand:SVE_FULL_SDF 0 "register_operand") (unspec:SVE_FULL_SDF - [(match_operand:<VPRED> 1 "register_operand" "Upl") + [(match_operand:<VPRED> 1 "register_operand") (unspec:SVE_FULL_SDF [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:<VNARROW> 2 "register_operand" "w")] + (match_operand:<VNARROW> 2 "register_operand")] SVE2_COND_FP_UNARY_LONG) - (match_operand:SVE_FULL_SDF 3 "register_operand" "0")] + (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_direct_zero")] UNSPEC_SEL))] "TARGET_SVE2" - "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: arch ] + [ w , Upl , w , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype> + [ w , Upl , w , Dz ; sve2p2_or_sme2p2 ] <sve_fp_op>\t%0.<Vetype>, %1/z, %2.<Ventype> + } [(set_attr "sve_type" "sve_fp_cvt")] ) @@ -3540,15 +3546,19 @@ ;; ;; These instructions do 
not take MOVPRFX. (define_insn "@aarch64_sve_cvtnt<mode>" - [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w") + [(set (match_operand:SVE_FULL_HSF 0 "register_operand") (unspec:SVE_FULL_HSF - [(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl") + [(match_operand:<VWIDE_PRED> 2 "register_operand") (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_HSF 1 "register_operand" "0") - (match_operand:<VWIDE> 3 "register_operand" "w")] + (match_operand:SVE_FULL_HSF 1 "register_operand") + (match_operand:SVE_FULL_HSF 4 "aarch64_constant_vector_operand") + (match_operand:<VWIDE> 3 "register_operand")] UNSPEC_COND_FCVTNT))] "TARGET_SVE2" - "fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>" + {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: arch ] + [ w , 0 , Upl , w , vs1 ; * ] fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype> + [ w , 0 , Upl , w , Dz ; sve2p2_or_sme2p2 ] fcvtnt\t%0.<Vetype>, %2/z, %3.<Vewtype> + } [(set_attr "sve_type" "sve_fp_cvt")] ) @@ -3636,18 +3646,23 @@ ;; ;; These instructions do not take MOVPRFX. 
(define_insn "@aarch64_sve2_cvtxnt<mode>" - [(set (match_operand:<VNARROW> 0 "register_operand" "=w") + [(set (match_operand:<VNARROW> 0 "register_operand") (unspec:<VNARROW> - [(match_operand:<VPRED> 2 "register_operand" "Upl") + [(match_operand:<VPRED> 2 "register_operand") (const_int SVE_STRICT_GP) - (match_operand:<VNARROW> 1 "register_operand" "0") - (match_operand:VNx2DF_ONLY 3 "register_operand" "w")] + (match_operand:<VNARROW> 1 "register_operand") + (match_operand:<VNARROW> 4 "aarch64_constant_vector_operand") + (match_operand:VNx2DF_ONLY 3 "register_operand")] UNSPEC_COND_FCVTXNT))] "TARGET_SVE2" - "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>" + {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: arch ] + [ w , 0 , Upl , w , vs1 ; * ] fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype> + [ w , 0 , Upl , w , Dz ; sve2p2_or_sme2p2 ] fcvtxnt\t%0.<Ventype>, %2/z, %3.<Vetype> + } [(set_attr "sve_type" "sve_fp_cvt")] ) + ;; ------------------------------------------------------------------------- ;; ---- [FP<-FP] Multi-vector widening conversions ;; ------------------------------------------------------------------------- diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f32_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f32_z.c new file mode 100644 index 000000000000..c7ca18e63867 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f32_z.c @@ -0,0 +1,28 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvtlt_f32_f16_z_tied1: +** fcvtlt z0\.s, p0/z, z0\.h +** ret +*/ +TEST_DUAL_Z_REV (cvtlt_f32_f16_z_tied1, svfloat32_t, svfloat16_t, + z0_res = svcvtlt_f32_f16_z (p0, z0), + z0_res = svcvtlt_f32_z (p0, z0)) + +/* +** cvtlt_f32_f16_z_untied: +** fcvtlt z0\.s, p0/z, z4\.h +** ret +*/ +TEST_DUAL_Z (cvtlt_f32_f16_z_untied, svfloat32_t, svfloat16_t, + z0 = svcvtlt_f32_f16_z (p0, z4), + z0 = svcvtlt_f32_z (p0, z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f64_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f64_z.c new file mode 100644 index 000000000000..29229c022fb5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f64_z.c @@ -0,0 +1,28 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvtlt_f64_f32_z_tied1: +** fcvtlt z0\.d, p0/z, z0\.s +** ret +*/ +TEST_DUAL_Z_REV (cvtlt_f64_f32_z_tied1, svfloat64_t, svfloat32_t, + z0_res = svcvtlt_f64_f32_z (p0, z0), + z0_res = svcvtlt_f64_z (p0, z0)) + +/* +** cvtlt_f64_f32_z_untied: +** fcvtlt z0\.d, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvtlt_f64_f32_z_untied, svfloat64_t, svfloat32_t, + z0 = svcvtlt_f64_f32_z (p0, z4), + z0 = svcvtlt_f64_z (p0, z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_bf16_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_bf16_z.c new file mode 100644 index 000000000000..2f23e49d7823 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_bf16_z.c @@ -0,0 +1,34 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2+bf16" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvtnt_bf16_f32_z_tied1: +** bfcvtnt z0\.h, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvtnt_bf16_f32_z_tied1, svbfloat16_t, svfloat32_t, + z0 = svcvtnt_bf16_f32_z (z0, p0, z4), + z0 = svcvtnt_bf16_z (z0, p0, z4)) + +/* +** cvtnt_bf16_f32_z_untied: +** ( +** mov z0\.d, z1\.d +** bfcvtnt z0\.h, p0/z, z4\.s +** | +** bfcvtnt z1\.h, p0/z, z4\.s +** mov z0\.d, z1\.d +** ) +** ret +*/ +TEST_DUAL_Z (cvtnt_bf16_f32_z_untied, svbfloat16_t, svfloat32_t, + z0 = svcvtnt_bf16_f32_z (z1, p0, z4), + z0 = svcvtnt_bf16_z (z1, p0, z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f16_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f16_z.c new file mode 100644 index 000000000000..d05a0006a82a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f16_z.c @@ -0,0 +1,34 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvtnt_f16_f32_z_tied1: +** fcvtnt z0\.h, p0/z, z4\.s +** ret +*/ +TEST_DUAL_Z (cvtnt_f16_f32_z_tied1, svfloat16_t, svfloat32_t, + z0 = svcvtnt_f16_f32_z (z0, p0, z4), + z0 = svcvtnt_f16_z (z0, p0, z4)) + +/* +** cvtnt_f16_f32_z_untied: +** ( +** mov z0\.d, z1\.d +** fcvtnt z0\.h, p0/z, z4\.s +** | +** fcvtnt z1\.h, p0/z, z4\.s +** mov z0\.d, z1\.d +** ) +** ret +*/ +TEST_DUAL_Z (cvtnt_f16_f32_z_untied, svfloat16_t, svfloat32_t, + z0 = svcvtnt_f16_f32_z (z1, p0, z4), + z0 = svcvtnt_f16_z (z1, p0, z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f32_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f32_z.c new file mode 100644 index 000000000000..042cf1c6784e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f32_z.c @@ -0,0 +1,34 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! 
aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvtnt_f32_f64_z_tied1: +** fcvtnt z0\.s, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvtnt_f32_f64_z_tied1, svfloat32_t, svfloat64_t, + z0 = svcvtnt_f32_f64_z (z0, p0, z4), + z0 = svcvtnt_f32_z (z0, p0, z4)) + +/* +** cvtnt_f32_f64_z_untied: +** ( +** mov z0\.d, z1\.d +** fcvtnt z0\.s, p0/z, z4\.d +** | +** fcvtnt z1\.s, p0/z, z4\.d +** mov z0\.d, z1\.d +** ) +** ret +*/ +TEST_DUAL_Z (cvtnt_f32_f64_z_untied, svfloat32_t, svfloat64_t, + z0 = svcvtnt_f32_f64_z (z1, p0, z4), + z0 = svcvtnt_f32_z (z1, p0, z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtxnt_f32_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtxnt_f32_z.c new file mode 100644 index 000000000000..0f12bca6b55a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtxnt_f32_z.c @@ -0,0 +1,34 @@ +/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+sve2p2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+sme2p2" +#endif + +/* +** cvtxnt_f32_f64_z_tied1: +** fcvtxnt z0\.s, p0/z, z4\.d +** ret +*/ +TEST_DUAL_Z (cvtxnt_f32_f64_z_tied1, svfloat32_t, svfloat64_t, + z0 = svcvtxnt_f32_f64_z (z0, p0, z4), + z0 = svcvtxnt_f32_z (z0, p0, z4)) + +/* +** cvtxnt_f32_f64_z_untied: +** ( +** mov z0\.d, z1\.d +** fcvtxnt z0\.s, p0/z, z4\.d +** | +** fcvtxnt z1\.s, p0/z, z4\.d +** mov z0\.d, z1\.d +** ) +** ret +*/ +TEST_DUAL_Z (cvtxnt_f32_f64_z_untied, svfloat32_t, svfloat64_t, + z0 = svcvtxnt_f32_f64_z (z1, p0, z4), + z0 = svcvtxnt_f32_z (z1, p0, z4))
