[gcc r17-976] aarch64: add zeroing forms for predicated SVE top FP conversions

Artemiy Volkov via Gcc-cvs Fri, 29 May 2026 04:34:50 -0700

https://gcc.gnu.org/g:2b371286fd58c8b59b9de2f062ae51e29a2d4e8d


commit r17-976-g2b371286fd58c8b59b9de2f062ae51e29a2d4e8d
Author: Artemiy Volkov <[email protected]>
Date:   Sat Jan 10 07:40:59 2026 +0000

    aarch64: add zeroing forms for predicated SVE top FP conversions
    
    SVE2.2 (or in streaming mode, SME2.2) adds support for zeroing
    predication for the following SVE FP conversion instructions:
    
    SVE1:
            - BFCVTNT (Single-precision convert to BFloat16 (top, predicated))
    
    SVE2:
            - FCVTLT (Floating-point widening convert (top, predicated))
            - FCVTNT (Floating-point narrowing convert (top, predicated))
            - FCVTXNT (Double-precision convert to single-precision, rounding
              to odd (top, predicated))
    
    Additionally, this patch implements corresponding intrinsics documented in
    the ACLE manual [0] with the following signatures:
    
    svfloat{32,64}_t svcvtlt_{f32[_f16],f64[_f32]}_z
      (svbool_t pg, svfloat{16,32}_t op);
    
    sv{float16,float32,bfloat16}_t svcvtnt_{f16[_f32],f32[_f64],bf16[_f32]}_z
      (sv{float16,float32,bfloat16}_t even, svbool_t pg, svfloat{32,64,32}_t op);
    
    svfloat32_t svcvtxnt_f32[_f64]_z
      (svfloat32_t even, svbool_t pg, svfloat64_t op);
    
    To each of the corresponding RTL patterns, this patch adds an alternative
    that emits the single zeroing-predication form of the instruction (enabled
    only when the sve2p2_or_sme2p2 condition holds).  For narrowing conversions
    ([B]FCVTNT and FCVTXNT), since an additional merge operand controlling the
    values of inactive lanes is required, the intrinsics have been changed to
    use the new narrowing_top_convert SVE function base class; this new class
    injects a const_vector selector operand at expand time.  Depending on the
    value of this operand, either the destination vector or a constant zero
    vector is used to supply values for inactive lanes.
    
    The new tests all have "_z" in their names since they only cover the
    zeroing-predication versions of their respective intrinsics.
    
    [0] https://github.com/ARM-software/acle
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64-sve-builtins-base.cc (class svcvtnt_impl):
            Remove.
            (svcvtnt): Redefine using narrowing_top_convert.
            * config/aarch64/aarch64-sve-builtins-functions.h
            (class narrowing_top_convert): New SVE function base class.
            (NARROWING_TOP_CONVERT0): New function-like macro for specializing
            narrowing_top_convert.
            (NARROWING_TOP_CONVERT1): Likewise.
            * config/aarch64/aarch64-sve-builtins-sve2.cc (class svcvtxnt_impl):
            Remove.
            (svcvtxnt): Redefine using narrowing_top_convert.
            * config/aarch64/aarch64-sve-builtins-sve2.def (svcvtlt): Allow
            zeroing predication.
            (svcvtnt): Likewise.
            (svcvtxnt): Likewise.
            * config/aarch64/aarch64-sve.md (@aarch64_sve_cvtnt<mode>):
            Convert to compact syntax. Add operand 4 for values of
            inactive lanes.  New alternative for zeroing predication.
            * config/aarch64/aarch64-sve2.md
            (*cond_<sve_fp_op><mode>_relaxed): Convert to compact syntax.
            New alternative for zeroing predication.
            (*cond_<sve_fp_op><mode>_strict): Likewise.
            (@aarch64_sve_cvtnt<mode>): Convert to compact syntax. Add
            operand 4 for values of inactive lanes.  New alternative for
            zeroing predication.
            (@aarch64_sve2_cvtxnt<mode>): Likewise.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/sve2/acle/asm/cvtlt_f32_z.c: New test.
            * gcc.target/aarch64/sve2/acle/asm/cvtlt_f64_z.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/cvtnt_bf16_z.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/cvtnt_f16_z.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/cvtnt_f32_z.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/cvtxnt_f32_z.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc    | 14 +-----
 .../aarch64/aarch64-sve-builtins-functions.h       | 41 ++++++++++++++++
 gcc/config/aarch64/aarch64-sve-builtins-sve2.cc    | 14 +-----
 gcc/config/aarch64/aarch64-sve-builtins-sve2.def   |  7 +++
 gcc/config/aarch64/aarch64-sve.md                  | 16 ++++---
 gcc/config/aarch64/aarch64-sve2.md                 | 55 ++++++++++++++--------
 .../gcc.target/aarch64/sve2/acle/asm/cvtlt_f32_z.c | 28 +++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/cvtlt_f64_z.c | 28 +++++++++++
 .../aarch64/sve2/acle/asm/cvtnt_bf16_z.c           | 34 +++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/cvtnt_f16_z.c | 34 +++++++++++++
 .../gcc.target/aarch64/sve2/acle/asm/cvtnt_f32_z.c | 34 +++++++++++++
 .../aarch64/sve2/acle/asm/cvtxnt_f32_z.c           | 34 +++++++++++++
 12 files changed, 287 insertions(+), 52 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 9661a031fa01..4edb4f89c165 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -855,18 +855,6 @@ public:
   }
 };
 
-class svcvtnt_impl : public CODE_FOR_MODE0 (aarch64_sve_cvtnt)
-{
-public:
-  gimple *
-  fold (gimple_folder &f) const override
-  {
-    if (f.pred == PRED_x && is_pfalse (gimple_call_arg (f.call, 1)))
-      f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs)));
-    return NULL;
-  }
-};
-
 class svdiv_impl : public rtx_code_function
 {
 public:
@@ -3594,7 +3582,7 @@ FUNCTION (svcreate2, svcreate_impl, (2))
 FUNCTION (svcreate3, svcreate_impl, (3))
 FUNCTION (svcreate4, svcreate_impl, (4))
 FUNCTION (svcvt, svcvt_impl,)
-FUNCTION (svcvtnt, svcvtnt_impl,)
+FUNCTION (svcvtnt, NARROWING_TOP_CONVERT0 (aarch64_sve_cvtnt),)
 FUNCTION (svdiv, svdiv_impl,)
 FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
 FUNCTION (svdot, svdot_impl,)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
index df5e44a294fd..629408668e73 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
@@ -870,6 +870,47 @@ public:
   int m_unspec_for_uint;
 };
 
+template<insn_code (*CODE_FOR_MODE) (machine_mode), unsigned int N>
+class narrowing_top_convert : public code_for_mode_function <CODE_FOR_MODE, N>
+{
+  using base = code_for_mode_function <CODE_FOR_MODE, N>;
+
+public:
+  gimple *
+  fold (gimple_folder &f) const override
+  {
+    if (f.pred == PRED_x && is_pfalse (gimple_call_arg (f.call, 1)))
+      return f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs)));
+    return NULL;
+  }
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    /* If the instruction is predicated, add a selector argument for the
+       values of inactive lanes, which is equal to all ones for merging
+       predication and to all zeros for zeroing predication.  */
+    if (e.pred == PRED_none)
+      ;
+    else if (e.pred == PRED_z)
+      {
+       e.args.quick_push (CONST0_RTX (e.result_mode ()));
+      }
+    else
+      {
+       gcc_assert (e.pred == PRED_m || e.pred == PRED_x);
+       e.args.quick_push (CONST1_RTX (e.result_mode ()));
+      }
+
+    return base::expand (e);
+  }
+};
+
+#define NARROWING_TOP_CONVERT0(PATTERN)\
+  narrowing_top_convert<code_for_##PATTERN, 0>
+#define NARROWING_TOP_CONVERT1(PATTERN)\
+  narrowing_top_convert<code_for_##PATTERN, 1>
+
 }
 
 /* Declare the global function base NAME, creating it from an instance
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index c4c8bae86b8e..86ea2efe5aa5 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -252,18 +252,6 @@ public:
   }
 };
 
-class svcvtxnt_impl : public CODE_FOR_MODE1 (aarch64_sve2_cvtxnt)
-{
-public:
-  gimple *
-  fold (gimple_folder &f) const override
-  {
-    if (f.pred == PRED_x && is_pfalse (gimple_call_arg (f.call, 1)))
-      return f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs)));
-    return NULL;
-  }
-};
-
 class svdup_laneq_impl : public function_base
 {
 public:
@@ -1028,7 +1016,7 @@ FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT))
 FUNCTION (svcvtn, svcvtn_impl,)
 FUNCTION (svcvtnb, fixed_insn_function, (CODE_FOR_aarch64_sve2_fp8_cvtnbvnx16qi))
 FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX))
-FUNCTION (svcvtxnt, svcvtxnt_impl,)
+FUNCTION (svcvtxnt, NARROWING_TOP_CONVERT1 (aarch64_sve2_cvtxnt),)
 FUNCTION (svdup_laneq, svdup_laneq_impl,)
 FUNCTION (sveor3, CODE_FOR_MODE0 (aarch64_sve2_eor3),)
 FUNCTION (sveorbt, unspec_based_function, (UNSPEC_EORBT, UNSPEC_EORBT, -1))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index 62a80a7b3203..82714361275a 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -299,6 +299,13 @@ DEF_SVE_FUNCTION (svst1q_scatter, store_scatter64_index, hsd_data, implicit)
 DEF_SVE_FUNCTION (svst1wq, store, s_data, implicit)
 #undef REQUIRED_EXTENSIONS
 
+#define REQUIRED_EXTENSIONS sve_and_sme (AARCH64_FL_SVE2p2, AARCH64_FL_SME2p2)
+DEF_SVE_FUNCTION (svcvtlt, unary_convert, cvt_long, z)
+DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_narrow, z)
+DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, z)
+DEF_SVE_FUNCTION (svcvtxnt, unary_convert_narrowt, cvt_narrow_s, z)
+#undef REQUIRED_EXTENSIONS
+
 #define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2)
 DEF_SVE_FUNCTION_GS (svadd, binary_single, all_integer, x24, none)
 DEF_SVE_FUNCTION_GS (svclamp, clamp, all_arith, x24, none)
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index c7fe14973f95..e36ff9d6468b 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -11374,15 +11374,19 @@
 ;;
 ;; This instructions does not take MOVPRFX.
 (define_insn "@aarch64_sve_cvtnt<mode>"
-  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
+  [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
        (unspec:VNx8BF_ONLY
-         [(match_operand:VNx4BI 2 "register_operand" "Upl")
+         [(match_operand:VNx4BI 2 "register_operand")
           (const_int SVE_STRICT_GP)
-          (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
-          (match_operand:VNx4SF 3 "register_operand" "w")]
+          (match_operand:VNx8BF_ONLY 1 "register_operand")
+          (match_operand:VNx8BF_ONLY 4 "aarch64_constant_vector_operand")
+          (match_operand:VNx4SF 3 "register_operand")]
          UNSPEC_COND_FCVTNT))]
-  "TARGET_SVE_BF16"
-  "bfcvtnt\t%0.h, %2/m, %3.s"
+  "TARGET_SVE_BF16 || TARGET_SVE2p2_OR_SME2p2"
+  {@ [ cons: =0 , 1 , 2   , 3 , 4   ; attrs: arch ]
+     [ w        , 0 , Upl , w , vs1 ; *                ] bfcvtnt\t%0.h, %2/m, %3.s
+     [ w        , 0 , Upl , w , Dz  ; sve2p2_or_sme2p2 ] bfcvtnt\t%0.h, %2/z, %3.s
+  }
   [(set_attr "sve_type" "sve_fp_cvt")]
 )
 
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 995b08f084cf..97beccbe51f7 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -3479,18 +3479,21 @@
 
 ;; These instructions do not take MOVPRFX.
 (define_insn_and_rewrite "*cond_<sve_fp_op><mode>_relaxed"
-  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
+  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
        (unspec:SVE_FULL_SDF
-         [(match_operand:<VPRED> 1 "register_operand" "Upl")
+         [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_SDF
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
-             (match_operand:<VNARROW> 2 "register_operand" "w")]
+             (match_operand:<VNARROW> 2 "register_operand")]
             SVE2_COND_FP_UNARY_LONG)
-          (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
+          (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_direct_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE2"
-  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
+  {@ [ cons: =0 , 1   , 2 , 3  ; attrs: arch ]
+     [ w        , Upl , w , 0  ; *                ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>
+     [ w        , Upl , w , Dz ; sve2p2_or_sme2p2 ] <sve_fp_op>\t%0.<Vetype>, %1/z, %2.<Ventype>
+  }
   "&& !rtx_equal_p (operands[1], operands[4])"
   {
     operands[4] = copy_rtx (operands[1]);
@@ -3499,18 +3502,21 @@
 )
 
 (define_insn "*cond_<sve_fp_op><mode>_strict"
-  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
+  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
        (unspec:SVE_FULL_SDF
-         [(match_operand:<VPRED> 1 "register_operand" "Upl")
+         [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_SDF
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
-             (match_operand:<VNARROW> 2 "register_operand" "w")]
+             (match_operand:<VNARROW> 2 "register_operand")]
             SVE2_COND_FP_UNARY_LONG)
-          (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
+          (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_direct_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE2"
-  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
+  {@ [ cons: =0 , 1   , 2 , 3  ; attrs: arch ]
+     [ w        , Upl , w , 0  ; *                ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>
+     [ w        , Upl , w , Dz ; sve2p2_or_sme2p2 ] <sve_fp_op>\t%0.<Vetype>, %1/z, %2.<Ventype>
+  }
   [(set_attr "sve_type" "sve_fp_cvt")]
 )
 
@@ -3540,15 +3546,19 @@
 ;;
 ;; These instructions do not take MOVPRFX.
 (define_insn "@aarch64_sve_cvtnt<mode>"
-  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
+  [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
        (unspec:SVE_FULL_HSF
-         [(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl")
+         [(match_operand:<VWIDE_PRED> 2 "register_operand")
           (const_int SVE_STRICT_GP)
-          (match_operand:SVE_FULL_HSF 1 "register_operand" "0")
-          (match_operand:<VWIDE> 3 "register_operand" "w")]
+          (match_operand:SVE_FULL_HSF 1 "register_operand")
+          (match_operand:SVE_FULL_HSF 4 "aarch64_constant_vector_operand")
+          (match_operand:<VWIDE> 3 "register_operand")]
          UNSPEC_COND_FCVTNT))]
   "TARGET_SVE2"
-  "fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>"
+  {@ [ cons: =0 , 1 , 2   , 3 , 4   ; attrs: arch ]
+     [ w        , 0 , Upl , w , vs1 ; *                ] fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>
+     [ w        , 0 , Upl , w , Dz  ; sve2p2_or_sme2p2 ] fcvtnt\t%0.<Vetype>, %2/z, %3.<Vewtype>
+  }
   [(set_attr "sve_type" "sve_fp_cvt")]
 )
 
@@ -3636,18 +3646,23 @@
 ;;
 ;; These instructions do not take MOVPRFX.
 (define_insn "@aarch64_sve2_cvtxnt<mode>"
-  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
+  [(set (match_operand:<VNARROW> 0 "register_operand")
        (unspec:<VNARROW>
-         [(match_operand:<VPRED> 2 "register_operand" "Upl")
+         [(match_operand:<VPRED> 2 "register_operand")
           (const_int SVE_STRICT_GP)
-          (match_operand:<VNARROW> 1 "register_operand" "0")
-          (match_operand:VNx2DF_ONLY 3 "register_operand" "w")]
+          (match_operand:<VNARROW> 1 "register_operand")
+          (match_operand:<VNARROW> 4 "aarch64_constant_vector_operand")
+          (match_operand:VNx2DF_ONLY 3 "register_operand")]
          UNSPEC_COND_FCVTXNT))]
   "TARGET_SVE2"
-  "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
+  {@ [ cons: =0 , 1 , 2   , 3 , 4    ; attrs: arch ]
+     [ w        , 0 , Upl , w , vs1  ; *                ] fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>
+     [ w        , 0 , Upl , w , Dz   ; sve2p2_or_sme2p2 ] fcvtxnt\t%0.<Ventype>, %2/z, %3.<Vetype>
+  }
   [(set_attr "sve_type" "sve_fp_cvt")]
 )
 
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP<-FP] Multi-vector widening conversions
 ;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f32_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f32_z.c
new file mode 100644
index 000000000000..c7ca18e63867
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f32_z.c
@@ -0,0 +1,28 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** cvtlt_f32_f16_z_tied1:
+**     fcvtlt  z0\.s, p0/z, z0\.h
+**     ret
+*/
+TEST_DUAL_Z_REV (cvtlt_f32_f16_z_tied1, svfloat32_t, svfloat16_t,
+                z0_res = svcvtlt_f32_f16_z (p0, z0),
+                z0_res = svcvtlt_f32_z (p0, z0))
+
+/*
+** cvtlt_f32_f16_z_untied:
+**     fcvtlt  z0\.s, p0/z, z4\.h
+**     ret
+*/
+TEST_DUAL_Z (cvtlt_f32_f16_z_untied, svfloat32_t, svfloat16_t,
+                z0 = svcvtlt_f32_f16_z (p0, z4),
+                z0 = svcvtlt_f32_z (p0, z4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f64_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f64_z.c
new file mode 100644
index 000000000000..29229c022fb5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtlt_f64_z.c
@@ -0,0 +1,28 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** cvtlt_f64_f32_z_tied1:
+**     fcvtlt  z0\.d, p0/z, z0\.s
+**     ret
+*/
+TEST_DUAL_Z_REV (cvtlt_f64_f32_z_tied1, svfloat64_t, svfloat32_t,
+                z0_res = svcvtlt_f64_f32_z (p0, z0),
+                z0_res = svcvtlt_f64_z (p0, z0))
+
+/*
+** cvtlt_f64_f32_z_untied:
+**     fcvtlt  z0\.d, p0/z, z4\.s
+**     ret
+*/
+TEST_DUAL_Z (cvtlt_f64_f32_z_untied, svfloat64_t, svfloat32_t,
+                z0 = svcvtlt_f64_f32_z (p0, z4),
+                z0 = svcvtlt_f64_z (p0, z4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_bf16_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_bf16_z.c
new file mode 100644
index 000000000000..2f23e49d7823
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_bf16_z.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p2+bf16"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** cvtnt_bf16_f32_z_tied1:
+**     bfcvtnt z0\.h, p0/z, z4\.s
+**     ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_z_tied1, svbfloat16_t, svfloat32_t,
+                z0 = svcvtnt_bf16_f32_z (z0, p0, z4),
+                z0 = svcvtnt_bf16_z (z0, p0, z4))
+
+/*
+** cvtnt_bf16_f32_z_untied:
+** (
+**     mov     z0\.d, z1\.d
+**     bfcvtnt z0\.h, p0/z, z4\.s
+** |
+**     bfcvtnt z1\.h, p0/z, z4\.s
+**     mov     z0\.d, z1\.d
+** )
+**     ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_z_untied, svbfloat16_t, svfloat32_t,
+            z0 = svcvtnt_bf16_f32_z (z1, p0, z4),
+            z0 = svcvtnt_bf16_z (z1, p0, z4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f16_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f16_z.c
new file mode 100644
index 000000000000..d05a0006a82a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f16_z.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** cvtnt_f16_f32_z_tied1:
+**     fcvtnt  z0\.h, p0/z, z4\.s
+**     ret
+*/
+TEST_DUAL_Z (cvtnt_f16_f32_z_tied1, svfloat16_t, svfloat32_t,
+            z0 = svcvtnt_f16_f32_z (z0, p0, z4),
+            z0 = svcvtnt_f16_z (z0, p0, z4))
+
+/*
+** cvtnt_f16_f32_z_untied:
+** (
+**     mov     z0\.d, z1\.d
+**     fcvtnt  z0\.h, p0/z, z4\.s
+** |
+**     fcvtnt  z1\.h, p0/z, z4\.s
+**     mov     z0\.d, z1\.d
+** )
+**     ret
+*/
+TEST_DUAL_Z (cvtnt_f16_f32_z_untied, svfloat16_t, svfloat32_t,
+            z0 = svcvtnt_f16_f32_z (z1, p0, z4),
+            z0 = svcvtnt_f16_z (z1, p0, z4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f32_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f32_z.c
new file mode 100644
index 000000000000..042cf1c6784e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtnt_f32_z.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** cvtnt_f32_f64_z_tied1:
+**     fcvtnt  z0\.s, p0/z, z4\.d
+**     ret
+*/
+TEST_DUAL_Z (cvtnt_f32_f64_z_tied1, svfloat32_t, svfloat64_t,
+            z0 = svcvtnt_f32_f64_z (z0, p0, z4),
+            z0 = svcvtnt_f32_z (z0, p0, z4))
+
+/*
+** cvtnt_f32_f64_z_untied:
+** (
+**     mov     z0\.d, z1\.d
+**     fcvtnt  z0\.s, p0/z, z4\.d
+** |
+**     fcvtnt  z1\.s, p0/z, z4\.d
+**     mov     z0\.d, z1\.d
+** )
+**     ret
+*/
+TEST_DUAL_Z (cvtnt_f32_f64_z_untied, svfloat32_t, svfloat64_t,
+            z0 = svcvtnt_f32_f64_z (z1, p0, z4),
+            z0 = svcvtnt_f32_z (z1, p0, z4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtxnt_f32_z.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtxnt_f32_z.c
new file mode 100644
index 000000000000..0f12bca6b55a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cvtxnt_f32_z.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble { target aarch64_asm_sve2p2_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p2_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p2"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2p2"
+#endif
+
+/*
+** cvtxnt_f32_f64_z_tied1:
+**     fcvtxnt z0\.s, p0/z, z4\.d
+**     ret
+*/
+TEST_DUAL_Z (cvtxnt_f32_f64_z_tied1, svfloat32_t, svfloat64_t,
+            z0 = svcvtxnt_f32_f64_z (z0, p0, z4),
+            z0 = svcvtxnt_f32_z (z0, p0, z4))
+
+/*
+** cvtxnt_f32_f64_z_untied:
+** (
+**     mov     z0\.d, z1\.d
+**     fcvtxnt z0\.s, p0/z, z4\.d
+** |
+**     fcvtxnt z1\.s, p0/z, z4\.d
+**     mov     z0\.d, z1\.d
+** )
+**     ret
+*/
+TEST_DUAL_Z (cvtxnt_f32_f64_z_untied, svfloat32_t, svfloat64_t,
+            z0 = svcvtxnt_f32_f64_z (z1, p0, z4),
+            z0 = svcvtxnt_f32_z (z1, p0, z4))

[gcc r17-976] aarch64: add zeroing forms for predicated SVE top FP conversions

Reply via email to