[PATCH 11/12] aarch64: Use VNx16BI for svdupq_b*

Richard Sandiford Tue, 29 Jul 2025 09:46:41 -0700

This patch continues the work of making ACLE intrinsics use VNx16BI
for svbool_t results.  It deals with the predicate forms of svdupq.


The general predicate expansion builds an equivalent integer vector
and then compares it with zero.  This patch therefore relies on
the earlier patches to the comparison patterns.

gcc/
        * config/aarch64/aarch64-protos.h
        (aarch64_convert_sve_data_to_pred): Remove the mode argument.
        * config/aarch64/aarch64.cc
        (aarch64_sve_emit_int_cmp): Allow PRED_MODE to be VNx16BI or
        the natural predicate mode for the data mode.
        (aarch64_convert_sve_data_to_pred): Remove the mode argument
        and instead always create a VNx16BI result.
        (aarch64_expand_sve_const_pred): Update call accordingly.
        * config/aarch64/aarch64-sve-builtins-base.cc
        (svdupq_impl::expand): Likewise, ensuring that the result
        has mode VNx16BI.

gcc/testsuite/
        * gcc.target/aarch64/sve/acle/general/dupq_13.c: New test.
---
 gcc/config/aarch64/aarch64-protos.h           |  2 +-
 .../aarch64/aarch64-sve-builtins-base.cc      |  3 +-
 gcc/config/aarch64/aarch64.cc                 | 26 ++++++-----
 .../aarch64/sve/acle/general/dupq_13.c        | 45 +++++++++++++++++++
 4 files changed, 63 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_13.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 8f2fc9d2f97..7d3312b9918 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1020,7 +1020,7 @@ void aarch64_err_no_fpadvsimd (machine_mode);
 void aarch64_expand_epilogue (rtx_call_insn *);
 rtx aarch64_ptrue_all (unsigned int);
 opt_machine_mode aarch64_ptrue_all_mode (rtx);
-rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx);
+rtx aarch64_convert_sve_data_to_pred (rtx, rtx);
 rtx aarch64_expand_sve_dupq (rtx, machine_mode, rtx);
 void aarch64_expand_mov_immediate (rtx, rtx);
 rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 314d53ec9ad..ecc06877cac 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1215,8 +1215,7 @@ public:
     if (mode != e.vector_mode (0))
       {
        rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg);
-       return aarch64_convert_sve_data_to_pred (e.possible_target,
-                                                e.vector_mode (0), data_dupq);
+       return aarch64_convert_sve_data_to_pred (e.possible_target, data_dupq);
       }
 
     return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 3c8d08b7fdf..8b2395f243e 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3935,16 +3935,24 @@ aarch64_sve_fp_pred (machine_mode data_mode, rtx *strictness)
 
 /* Emit a comparison CMP between OP0 and OP1, both of which have mode
    DATA_MODE, and return the result in a predicate of mode PRED_MODE.
-   Use TARGET as the target register if nonnull and convenient.  */
+   Use TARGET as the target register if nonnull and convenient.
+
+   PRED_MODE can be either VNx16BI or the natural predicate mode for
+   DATA_MODE.  */
 
 static rtx
 aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp,
                          machine_mode data_mode, rtx op1, rtx op2)
 {
-  insn_code icode = code_for_aarch64_pred_cmp (cmp, data_mode);
+  auto src_pred_mode = aarch64_sve_pred_mode (data_mode);
+  insn_code icode;
+  if (known_eq (GET_MODE_NUNITS (pred_mode), GET_MODE_NUNITS (data_mode)))
+    icode = code_for_aarch64_pred_cmp (cmp, data_mode);
+  else
+    icode = code_for_aarch64_pred_cmp_acle (cmp, data_mode);
   expand_operand ops[5];
   create_output_operand (&ops[0], target, pred_mode);
-  create_input_operand (&ops[1], CONSTM1_RTX (pred_mode), pred_mode);
+  create_input_operand (&ops[1], CONSTM1_RTX (src_pred_mode), src_pred_mode);
   create_integer_operand (&ops[2], SVE_KNOWN_PTRUE);
   create_input_operand (&ops[3], op1, data_mode);
   create_input_operand (&ops[4], op2, data_mode);
@@ -3952,15 +3960,14 @@ aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp,
   return ops[0].value;
 }
 
-/* Use a comparison to convert integer vector SRC into MODE, which is
-   the corresponding SVE predicate mode.  Use TARGET for the result
-   if it's nonnull and convenient.  */
+/* Use a comparison to convert integer vector SRC into VNx16BI.
+   Use TARGET for the result if it's nonnull and convenient.  */
 
 rtx
-aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src)
+aarch64_convert_sve_data_to_pred (rtx target, rtx src)
 {
   machine_mode src_mode = GET_MODE (src);
-  return aarch64_sve_emit_int_cmp (target, mode, NE, src_mode,
+  return aarch64_sve_emit_int_cmp (target, VNx16BImode, NE, src_mode,
                                   src, CONST0_RTX (src_mode));
 }
 
@@ -6272,8 +6279,7 @@ aarch64_expand_sve_const_pred (rtx target, rtx_vector_builder &builder)
   for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
     int_builder.quick_push (INTVAL (builder.elt (i))
                            ? constm1_rtx : const0_rtx);
-  return aarch64_convert_sve_data_to_pred (target, VNx16BImode,
-                                          int_builder.build ());
+  return aarch64_convert_sve_data_to_pred (target, int_builder.build ());
 }
 
 /* Set DEST to immediate IMM.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_13.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_13.c
new file mode 100644
index 00000000000..6d702b86b7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_13.c
@@ -0,0 +1,45 @@
+/* { dg-options "-O2" } */
+
+#include <arm_sve.h>
+
+svbool_t
+test1 (int x0, int x1)
+{
+  return svand_z (svptrue_b8 (), svdupq_b64 (x0, x1), svptrue_b16 ());
+}
+
+svbool_t
+test2 (int x0, int x1, int x2, int x3)
+{
+  return svand_z (svptrue_b8 (), svdupq_b32 (x0, x1, x2, x3), svptrue_b16 ());
+}
+
+svbool_t
+test3 (int x0, int x1, int x2, int x3)
+{
+  return svand_z (svptrue_b32 (), svdupq_b32 (x0, x1, x2, x3), svptrue_b16 ());
+}
+
+svbool_t
+test4 (int x0, int x1, int x2, int x3)
+{
+  return svand_z (svptrue_b32 (), svdupq_b32 (x0, x1, x2, x3), svptrue_b32 ());
+}
+
+svbool_t
+test5 (int x0, int x1, int x2, int x3)
+{
+  return svand_z (svptrue_b8 (),
+                 svdupq_b16 (x0, x1, x2, x3, x2, x0, x1, x3),
+                 svptrue_b32 ());
+}
+
+svbool_t
+test6 (int x0, int x1, int x2, int x3)
+{
+  return svand_z (svptrue_b64 (),
+                 svdupq_b16 (x0, x1, x2, x3, x2, x0, x1, x3),
+                 svptrue_b16 ());
+}
+
+/* { dg-final { scan-assembler-not {\tand\tp} } } */
-- 
2.43.0

[PATCH 11/12] aarch64: Use VNx16BI for svdupq_b*

Reply via email to