https://gcc.gnu.org/g:9f54332461238a365acb4e1ada5d0327e4c93644
commit r16-4560-g9f54332461238a365acb4e1ada5d0327e4c93644
Author: Tamar Christina <[email protected]>
Date:   Wed Oct 22 10:51:41 2025 +0100

    AArch64: Add support for boolean reductions for Adv. SIMD

    The vectorizer has learned how to do boolean reductions of masks to a
    C bool for the operations OR, XOR and AND.  This implements the new
    optabs for Adv. SIMD.

    Adv. SIMD can already vectorize such loops today, but does so through
    SHIFT-AND-INSERT, performing the reduction step-wise and in order.
    As an example, an OR reduction today does:

            movi    v3.4s, 0
            ext     v5.16b, v30.16b, v3.16b, #8
            orr     v5.16b, v5.16b, v30.16b
            ext     v29.16b, v5.16b, v3.16b, #4
            orr     v29.16b, v29.16b, v5.16b
            ext     v4.16b, v29.16b, v3.16b, #2
            orr     v4.16b, v4.16b, v29.16b
            ext     v3.16b, v4.16b, v3.16b, #1
            orr     v3.16b, v3.16b, v4.16b
            fmov    w1, s3
            and     w1, w1, 1

    When reducing to a boolean, however, we do not need the step-wise
    reduction and can just inspect the bit pattern of the mask.  For OR,
    for example, we now generate:

            umaxp   v3.4s, v3.4s, v3.4s
            fmov    x1, d3
            cmp     x1, 0
            cset    w0, ne

    For the remaining codegen see the test vect-reduc-bool-9.c.

    gcc/ChangeLog:

            * config/aarch64/aarch64-simd.md (reduc_sbool_and_scal_<mode>,
            reduc_sbool_ior_scal_<mode>, reduc_sbool_xor_scal_<mode>): New.
            * config/aarch64/iterators.md (VALLI): New.

    gcc/testsuite/ChangeLog:

            * gcc.target/aarch64/vect-reduc-bool-1.c: New test.
            * gcc.target/aarch64/vect-reduc-bool-2.c: New test.
            * gcc.target/aarch64/vect-reduc-bool-3.c: New test.
            * gcc.target/aarch64/vect-reduc-bool-4.c: New test.
            * gcc.target/aarch64/vect-reduc-bool-5.c: New test.
            * gcc.target/aarch64/vect-reduc-bool-6.c: New test.
            * gcc.target/aarch64/vect-reduc-bool-7.c: New test.
            * gcc.target/aarch64/vect-reduc-bool-8.c: New test.
            * gcc.target/aarch64/vect-reduc-bool-9.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md                 | 97 ++++++++++++++++++++++
 gcc/config/aarch64/iterators.md                    |  3 +
 .../gcc.target/aarch64/vect-reduc-bool-1.c         | 51 ++++++++++++
 .../gcc.target/aarch64/vect-reduc-bool-2.c         | 51 ++++++++++++
 .../gcc.target/aarch64/vect-reduc-bool-3.c         | 51 ++++++++++++
 .../gcc.target/aarch64/vect-reduc-bool-4.c         | 51 ++++++++++++
 .../gcc.target/aarch64/vect-reduc-bool-5.c         | 49 +++++++++++
 .../gcc.target/aarch64/vect-reduc-bool-6.c         | 49 +++++++++++
 .../gcc.target/aarch64/vect-reduc-bool-7.c         | 49 +++++++++++
 .../gcc.target/aarch64/vect-reduc-bool-8.c         | 49 +++++++++++
 .../gcc.target/aarch64/vect-reduc-bool-9.c         | 63 ++++++++++++++
 11 files changed, 563 insertions(+)
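For reference, the shape of loop being lowered (adapted from the new
tests below) reduces comparison results into a C bool; the AND and XOR
variants are analogous:

    #include <stdbool.h>

    char p[128];

    /* OR reduction: r is true if any element of p[0..n-1] is non-zero.  */
    bool
    fior (int n)
    {
      bool r = false;
      for (int i = 0; i < n; ++i)
        r |= (p[i] != 0);
      return r;
    }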
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index eaa8d57cc413..648a42f7d0f7 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3469,6 +3469,103 @@
   DONE;
 })
 
+;; AND tree reductions.
+;; Check that after a MIN pairwise reduction every lane is all-ones.
+;;
+;;   uminp  v1.4s, v1.4s, v1.4s
+;;   fmov   x1, d1
+;;   cmn    x1, #1
+;;   cset   w0, eq
+;;
+(define_expand "reduc_sbool_and_scal_<mode>"
+  [(set (match_operand:QI 0 "register_operand")
+	(unspec:QI [(match_operand:VALLI 1 "register_operand")]
+		   UNSPEC_ANDV))]
+  "TARGET_SIMD"
+{
+  rtx tmp = operands[1];
+  /* 128-bit vectors need to be compressed to 64 bits first.  */
+  if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
+    {
+      /* Always reduce using a V4SI.  */
+      rtx reduc = gen_lowpart (V4SImode, tmp);
+      rtx res = gen_reg_rtx (V4SImode);
+      emit_insn (gen_aarch64_uminpv4si (res, reduc, reduc));
+      emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
+    }
+  rtx val = gen_reg_rtx (DImode);
+  emit_move_insn (val, gen_lowpart (DImode, tmp));
+  rtx cc_reg = aarch64_gen_compare_reg (EQ, val, constm1_rtx);
+  rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
+  rtx tmp2 = gen_reg_rtx (SImode);
+  emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+  emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+  DONE;
+})
+
+;; IOR tree reductions.
+;; Check that after a MAX pairwise reduction at least one lane is non-zero.
+;;
+;;   umaxp  v1.4s, v1.4s, v1.4s
+;;   fmov   x1, d1
+;;   cmp    x1, 0
+;;   cset   w0, ne
+;;
+(define_expand "reduc_sbool_ior_scal_<mode>"
+  [(set (match_operand:QI 0 "register_operand")
+	(unspec:QI [(match_operand:VALLI 1 "register_operand")]
+		   UNSPEC_IORV))]
+  "TARGET_SIMD"
+{
+  rtx tmp = operands[1];
+  /* 128-bit vectors need to be compressed to 64 bits first.  */
+  if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
+    {
+      /* Always reduce using a V4SI.  */
+      rtx reduc = gen_lowpart (V4SImode, tmp);
+      rtx res = gen_reg_rtx (V4SImode);
+      emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
+      emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
+    }
+  rtx val = gen_reg_rtx (DImode);
+  emit_move_insn (val, gen_lowpart (DImode, tmp));
+  rtx cc_reg = aarch64_gen_compare_reg (NE, val, const0_rtx);
+  rtx cmp = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx);
+  rtx tmp2 = gen_reg_rtx (SImode);
+  emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+  emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+  DONE;
+})
+
+;; XOR tree reductions.
+;; The result is 1 iff the number of active lanes is odd.  Mask each lane
+;; with 0x1 and use a normal plus reduction; bit 0 of the sum is the parity.
+;;
+;;   movi   v1.16b, 0x1
+;;   and    v2.16b, v2.16b, v1.16b
+;;   addv   b3, v2.16b
+;;   fmov   w1, s3
+;;   and    w0, w1, 1
+;;
+(define_expand "reduc_sbool_xor_scal_<mode>"
+  [(set (match_operand:QI 0 "register_operand")
+	(unspec:QI [(match_operand:VALLI 1 "register_operand")]
+		   UNSPEC_XORV))]
+  "TARGET_SIMD"
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);
+  rtx one_reg = force_reg (<MODE>mode, CONST1_RTX (<MODE>mode));
+  emit_move_insn (tmp, gen_rtx_AND (<MODE>mode, operands[1], one_reg));
+  rtx tmp2 = gen_reg_rtx (<VEL>mode);
+  emit_insn (gen_reduc_plus_scal_<mode> (tmp2, tmp));
+  rtx tmp3 = gen_reg_rtx (DImode);
+  emit_move_insn (tmp3, gen_rtx_AND (DImode,
+				     lowpart_subreg (DImode, tmp2, <VEL>mode),
+				     const1_rtx));
+  emit_move_insn (operands[0], gen_lowpart (QImode, tmp3));
+  DONE;
+})
+
 ;; SADDLV and UADDLV can be expressed as an ADDV instruction that first
 ;; sign or zero-extends its elements.
 (define_insn "aarch64_<su>addlv<mode>"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 3757998c0ea9..517b2808b5f7 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -217,6 +217,9 @@
 ;; All Advanced SIMD modes on which we support any arithmetic operations.
 (define_mode_iterator VALL [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF])
 
+;; All Advanced SIMD integer modes.
+(define_mode_iterator VALLI [VDQ_BHSI V2DI])
+
 ;; All Advanced SIMD modes suitable for moving, loading, and storing.
 (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
				 V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
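These lowerings rely on vector comparisons producing lanes that are
either all-zeros or all-ones.  A scalar sketch of the three tricks,
assuming 8-bit lanes viewed as one 64-bit word (illustrative only; the
function names are invented here and this is not the code the expanders
emit):

    #include <stdbool.h>
    #include <stdint.h>

    /* Each lane of 'mask' is 0x00 (false) or 0xff (true), as produced
       by a vector comparison, viewed here as one 64-bit word.  */

    /* AND: every lane is all-ones iff the whole word is all-ones
       (cmn x1, #1; cset w0, eq).  */
    bool
    and_reduce (uint64_t mask)
    {
      return mask == UINT64_MAX;
    }

    /* IOR: some lane is true iff the word is non-zero
       (cmp x1, 0; cset w0, ne).  */
    bool
    ior_reduce (uint64_t mask)
    {
      return mask != 0;
    }

    /* XOR: masking each lane with 1 and summing counts the true lanes;
       bit 0 of the count is the parity (movi/and/addv, then
       and w0, w1, 1).  */
    bool
    xor_reduce (const uint8_t lanes[8])
    {
      unsigned sum = 0;
      for (int i = 0; i < 8; ++i)
        sum += lanes[i] & 1;
      return sum & 1;
    }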
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-1.c b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-1.c
new file mode 100644
index 000000000000..c9b1c85c222e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-1.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only -fdump-tree-vect-details" } */
+
+char p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-2.c b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-2.c
new file mode 100644
index 000000000000..598d6c71ec84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only -fdump-tree-vect-details" } */
+
+short p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-3.c b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-3.c
new file mode 100644
index 000000000000..9517965753a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-3.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only -fdump-tree-vect-details" } */
+
+int p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-4.c b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-4.c
new file mode 100644
index 000000000000..3cd577f5ed59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-4.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only -fdump-tree-vect-details" } */
+
+long long p[128];
+
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fand (n))
+      __builtin_abort ();
+
+  p[0] = 0;
+  for (int n = 1; n < 77; ++n)
+    if (fand (n))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fior (n))
+      __builtin_abort ();
+
+  p[0] = 1;
+  for (int n = 1; n < 77; ++n)
+    if (!fior (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-5.c b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-5.c
new file mode 100644
index 000000000000..c6fa63b7657e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-5.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only -fdump-tree-vect-details" } */
+
+char p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-6.c b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-6.c
new file mode 100644
index 000000000000..6d12e6a7cb4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-6.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only -fdump-tree-vect-details" } */
+
+short p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-7.c b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-7.c
new file mode 100644
index 000000000000..58d6a785f9a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-7.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only -fdump-tree-vect-details" } */
+
+int p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-8.c b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-8.c
new file mode 100644
index 000000000000..18ad94a4bd7f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-8.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only -fdump-tree-vect-details" } */
+
+long long p[128];
+
+bool __attribute__((noipa))
+fxort (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+bool __attribute__((noipa))
+fxorf (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+int main()
+{
+  __builtin_memset (p, 1, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (fxort (n) != !(n & 1))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n) != (n & 1))
+      __builtin_abort ();
+
+  __builtin_memset (p, 0, sizeof(p));
+
+  for (int n = 0; n < 77; ++n)
+    if (!fxort (n))
+      __builtin_abort ();
+
+  for (int n = 0; n < 77; ++n)
+    if (fxorf (n))
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 2 "vect" { target { vect_int && vect_condition } } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-9.c b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-9.c
new file mode 100644
index 000000000000..7d9a82f5fc3a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-reduc-bool-9.c
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 -fdump-tree-vect-details" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+char p[128];
+
+/*
+** fand:
+**	...
+**	uminp	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**	fmov	x[0-9]+, d[0-9]+
+**	cmn	x[0-9]+, #1
+**	cset	w[0-9]+, eq
+**	...
+*/
+bool __attribute__((noipa))
+fand (int n)
+{
+  bool r = true;
+  for (int i = 0; i < n; ++i)
+    r &= (p[i] != 0);
+  return r;
+}
+
+/*
+** fior:
+**	...
+**	umaxp	v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
+**	fmov	x[0-9]+, d[0-9]+
+**	cmp	x[0-9]+, 0
+**	cset	w[0-9]+, ne
+**	...
+*/
+bool __attribute__((noipa))
+fior (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r |= (p[i] != 0);
+  return r;
+}
+
+/*
+** fxor:
+**	...
+**	movi	v[0-9]+.16b, 0x1
+**	and	v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
+**	addv	b[0-9]+, v[0-9]+.16b
+**	fmov	w[0-9]+, s[0-9]+
+**	and	w[0-9]+, w[0-9]+, 1
+**	...
+*/
+bool __attribute__((noipa))
+fxor (int n)
+{
+  bool r = false;
+  for (int i = 0; i < n; ++i)
+    r ^= (p[i] != 0);
+  return r;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 3 "vect" } } */
