This patch adds unpacked support for unconditional and
conditional CNOT.  The type suffix has to be taken from
the element size rather than the container size.

Tested on aarch64-linux-gnu and aarch64_be-elf.  Pushed to trunk.

Richard


gcc/
        * config/aarch64/aarch64-sve.md (*cnot<mode>): Extend from
        SVE_FULL_I to SVE_I.
        (*cond_cnot<mode>_2, *cond_cnot<mode>_any): Likewise.

gcc/testsuite/
        * gcc.target/aarch64/sve/cnot_2.c: New test.
        * gcc.target/aarch64/sve/cond_cnot_4.c: Likewise.
        * gcc.target/aarch64/sve/cond_cnot_4_run.c: Likewise.
        * gcc.target/aarch64/sve/cond_cnot_5.c: Likewise.
        * gcc.target/aarch64/sve/cond_cnot_5_run.c: Likewise.
        * gcc.target/aarch64/sve/cond_cnot_6.c: Likewise.
        * gcc.target/aarch64/sve/cond_cnot_6_run.c: Likewise.
---
 gcc/config/aarch64/aarch64-sve.md             | 36 +++++++++----------
 gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c | 29 +++++++++++++++
 .../gcc.target/aarch64/sve/cond_cnot_4.c      | 32 +++++++++++++++++
 .../gcc.target/aarch64/sve/cond_cnot_4_run.c  | 26 ++++++++++++++
 .../gcc.target/aarch64/sve/cond_cnot_5.c      | 32 +++++++++++++++++
 .../gcc.target/aarch64/sve/cond_cnot_5_run.c  | 26 ++++++++++++++
 .../gcc.target/aarch64/sve/cond_cnot_6.c      | 31 ++++++++++++++++
 .../gcc.target/aarch64/sve/cond_cnot_6_run.c  | 26 ++++++++++++++
 8 files changed, 220 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index b83f9912cb6..2f5a5e3c914 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3227,16 +3227,16 @@ (define_expand "@aarch64_pred_cnot<mode>"
 )
 
 (define_insn "*cnot<mode>"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-       (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+       (unspec:SVE_I
          [(unspec:<VPRED>
             [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
              (match_operand:SI 5 "aarch64_sve_ptrue_flag")
              (eq:<VPRED>
-               (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
-               (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
+               (match_operand:SVE_I 2 "register_operand" "0, w")
+               (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
             UNSPEC_PRED_Z)
-          (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
+          (match_operand:SVE_I 4 "aarch64_simd_imm_one")
           (match_dup 3)]
          UNSPEC_SEL))]
   "TARGET_SVE"
@@ -3274,19 +3274,19 @@ (define_expand "@cond_cnot<mode>"
 
 ;; Predicated logical inverse, merging with the first input.
 (define_insn_and_rewrite "*cond_cnot<mode>_2"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-       (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+       (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           ;; Logical inverse of operand 2 (as above).
-          (unspec:SVE_FULL_I
+          (unspec:SVE_I
             [(unspec:<VPRED>
                [(match_operand 5)
                 (const_int SVE_KNOWN_PTRUE)
                 (eq:<VPRED>
-                  (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
-                  (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
+                  (match_operand:SVE_I 2 "register_operand" "0, w")
+                  (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
                UNSPEC_PRED_Z)
-             (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
+             (match_operand:SVE_I 4 "aarch64_simd_imm_one")
              (match_dup 3)]
             UNSPEC_SEL)
           (match_dup 2)]
@@ -3310,22 +3310,22 @@ (define_insn_and_rewrite "*cond_cnot<mode>_2"
 ;; as earlyclobber helps to make the instruction more regular to the
 ;; register allocator.
 (define_insn_and_rewrite "*cond_cnot<mode>_any"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
-       (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
+       (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           ;; Logical inverse of operand 2 (as above).
-          (unspec:SVE_FULL_I
+          (unspec:SVE_I
             [(unspec:<VPRED>
                [(match_operand 5)
                 (const_int SVE_KNOWN_PTRUE)
                 (eq:<VPRED>
-                  (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
-                  (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
+                  (match_operand:SVE_I 2 "register_operand" "w, w, w")
+                  (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
                UNSPEC_PRED_Z)
-             (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
+             (match_operand:SVE_I 4 "aarch64_simd_imm_one")
              (match_dup 3)]
             UNSPEC_SEL)
-          (match_operand:SVE_FULL_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+          (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
   "@
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c
new file mode 100644
index 00000000000..fe778234424
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE1, TYPE2, COUNT)                          \
+  void __attribute__ ((noipa))                                 \
+  test_##TYPE1##_##TYPE2##_##TYPE3 (TYPE2 *restrict r,         \
+                                   TYPE1 *restrict pred,       \
+                                   TYPE2 *restrict a)          \
+  {                                                            \
+    for (int i = 0; i < COUNT; ++i)                            \
+      if (pred[i])                                             \
+       r[i] = !a[i];                                           \
+  }
+
+#define TEST_ALL(T) \
+  T (int16_t, int8_t, 7) \
+  T (int32_t, int8_t, 3) \
+  T (int32_t, int16_t, 3) \
+  T (int64_t, int8_t, 5) \
+  T (int64_t, int16_t, 5) \
+  T (int64_t, int32_t, 5)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c
new file mode 100644
index 00000000000..729d3f4f2ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE1, TYPE2, COUNT)                          \
+  void __attribute__ ((noipa))                                 \
+  test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r,                 \
+                         TYPE2 *__restrict a,                  \
+                         TYPE1 *__restrict pred)               \
+  {                                                            \
+    for (int i = 0; i < COUNT; ++i)                            \
+      r[i] = pred[i] ? !a[i] : a[i];                           \
+  }
+
+#define TEST_ALL(T) \
+  T (int16_t, int8_t, 7) \
+  T (int32_t, int8_t, 3) \
+  T (int32_t, int16_t, 3) \
+  T (int64_t, int8_t, 5) \
+  T (int64_t, int16_t, 5) \
+  T (int64_t, int32_t, 5)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz} } } */
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c
new file mode 100644
index 00000000000..de9c0a502e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c
@@ -0,0 +1,26 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_cnot_4.c"
+
+#define TEST_LOOP(TYPE1, TYPE2, N)                             \
+  {                                                            \
+    TYPE1 pred[N];                                             \
+    TYPE2 r[N], a[N];                                          \
+    for (int i = 0; i < N; ++i)                                        \
+      {                                                                \
+       a[i] = i & 1 ? 0 : 3 * (i + 1);                         \
+       pred[i] = (i % 3 < 2);                                  \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+    test_##TYPE1##_##TYPE2 (r, a, pred);                       \
+    for (int i = 0; i < N; ++i)                                        \
+      if (r[i] != (TYPE2) (pred[i] ? !a[i] : a[i]))            \
+       __builtin_abort ();                                     \
+  }
+
+int main ()
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c
new file mode 100644
index 00000000000..7318e108591
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE1, TYPE2, COUNT)                          \
+  void __attribute__ ((noipa))                                 \
+  test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r,                 \
+                         TYPE1 *__restrict a,                  \
+                         TYPE2 *__restrict b)                  \
+  {                                                            \
+    for (int i = 0; i < COUNT; ++i)                            \
+      r[i] = a[i] == 0 ? !b[i] : a[i];                         \
+  }
+
+#define TEST_ALL(T) \
+  T (int16_t, int8_t, 7) \
+  T (int32_t, int8_t, 3) \
+  T (int32_t, int16_t, 3) \
+  T (int64_t, int8_t, 5) \
+  T (int64_t, int16_t, 5) \
+  T (int64_t, int32_t, 5)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz} } } */
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c
new file mode 100644
index 00000000000..f8f277c32c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c
@@ -0,0 +1,26 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_cnot_5.c"
+
+#define TEST_LOOP(TYPE1, TYPE2, N)                             \
+  {                                                            \
+    TYPE1 a[N];                                                        \
+    TYPE2 r[N], b[N];                                          \
+    for (int i = 0; i < N; ++i)                                        \
+      {                                                                \
+       a[i] = i % 3 < 2 ? 0 : i * 42;                          \
+       b[i] = i & 1 ? 0 : 3 * (i + 1);                         \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+    test_##TYPE1##_##TYPE2 (r, a, b);                          \
+    for (int i = 0; i < N; ++i)                                        \
+      if (r[i] != (TYPE2) (a[i] == 0 ? !b[i] : a[i]))          \
+       __builtin_abort ();                                     \
+  }
+
+int main ()
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c
new file mode 100644
index 00000000000..d44e357f44a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE1, TYPE2, COUNT)                          \
+  void __attribute__ ((noipa))                                 \
+  test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r,                 \
+                         TYPE1 *__restrict a,                  \
+                         TYPE2 *__restrict b)                  \
+  {                                                            \
+    for (int i = 0; i < COUNT; ++i)                            \
+      r[i] = a[i] == 0 ? !b[i] : 127;                          \
+  }
+
+#define TEST_ALL(T) \
+  T (int16_t, int8_t, 7) \
+  T (int32_t, int8_t, 3) \
+  T (int32_t, int16_t, 3) \
+  T (int64_t, int8_t, 5) \
+  T (int64_t, int16_t, 5) \
+  T (int64_t, int32_t, 5)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c
new file mode 100644
index 00000000000..9e33616dc8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c
@@ -0,0 +1,26 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_cnot_6.c"
+
+#define TEST_LOOP(TYPE1, TYPE2, N)                             \
+  {                                                            \
+    TYPE1 a[N];                                                        \
+    TYPE2 r[N], b[N];                                          \
+    for (int i = 0; i < N; ++i)                                        \
+      {                                                                \
+       a[i] = i % 3 < 2 ? 0 : i * 42;                          \
+       b[i] = i & 1 ? 0 : 3 * (i + 1);                         \
+       asm volatile ("" ::: "memory");                         \
+      }                                                                \
+    test_##TYPE1##_##TYPE2 (r, a, b);                          \
+    for (int i = 0; i < N; ++i)                                        \
+      if (r[i] != (TYPE2) (a[i] == 0 ? !b[i] : 127))           \
+       __builtin_abort ();                                     \
+  }
+
+int main ()
+{
+  TEST_ALL (TEST_LOOP)
+  return 0;
+}

Reply via email to