[pushed] aarch64: Add support for unpacked SVE MLA and MAD

Richard Sandiford via Gcc-patches Wed, 13 Jan 2021 05:02:16 -0800

This patch extends the MLA/MAD patterns to support unpacked
integer vectors.  The type suffix could be either the element
size or the container size, but using the element size should
be more efficient.


Tested on aarch64-linux-gnu and aarch64_be-elf, pushed to trunk.

Richard


gcc/
        * config/aarch64/aarch64-sve.md (fma<mode>4): Extend from SVE_FULL_I
        to SVE_I.
        (@aarch64_pred_fma<mode>, cond_fma<mode>, *cond_fma<mode>_2)
        (*cond_fma<mode>_4, *cond_fma<mode>_any): Likewise.

gcc/testsuite/
        * gcc.target/aarch64/sve/mla_2.c: New test.
        * g++.target/aarch64/sve/cond_mla_1.C: Likewise.
        * g++.target/aarch64/sve/cond_mla_2.C: Likewise.
        * g++.target/aarch64/sve/cond_mla_3.C: Likewise.
        * g++.target/aarch64/sve/cond_mla_4.C: Likewise.
        * g++.target/aarch64/sve/cond_mla_5.C: Likewise.
---
 gcc/config/aarch64/aarch64-sve.md             | 88 +++++++++----------
 .../g++.target/aarch64/sve/cond_mla_1.C       | 33 +++++++
 .../g++.target/aarch64/sve/cond_mla_2.C       | 33 +++++++
 .../g++.target/aarch64/sve/cond_mla_3.C       | 33 +++++++
 .../g++.target/aarch64/sve/cond_mla_4.C       | 36 ++++++++
 .../g++.target/aarch64/sve/cond_mla_5.C       | 33 +++++++
 gcc/testsuite/gcc.target/aarch64/sve/mla_2.c  | 34 +++++++
 7 files changed, 246 insertions(+), 44 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_mla_1.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_mla_2.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_mla_3.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_mla_4.C
 create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_mla_5.C
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mla_2.c

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index a6f8450f951..ac8a9b4b167 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -6554,15 +6554,15 @@ (define_insn "*<logical_nn><mode>3_ptest"
 
 ;; Unpredicated integer addition of product.
 (define_expand "fma<mode>4"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-       (plus:SVE_FULL_I
-         (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand")
+       (plus:SVE_I
+         (unspec:SVE_I
            [(match_dup 4)
-            (mult:SVE_FULL_I
-              (match_operand:SVE_FULL_I 1 "register_operand")
-              (match_operand:SVE_FULL_I 2 "nonmemory_operand"))]
+            (mult:SVE_I
+              (match_operand:SVE_I 1 "register_operand")
+              (match_operand:SVE_I 2 "nonmemory_operand"))]
            UNSPEC_PRED_X)
-         (match_operand:SVE_FULL_I 3 "register_operand")))]
+         (match_operand:SVE_I 3 "register_operand")))]
   "TARGET_SVE"
   {
     if (aarch64_prepare_sve_int_fma (operands, PLUS))
@@ -6573,15 +6573,15 @@ (define_expand "fma<mode>4"
 
 ;; Predicated integer addition of product.
 (define_insn "@aarch64_pred_fma<mode>"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
-       (plus:SVE_FULL_I
-         (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
+       (plus:SVE_I
+         (unspec:SVE_I
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
-            (mult:SVE_FULL_I
-              (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
-              (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
+            (mult:SVE_I
+              (match_operand:SVE_I 2 "register_operand" "%0, w, w")
+              (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
            UNSPEC_PRED_X)
-         (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")))]
+         (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
   "TARGET_SVE"
   "@
    mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
@@ -6592,15 +6592,15 @@ (define_insn "@aarch64_pred_fma<mode>"
 
 ;; Predicated integer addition of product with merging.
 (define_expand "cond_fma<mode>"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-       (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand")
+       (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand")
-          (plus:SVE_FULL_I
-            (mult:SVE_FULL_I
-              (match_operand:SVE_FULL_I 2 "register_operand")
-              (match_operand:SVE_FULL_I 3 "general_operand"))
-            (match_operand:SVE_FULL_I 4 "register_operand"))
-          (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
+          (plus:SVE_I
+            (mult:SVE_I
+              (match_operand:SVE_I 2 "register_operand")
+              (match_operand:SVE_I 3 "general_operand"))
+            (match_operand:SVE_I 4 "register_operand"))
+          (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE"
   {
@@ -6615,14 +6615,14 @@ (define_expand "cond_fma<mode>"
 
 ;; Predicated integer addition of product, merging with the first input.
 (define_insn "*cond_fma<mode>_2"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-       (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+       (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-          (plus:SVE_FULL_I
-            (mult:SVE_FULL_I
-              (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
-              (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
-            (match_operand:SVE_FULL_I 4 "register_operand" "w, w"))
+          (plus:SVE_I
+            (mult:SVE_I
+              (match_operand:SVE_I 2 "register_operand" "0, w")
+              (match_operand:SVE_I 3 "register_operand" "w, w"))
+            (match_operand:SVE_I 4 "register_operand" "w, w"))
           (match_dup 2)]
          UNSPEC_SEL))]
   "TARGET_SVE"
@@ -6634,14 +6634,14 @@ (define_insn "*cond_fma<mode>_2"
 
 ;; Predicated integer addition of product, merging with the third input.
 (define_insn "*cond_fma<mode>_4"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
-       (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+       (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-          (plus:SVE_FULL_I
-            (mult:SVE_FULL_I
-              (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
-              (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
-            (match_operand:SVE_FULL_I 4 "register_operand" "0, w"))
+          (plus:SVE_I
+            (mult:SVE_I
+              (match_operand:SVE_I 2 "register_operand" "w, w")
+              (match_operand:SVE_I 3 "register_operand" "w, w"))
+            (match_operand:SVE_I 4 "register_operand" "0, w"))
           (match_dup 4)]
          UNSPEC_SEL))]
   "TARGET_SVE"
@@ -6653,15 +6653,15 @@ (define_insn "*cond_fma<mode>_4"
 
 ;; Predicated integer addition of product, merging with an independent value.
 (define_insn_and_rewrite "*cond_fma<mode>_any"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
-       (unspec:SVE_FULL_I
+  [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
+       (unspec:SVE_I
         [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
-          (plus:SVE_FULL_I
-            (mult:SVE_FULL_I
-              (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
-              (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w"))
-            (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w"))
-          (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
+          (plus:SVE_I
+            (mult:SVE_I
+              (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w")
+              (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w"))
+            (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w"))
+          (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
          UNSPEC_SEL))]
   "TARGET_SVE
    && !rtx_equal_p (operands[2], operands[5])
diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mla_1.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_1.C
new file mode 100644
index 00000000000..d5168b7a841
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_1.C
@@ -0,0 +1,33 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_OP(TYPE) \
+  TYPE \
+  test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \
+  { \
+    return d == 0 ? a + b * c : a; \
+  }
+
+#define TEST_TYPE(TYPE, SIZE) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_OP (TYPE##SIZE)
+
+TEST_TYPE (uint8_t, 32)
+
+TEST_TYPE (uint8_t, 64)
+TEST_TYPE (uint16_t, 64)
+
+TEST_TYPE (uint8_t, 128)
+TEST_TYPE (uint16_t, 128)
+TEST_TYPE (uint32_t, 128)
+
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x0\][^L]*\tmla\t\1\.b,} } } */
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x0\][^L]*\tmla\t\1\.b,} } } */
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x0\][^L]*\tmla\t\1\.b,} } } */
+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x0\][^L]*\tmla\t\1\.h,} } } */
+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x0\][^L]*\tmla\t\1\.h,} } } */
+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x0\][^L]*\tmla\t\1\.s,} } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mla_2.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_2.C
new file mode 100644
index 00000000000..8ff0a69c7ab
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_2.C
@@ -0,0 +1,33 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_OP(TYPE) \
+  TYPE \
+  test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \
+  { \
+    return d == 0 ? a + b * c : b; \
+  }
+
+#define TEST_TYPE(TYPE, SIZE) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_OP (TYPE##SIZE)
+
+TEST_TYPE (uint8_t, 32)
+
+TEST_TYPE (uint8_t, 64)
+TEST_TYPE (uint16_t, 64)
+
+TEST_TYPE (uint8_t, 128)
+TEST_TYPE (uint16_t, 128)
+TEST_TYPE (uint32_t, 128)
+
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x1\][^L]*\tmad\t\1\.b,} } } */
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x1\][^L]*\tmad\t\1\.b,} } } */
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x1\][^L]*\tmad\t\1\.b,} } } */
+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x1\][^L]*\tmad\t\1\.h,} } } */
+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x1\][^L]*\tmad\t\1\.h,} } } */
+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x1\][^L]*\tmad\t\1\.s,} } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mla_3.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_3.C
new file mode 100644
index 00000000000..b2f2942a23c
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_3.C
@@ -0,0 +1,33 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -msve-vector-bits=2048 -save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_OP(TYPE) \
+  TYPE \
+  test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \
+  { \
+    return d == 0 ? a + b * c : c; \
+  }
+
+#define TEST_TYPE(TYPE, SIZE) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_OP (TYPE##SIZE)
+
+TEST_TYPE (uint8_t, 32)
+
+TEST_TYPE (uint8_t, 64)
+TEST_TYPE (uint16_t, 64)
+
+TEST_TYPE (uint8_t, 128)
+TEST_TYPE (uint16_t, 128)
+TEST_TYPE (uint32_t, 128)
+
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x2\][^L]*\tmad\t\1\.b,} } } */
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x2\][^L]*\tmad\t\1\.b,} } } */
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x2\][^L]*\tmad\t\1\.b,} } } */
+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x2\][^L]*\tmad\t\1\.h,} } } */
+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x2\][^L]*\tmad\t\1\.h,} } } */
+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x2\][^L]*\tmad\t\1\.s,} } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mla_4.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_4.C
new file mode 100644
index 00000000000..6edc96b6f68
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_4.C
@@ -0,0 +1,36 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_OP(TYPE) \
+  TYPE \
+  test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \
+  { \
+    return d == 0 ? a + b * c : d; \
+  }
+
+#define TEST_TYPE(TYPE, SIZE) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_OP (TYPE##SIZE)
+
+TEST_TYPE (uint8_t, 32)
+
+TEST_TYPE (uint8_t, 64)
+TEST_TYPE (uint16_t, 64)
+
+TEST_TYPE (uint8_t, 128)
+TEST_TYPE (uint16_t, 128)
+TEST_TYPE (uint32_t, 128)
+
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x3\][^L]*\tmla\t\1\.b,} } } */
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x3\][^L]*\tmla\t\1\.b,} } } */
+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x3\][^L]*\tmla\t\1\.b,} } } */
+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x3\][^L]*\tmla\t\1\.h,} } } */
+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x3\][^L]*\tmla\t\1\.h,} } } */
+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x3\][^L]*\tmla\t\1\.s,} } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mla_5.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_5.C
new file mode 100644
index 00000000000..6c169a805d0
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_5.C
@@ -0,0 +1,33 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_OP(TYPE) \
+  TYPE \
+  test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \
+  { \
+    return d == 0 ? a + b * c : 0; \
+  }
+
+#define TEST_TYPE(TYPE, SIZE) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_OP (TYPE##SIZE)
+
+TEST_TYPE (uint8_t, 32)
+
+TEST_TYPE (uint8_t, 64)
+TEST_TYPE (uint16_t, 64)
+
+TEST_TYPE (uint8_t, 128)
+TEST_TYPE (uint16_t, 128)
+TEST_TYPE (uint32_t, 128)
+
+/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.b,} 3 } } */
+/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.h,} 2 } } */
+/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.s,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, z[0-9]+\.b\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/mla_2.c
new file mode 100644
index 00000000000..2fafd4b73cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mla_2.c
@@ -0,0 +1,34 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_OP(TYPE) \
+  TYPE \
+  test##_##TYPE##_##AMT (TYPE a, TYPE b, TYPE c) \
+  { \
+    return a + b * c; \
+  }
+
+#define TEST_TYPE(TYPE, SIZE) \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
+  TEST_OP (TYPE##SIZE)
+
+TEST_TYPE (int8_t, 32)
+TEST_TYPE (uint8_t, 32)
+
+TEST_TYPE (int8_t, 64)
+TEST_TYPE (uint8_t, 64)
+TEST_TYPE (int16_t, 64)
+TEST_TYPE (uint16_t, 64)
+
+TEST_TYPE (int8_t, 128)
+TEST_TYPE (uint8_t, 128)
+TEST_TYPE (int16_t, 128)
+TEST_TYPE (uint16_t, 128)
+TEST_TYPE (int32_t, 128)
+TEST_TYPE (uint32_t, 128)
+
+/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.b,} 6 } } */
+/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.h,} 4 } } */
+/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.s,} 2 } } */

[pushed] aarch64: Add support for unpacked SVE MLA and MAD

Reply via email to