This patch extends the SVE integer MLA/MAD patterns to support unpacked integer vectors. The type suffix on the instructions could be based on either the element size or the container size, but using the element size should be more efficient.
Tested on aarch64-linux-gnu and aarch64_be-elf, pushed to trunk. Richard gcc/ * config/aarch64/aarch64-sve.md (fma<mode>4): Extend from SVE_FULL_I to SVE_I. (@aarch64_pred_fma<mode>, cond_fma<mode>, *cond_fma<mode>_2) (*cond_fma<mode>_4, *cond_fma<mode>_any): Likewise. gcc/testsuite/ * gcc.target/aarch64/sve/mla_2.c: New test. * g++.target/aarch64/sve/cond_mla_1.C: Likewise. * g++.target/aarch64/sve/cond_mla_2.C: Likewise. * g++.target/aarch64/sve/cond_mla_3.C: Likewise. * g++.target/aarch64/sve/cond_mla_4.C: Likewise. * g++.target/aarch64/sve/cond_mla_5.C: Likewise. --- gcc/config/aarch64/aarch64-sve.md | 88 +++++++++---------- .../g++.target/aarch64/sve/cond_mla_1.C | 33 +++++++ .../g++.target/aarch64/sve/cond_mla_2.C | 33 +++++++ .../g++.target/aarch64/sve/cond_mla_3.C | 33 +++++++ .../g++.target/aarch64/sve/cond_mla_4.C | 36 ++++++++ .../g++.target/aarch64/sve/cond_mla_5.C | 33 +++++++ gcc/testsuite/gcc.target/aarch64/sve/mla_2.c | 34 +++++++ 7 files changed, 246 insertions(+), 44 deletions(-) create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_mla_1.C create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_mla_2.C create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_mla_3.C create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_mla_4.C create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_mla_5.C create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mla_2.c diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index a6f8450f951..ac8a9b4b167 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -6554,15 +6554,15 @@ (define_insn "*<logical_nn><mode>3_ptest" ;; Unpredicated integer addition of product. 
(define_expand "fma<mode>4" - [(set (match_operand:SVE_FULL_I 0 "register_operand") - (plus:SVE_FULL_I - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand") + (plus:SVE_I + (unspec:SVE_I [(match_dup 4) - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 1 "register_operand") - (match_operand:SVE_FULL_I 2 "nonmemory_operand"))] + (mult:SVE_I + (match_operand:SVE_I 1 "register_operand") + (match_operand:SVE_I 2 "nonmemory_operand"))] UNSPEC_PRED_X) - (match_operand:SVE_FULL_I 3 "register_operand")))] + (match_operand:SVE_I 3 "register_operand")))] "TARGET_SVE" { if (aarch64_prepare_sve_int_fma (operands, PLUS)) @@ -6573,15 +6573,15 @@ (define_expand "fma<mode>4" ;; Predicated integer addition of product. (define_insn "@aarch64_pred_fma<mode>" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w") - (plus:SVE_FULL_I - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") + (plus:SVE_I + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))] + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "%0, w, w") + (match_operand:SVE_I 3 "register_operand" "w, w, w"))] UNSPEC_PRED_X) - (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")))] + (match_operand:SVE_I 4 "register_operand" "w, 0, w")))] "TARGET_SVE" "@ mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> @@ -6592,15 +6592,15 @@ (define_insn "@aarch64_pred_fma<mode>" ;; Predicated integer addition of product with merging. 
(define_expand "cond_fma<mode>" - [(set (match_operand:SVE_FULL_I 0 "register_operand") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand") - (plus:SVE_FULL_I - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand") - (match_operand:SVE_FULL_I 3 "general_operand")) - (match_operand:SVE_FULL_I 4 "register_operand")) - (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")] + (plus:SVE_I + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand") + (match_operand:SVE_I 3 "general_operand")) + (match_operand:SVE_I 4 "register_operand")) + (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE" { @@ -6615,14 +6615,14 @@ (define_expand "cond_fma<mode>" ;; Predicated integer addition of product, merging with the first input. (define_insn "*cond_fma<mode>_2" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") - (plus:SVE_FULL_I - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "0, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) - (match_operand:SVE_FULL_I 4 "register_operand" "w, w")) + (plus:SVE_I + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "register_operand" "w, w")) + (match_operand:SVE_I 4 "register_operand" "w, w")) (match_dup 2)] UNSPEC_SEL))] "TARGET_SVE" @@ -6634,14 +6634,14 @@ (define_insn "*cond_fma<mode>_2" ;; Predicated integer addition of product, merging with the third input. 
(define_insn "*cond_fma<mode>_4" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") - (plus:SVE_FULL_I - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "w, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) - (match_operand:SVE_FULL_I 4 "register_operand" "0, w")) + (plus:SVE_I + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "register_operand" "w, w")) + (match_operand:SVE_I 4 "register_operand" "0, w")) (match_dup 4)] UNSPEC_SEL))] "TARGET_SVE" @@ -6653,15 +6653,15 @@ (define_insn "*cond_fma<mode>_4" ;; Predicated integer addition of product, merging with an independent value. (define_insn_and_rewrite "*cond_fma<mode>_any" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") - (plus:SVE_FULL_I - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w")) - (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w")) - (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] + (plus:SVE_I + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w") + (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w")) + (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w")) + (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[5]) diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mla_1.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_1.C new file mode 100644 index 
00000000000..d5168b7a841 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_1.C @@ -0,0 +1,33 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return d == 0 ? a + b * c : a; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (uint8_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (uint8_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x0\][^L]*\tmla\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x0\][^L]*\tmla\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x0\][^L]*\tmla\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x0\][^L]*\tmla\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x0\][^L]*\tmla\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x0\][^L]*\tmla\t\1\.s,} } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mla_2.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_2.C new file mode 100644 index 00000000000..8ff0a69c7ab --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_2.C @@ -0,0 +1,33 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return d == 0 ? 
a + b * c : b; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (uint8_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (uint8_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x1\][^L]*\tmad\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x1\][^L]*\tmad\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x1\][^L]*\tmad\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x1\][^L]*\tmad\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x1\][^L]*\tmad\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x1\][^L]*\tmad\t\1\.s,} } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mla_3.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_3.C new file mode 100644 index 00000000000..b2f2942a23c --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_3.C @@ -0,0 +1,33 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return d == 0 ? 
a + b * c : c; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (uint8_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (uint8_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x2\][^L]*\tmad\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x2\][^L]*\tmad\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x2\][^L]*\tmad\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x2\][^L]*\tmad\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x2\][^L]*\tmad\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x2\][^L]*\tmad\t\1\.s,} } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mla_4.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_4.C new file mode 100644 index 00000000000..6edc96b6f68 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_4.C @@ -0,0 +1,36 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return d == 0 ? 
a + b * c : d; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (uint8_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (uint8_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x3\][^L]*\tmla\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x3\][^L]*\tmla\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x3\][^L]*\tmla\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x3\][^L]*\tmla\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x3\][^L]*\tmla\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x3\][^L]*\tmla\t\1\.s,} } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mla_5.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_5.C new file mode 100644 index 00000000000..6c169a805d0 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mla_5.C @@ -0,0 +1,33 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return d == 0 ? 
a + b * c : 0; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (uint8_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (uint8_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.b,} 3 } } */ +/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.h,} 2 } } */ +/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.s,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/mla_2.c new file mode 100644 index 00000000000..2fafd4b73cd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/mla_2.c @@ -0,0 +1,34 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_##AMT (TYPE a, TYPE b, TYPE c) \ + { \ + return a + b * c; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (int8_t, 32) +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (int8_t, 64) +TEST_TYPE (uint8_t, 64) +TEST_TYPE (int16_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (int8_t, 128) +TEST_TYPE (uint8_t, 128) +TEST_TYPE (int16_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (int32_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.b,} 6 } } */ +/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.h,} 4 } } */ +/* { dg-final { scan-assembler-times 
{\t(?:mla|mad)\tz[0-9]+\.s,} 2 } } */