Hi! This patch improves code generation for EOR, ORR and AND on unpacked vectors with SVE. The following function: void f (unsigned int *x, unsigned short *y, unsigned short *z) { for (int i = 0; i < 7; ++i) x[i] = (unsigned short) (y[i] & z[i]); }
previously compiled to: ptrue p1.d, vl3 ld1h z0.d, p1/z, [x1, #1, mul vl] ptrue p0.b, vl32 st1h z0.d, p0, [sp, #1, mul vl] ld1h z0.d, p1/z, [x2, #1, mul vl] st1h z0.d, p0, [sp] ldr x3, [x2] ldp x4, x2, [sp] ldr x1, [x1] and x1, x3, x1 and x2, x2, x4 str x2, [sp] ld1h z0.d, p0/z, [sp] str x1, [sp] uxth z0.s, p0/m, z0.s st1w z0.d, p1, [x0, #1, mul vl] ld1h z0.d, p0/z, [sp] uxth z0.s, p0/m, z0.s st1w z0.d, p0, [x0] add sp, sp, 16 ret and now compiles to: ptrue p0.s, vl7 ptrue p1.b, vl32 ld1h z1.s, p0/z, [x1] ld1h z0.s, p0/z, [x2] and z0.d, z0.d, z1.d uxth z0.s, p1/m, z0.s st1w z0.s, p0, [x0] ret Tested on aarch64-linux-gnu and x86_64-linux-gnu hosts. Thanks, Joe 2020-05-20 Joe Ramsay <joe.ram...@arm.com> * config/aarch64/aarch64-sve.md (<LOGICAL:optab><mode>3): Add support for unpacked EOR, ORR, AND. gcc/testsuite/ChangeLog 2020-05-20 Joe Ramsay <joe.ram...@arm.com> * gcc.target/aarch64/sve/logical_unpacked_and_1.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_2.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_3.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_4.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_5.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_6.c: New test. * gcc.target/aarch64/sve/logical_unpacked_and_7.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_1.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_2.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_3.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_4.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_5.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_6.c: New test. * gcc.target/aarch64/sve/logical_unpacked_eor_7.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_1.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_2.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_3.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_4.c: New test. 
* gcc.target/aarch64/sve/logical_unpacked_orr_5.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_6.c: New test. * gcc.target/aarch64/sve/logical_unpacked_orr_7.c: New test. --- diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index f7a0893..8f0944c 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -4211,10 +4211,10 @@ ;; Unpredicated integer binary logical operations. (define_insn "<optab><mode>3" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?w, w") - (LOGICAL:SVE_FULL_I - (match_operand:SVE_FULL_I 1 "register_operand" "%0, w, w") - (match_operand:SVE_FULL_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))] + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?w, w") + (LOGICAL:SVE_I + (match_operand:SVE_I 1 "register_operand" "%0, w, w") + (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))] "TARGET_SVE" "@ <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_1.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_1.c new file mode 100644 index 0000000..7840355 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_1.c @@ -0,0 +1,16 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint32_t *restrict dst, uint16_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint16_t) (src1[i] & src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_2.c new file mode 100644 index 0000000..08b2745 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_2.c @@ -0,0 +1,17 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint64_t *restrict dst, uint16_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint16_t) (src1[i] & src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.h,} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_3.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_3.c new file mode 100644 index 0000000..c823470 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_3.c @@ -0,0 +1,17 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint64_t *restrict dst, uint32_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint32_t) (src1[i] & src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtw\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_4.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_4.c new file mode 100644 index 0000000..52c9291 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_4.c @@ -0,0 +1,17 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint64_t *restrict dst, uint32_t *restrict src1, uint16_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint32_t) (src1[i] & src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtw\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_5.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_5.c new file mode 100644 index 0000000..7840355 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_5.c @@ -0,0 +1,16 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint32_t *restrict dst, uint16_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint16_t) (src1[i] & src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_6.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_6.c new file mode 100644 index 0000000..1552ed8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_6.c @@ -0,0 +1,17 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint64_t 
*restrict dst, uint16_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint16_t) (src1[i] & src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.h,} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_7.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_7.c new file mode 100644 index 0000000..484d9da --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_and_7.c @@ -0,0 +1,16 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint64_t *restrict dst, uint32_t *restrict src1, uint8_t *restrict src2){ + for (int i = 0; i < 7; ++i) + dst[i] = (uint32_t) (src1[i] & src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtw\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_1.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_1.c new file mode 100644 index 0000000..36a0b8c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_1.c @@ -0,0 +1,16 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint32_t *restrict dst, uint16_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 
0; i < 7; ++i) + dst[i] = (uint16_t) (src1[i] ^ src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_2.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_2.c new file mode 100644 index 0000000..23ddeb9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_2.c @@ -0,0 +1,17 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint64_t *restrict dst, uint16_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint16_t) (src1[i] ^ src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.h,} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_3.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_3.c new file mode 100644 index 0000000..4dd1e08 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_3.c @@ -0,0 +1,17 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint64_t *restrict dst, uint32_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint32_t) (src1[i] ^ src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times 
{\tld1b\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtw\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_4.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_4.c new file mode 100644 index 0000000..a31a2d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_4.c @@ -0,0 +1,17 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint64_t *restrict dst, uint32_t *restrict src1, uint16_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint32_t) (src1[i] ^ src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtw\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_5.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_5.c new file mode 100644 index 0000000..36a0b8c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_5.c @@ -0,0 +1,16 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint32_t *restrict dst, uint16_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint16_t) (src1[i] ^ src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { 
dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_6.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_6.c new file mode 100644 index 0000000..416567b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_6.c @@ -0,0 +1,17 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint64_t *restrict dst, uint16_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint16_t) (src1[i] ^ src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.h,} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_7.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_7.c new file mode 100644 index 0000000..3f7c3dd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_eor_7.c @@ -0,0 +1,16 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint64_t *restrict dst, uint32_t *restrict src1, uint8_t *restrict src2){ + for (int i = 0; i < 7; ++i) + dst[i] = (uint32_t) (src1[i] ^ src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtw\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { 
scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_1.c new file mode 100644 index 0000000..6131792 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_1.c @@ -0,0 +1,16 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint32_t *restrict dst, uint16_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint16_t) (src1[i] | src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_2.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_2.c new file mode 100644 index 0000000..593de65 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_2.c @@ -0,0 +1,17 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint64_t *restrict dst, uint16_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint16_t) (src1[i] | src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.h,} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_3.c 
b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_3.c new file mode 100644 index 0000000..ec34e75 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_3.c @@ -0,0 +1,17 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint64_t *restrict dst, uint32_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint32_t) (src1[i] | src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtw\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_4.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_4.c new file mode 100644 index 0000000..561a104 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_4.c @@ -0,0 +1,17 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint64_t *restrict dst, uint32_t *restrict src1, uint16_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint32_t) (src1[i] | src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtw\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_5.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_5.c new file mode 100644 index 0000000..6131792 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_5.c @@ -0,0 +1,16 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint32_t *restrict dst, uint16_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint16_t) (src1[i] | src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_6.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_6.c new file mode 100644 index 0000000..3ce1c3f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_6.c @@ -0,0 +1,17 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include <stdint.h> + +void +f (uint64_t *restrict dst, uint16_t *restrict src1, uint8_t *restrict src2) +{ + for (int i = 0; i < 7; ++i) + dst[i] = (uint16_t) (src1[i] | src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.h,} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_7.c b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_7.c new file mode 100644 index 0000000..e6a4291 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/logical_unpacked_orr_7.c @@ -0,0 +1,16 @@ +/* { dg-options "-O3 -msve-vector-bits=256" } */ + +#include 
<stdint.h> + +void +f (uint64_t *restrict dst, uint32_t *restrict src1, uint8_t *restrict src2){ + for (int i = 0; i < 7; ++i) + dst[i] = (uint32_t) (src1[i] | src2[i]); +} + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtb\tz[0-9]+\.s,} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuxtw\tz[0-9]+\.d,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 2 } } */