Akram Ahmad <[email protected]> writes:
> Rename the existing SVE unpredicated saturating arithmetic instructions
> to use standard names which are used by IFN_SAT_ADD and IFN_SAT_SUB.
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-sve.md: Rename insns.
>
> gcc/testsuite/ChangeLog:
>
> * gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic.inc:
> Template file for auto-vectorizer tests.
> * gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_1.c:
> Instantiate 8-bit vector tests.
> * gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_2.c:
> Instantiate 16-bit vector tests.
> * gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_3.c:
> Instantiate 32-bit vector tests.
> * gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_4.c:
> Instantiate 64-bit vector tests.
OK, thanks. I'll push it along with patch 1.
Sorry again for the long delay in reviewing this series.
Richard
> ---
> gcc/config/aarch64/aarch64-sve.md | 4 +-
> .../aarch64/sve/saturating_arithmetic.inc | 68 +++++++++++++++++++
> .../aarch64/sve/saturating_arithmetic_1.c | 60 ++++++++++++++++
> .../aarch64/sve/saturating_arithmetic_2.c | 60 ++++++++++++++++
> .../aarch64/sve/saturating_arithmetic_3.c | 62 +++++++++++++++++
> .../aarch64/sve/saturating_arithmetic_4.c | 62 +++++++++++++++++
> 6 files changed, 314 insertions(+), 2 deletions(-)
> create mode 100644
> gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic.inc
> create mode 100644
> gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_1.c
> create mode 100644
> gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_2.c
> create mode 100644
> gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_3.c
> create mode 100644
> gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_4.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve.md
> b/gcc/config/aarch64/aarch64-sve.md
> index 06bd3e4bb2c..b987b292b20 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -4379,7 +4379,7 @@
> ;; -------------------------------------------------------------------------
>
> ;; Unpredicated saturating signed addition and subtraction.
> -(define_insn "@aarch64_sve_<optab><mode>"
> +(define_insn "<su_optab>s<addsub><mode>3"
> [(set (match_operand:SVE_FULL_I 0 "register_operand")
> (SBINQOPS:SVE_FULL_I
> (match_operand:SVE_FULL_I 1 "register_operand")
> @@ -4395,7 +4395,7 @@
> )
>
> ;; Unpredicated saturating unsigned addition and subtraction.
> -(define_insn "@aarch64_sve_<optab><mode>"
> +(define_insn "<su_optab>s<addsub><mode>3"
> [(set (match_operand:SVE_FULL_I 0 "register_operand")
> (UBINQOPS:SVE_FULL_I
> (match_operand:SVE_FULL_I 1 "register_operand")
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic.inc
> b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic.inc
> new file mode 100644
> index 00000000000..0b3ebbcb0d6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic.inc
> @@ -0,0 +1,68 @@
> +/* Template file for vector saturating arithmetic validation.
> +
> + This file defines saturating addition and subtraction functions for a given
> + scalar type, testing the auto-vectorization of these two operators. This
> + type, along with the corresponding minimum and maximum values for that type,
> + must be defined by any test file which includes this template file. */
> +
> +#ifndef SAT_ARIT_AUTOVEC_INC
> +#define SAT_ARIT_AUTOVEC_INC
> +
> +#include <limits.h>
> +#include <arm_neon.h>
> +
> +#ifndef UT
> +#define UT uint32_t
> +#define UMAX UINT_MAX
> +#define UMIN 0
> +#endif
> +
> +void uaddq (UT *out, UT *a, UT *b, int n)
> +{
> + for (int i = 0; i < n; i++)
> + {
> + UT sum = a[i] + b[i];
> + out[i] = sum < a[i] ? UMAX : sum;
> + }
> +}
> +
> +void uaddq2 (UT *out, UT *a, UT *b, int n)
> +{
> + for (int i = 0; i < n; i++)
> + {
> + UT sum;
> + if (!__builtin_add_overflow(a[i], b[i], &sum))
> + out[i] = sum;
> + else
> + out[i] = UMAX;
> + }
> +}
> +
> +void uaddq_imm (UT *out, UT *a, int n)
> +{
> + for (int i = 0; i < n; i++)
> + {
> + UT sum = a[i] + 50;
> + out[i] = sum < a[i] ? UMAX : sum;
> + }
> +}
> +
> +void usubq (UT *out, UT *a, UT *b, int n)
> +{
> + for (int i = 0; i < n; i++)
> + {
> + UT sum = a[i] - b[i];
> + out[i] = sum > a[i] ? UMIN : sum;
> + }
> +}
> +
> +void usubq_imm (UT *out, UT *a, int n)
> +{
> + for (int i = 0; i < n; i++)
> + {
> + UT sum = a[i] - 50;
> + out[i] = sum > a[i] ? UMIN : sum;
> + }
> +}
> +
> +#endif
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_1.c
> b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_1.c
> new file mode 100644
> index 00000000000..6936e9a2704
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_1.c
> @@ -0,0 +1,60 @@
> +/* { dg-do compile { target { aarch64*-*-* } } } */
> +/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +/*
> +** uaddq:
> +** ...
> +** ld1b\tz([0-9]+)\.b, .*
> +** ld1b\tz([0-9]+)\.b, .*
> +** uqadd\tz\2.b, z\1\.b, z\2\.b
> +** ...
> +** ldr\tb([0-9]+), .*
> +** ldr\tb([0-9]+), .*
> +** uqadd\tb\4, b\3, b\4
> +** ...
> +*/
> +/*
> +** uaddq2:
> +** ...
> +** ld1b\tz([0-9]+)\.b, .*
> +** ld1b\tz([0-9]+)\.b, .*
> +** uqadd\tz\2.b, z\1\.b, z\2\.b
> +** ...
> +** ldr\tb([0-9]+), .*
> +** ldr\tb([0-9]+), .*
> +** uqadd\tb\4, b\3, b\4
> +** ...
> +*/
> +/*
> +** uaddq_imm:
> +** ...
> +** ld1b\tz([0-9]+)\.b, .*
> +** uqadd\tz\1.b, z\1\.b, #50
> +** ...
> +** movi\tv([0-9]+)\.8b, 0x32
> +** ...
> +** ldr\tb([0-9]+), .*
> +** uqadd\tb\3, b\3, b\2
> +** ...
> +*/
> +/*
> +** usubq: { xfail *-*-* }
> +** ...
> +** ld1b\tz([0-9]+)\.b, .*
> +** ld1b\tz([0-9]+)\.b, .*
> +** uqsub\tz\2.b, z\1\.b, z\2\.b
> +** ...
> +** ldr\tb([0-9]+), .*
> +** ldr\tb([0-9]+), .*
> +** uqsub\tb\4, b\3, b\4
> +** ...
> +*/
> +
> +#include <limits.h>
> +
> +#define UT unsigned char
> +#define UMAX UCHAR_MAX
> +#define UMIN 0
> +
> +#include "saturating_arithmetic.inc"
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_2.c
> b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_2.c
> new file mode 100644
> index 00000000000..928bc0054df
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_2.c
> @@ -0,0 +1,60 @@
> +/* { dg-do compile { target { aarch64*-*-* } } } */
> +/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +/*
> +** uaddq:
> +** ...
> +** ld1h\tz([0-9]+)\.h, .*
> +** ld1h\tz([0-9]+)\.h, .*
> +** uqadd\tz\2.h, z\1\.h, z\2\.h
> +** ...
> +** ldr\th([0-9]+), .*
> +** ldr\th([0-9]+), .*
> +** uqadd\th\4, h\3, h\4
> +** ...
> +*/
> +/*
> +** uaddq2:
> +** ...
> +** ld1h\tz([0-9]+)\.h, .*
> +** ld1h\tz([0-9]+)\.h, .*
> +** uqadd\tz\2.h, z\1\.h, z\2\.h
> +** ...
> +** ldr\th([0-9]+), .*
> +** ldr\th([0-9]+), .*
> +** uqadd\th\4, h\3, h\4
> +** ...
> +*/
> +/*
> +** uaddq_imm:
> +** ...
> +** ld1h\tz([0-9]+)\.h, .*
> +** uqadd\tz\1.h, z\1\.h, #50
> +** ...
> +** movi\tv([0-9]+)\.4h, 0x32
> +** ...
> +** ldr\th([0-9]+), .*
> +** uqadd\th\3, h\3, h\2
> +** ...
> +*/
> +/*
> +** usubq: { xfail *-*-* }
> +** ...
> +** ld1h\tz([0-9]+)\.h, .*
> +** ld1h\tz([0-9]+)\.h, .*
> +** uqsub\tz\2.h, z\1\.h, z\2\.h
> +** ...
> +** ldr\th([0-9]+), .*
> +** ldr\th([0-9]+), .*
> +** uqsub\th\4, h\3, h\4
> +** ...
> +*/
> +
> +#include <limits.h>
> +
> +#define UT unsigned short
> +#define UMAX USHRT_MAX
> +#define UMIN 0
> +
> +#include "saturating_arithmetic.inc"
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_3.c
> b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_3.c
> new file mode 100644
> index 00000000000..14e2de59b1e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_3.c
> @@ -0,0 +1,62 @@
> +/* { dg-do compile { target { aarch64*-*-* } } } */
> +/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +/*
> +** uaddq:
> +** ...
> +** ld1w\tz([0-9]+)\.s, .*
> +** ld1w\tz([0-9]+)\.s, .*
> +** uqadd\tz\2.s, z\1\.s, z\2\.s
> +** ...
> +** ldr\tw([0-9]+), .*
> +** ldr\tw([0-9]+), .*
> +** adds\tw\3, w\3, w\4
> +** csinv\tw\3, w\3, wzr, cc
> +** ...
> +*/
> +/*
> +** uaddq2:
> +** ...
> +** ld1w\tz([0-9]+)\.s, .*
> +** ld1w\tz([0-9]+)\.s, .*
> +** uqadd\tz\2.s, z\1\.s, z\2\.s
> +** ...
> +** ldr\tw([0-9]+), .*
> +** ldr\tw([0-9]+), .*
> +** adds\tw\3, w\3, w\4
> +** csinv\tw\3, w\3, wzr, cc
> +** ...
> +*/
> +/*
> +** uaddq_imm:
> +** ...
> +** ld1w\tz([0-9]+)\.s, .*
> +** uqadd\tz\1.s, z\1\.s, #50
> +** ...
> +** ldr\tw([0-9]+), .*
> +** adds\tw\2, w\2, #50
> +** csinv\tw\2, w\2, wzr, cc
> +** ...
> +*/
> +/*
> +** usubq: { xfail *-*-* }
> +** ...
> +** ld1w\tz([0-9]+)\.s, .*
> +** ld1w\tz([0-9]+)\.s, .*
> +** uqsub\tz\2.s, z\1\.s, z\2\.s
> +** ...
> +** ldr\tw([0-9]+), .*
> +** ldr\tw([0-9]+), .*
> +** subs\tw\3, w\3, w\4
> +** csel\tw\3, w\3, wzr, cs
> +** ...
> +*/
> +
> +#include <limits.h>
> +
> +#define UT unsigned int
> +#define UMAX UINT_MAX
> +#define UMIN 0
> +
> +#include "saturating_arithmetic.inc"
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_4.c
> b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_4.c
> new file mode 100644
> index 00000000000..05a5786b4ab
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/saturating_arithmetic_4.c
> @@ -0,0 +1,62 @@
> +/* { dg-do compile { target { aarch64*-*-* } } } */
> +/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +/*
> +** uaddq:
> +** ...
> +** ld1d\tz([0-9]+)\.d, .*
> +** ld1d\tz([0-9]+)\.d, .*
> +** uqadd\tz\2.d, z\1\.d, z\2\.d
> +** ...
> +** ldr\tx([0-9]+), .*
> +** ldr\tx([0-9]+), .*
> +** adds\tx\3, x\3, x\4
> +** csinv\tx\3, x\3, xzr, cc
> +** ...
> +*/
> +/*
> +** uaddq2:
> +** ...
> +** ld1d\tz([0-9]+)\.d, .*
> +** ld1d\tz([0-9]+)\.d, .*
> +** uqadd\tz\2.d, z\1\.d, z\2\.d
> +** ...
> +** ldr\tx([0-9]+), .*
> +** ldr\tx([0-9]+), .*
> +** adds\tx\3, x\3, x\4
> +** csinv\tx\3, x\3, xzr, cc
> +** ...
> +*/
> +/*
> +** uaddq_imm:
> +** ...
> +** ld1d\tz([0-9]+)\.d, .*
> +** uqadd\tz\1.d, z\1\.d, #50
> +** ...
> +** ldr\tx([0-9]+), .*
> +** adds\tx\2, x\2, #50
> +** csinv\tx\2, x\2, xzr, cc
> +** ...
> +*/
> +/*
> +** usubq: { xfail *-*-* }
> +** ...
> +** ld1d\tz([0-9]+)\.d, .*
> +** ld1d\tz([0-9]+)\.d, .*
> +** uqsub\tz\2.d, z\1\.d, z\2\.d
> +** ...
> +** ldr\tx([0-9]+), .*
> +** ldr\tx([0-9]+), .*
> +** subs\tx\3, x\3, x\4
> +** csel\tx\3, x\3, xzr, cs
> +** ...
> +*/
> +
> +#include <limits.h>
> +
> +#define UT unsigned long
> +#define UMAX ULONG_MAX
> +#define UMIN 0
> +
> +#include "saturating_arithmetic.inc"
> \ No newline at end of file