Re: [PATCH] aarch64: Support unpacked SVE integer division

Spencer Abson Mon, 14 Jul 2025 03:35:57 -0700

On Fri, Jul 11, 2025 at 02:40:46PM +0000, Remi Machet wrote:
> 
> On 7/11/25 08:21, Spencer Abson wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> This patch extends the existing patterns for SVE_INT_BINARY_SD to
> support partial SVE integer modes, including those that implement the
> conditional form.
> 
> gcc/ChangeLog:
> 
>         * config/aarch64/aarch64-sve.md (<optab><mode>3): Extend
>         to SVE_SDI_SIMD.
>         (@aarch64_pred_<optab><mode>): Likewise.
>         (@cond_<optab><mode>): Extend to SVE_SDI.
>         (*cond_<optab><mode>_2): Likewise.
>         (*cond_<optab><mode>_3): Likewise.
>         (*cond_<optab><mode>_any): Likewise.
>         * config/aarch64/iterators.md (SVE_SDI): New iterator for
>         all SVE vector modes with 32-bit or 64-bit elements.
>         (SVE_SDI_SIMD): New iterator.  As above, but including
>         V4SI and V2DI.
> 
> gcc/testsuite/ChangeLog:
> 
>         * g++.target/aarch64/sve/cond_arith_1.C: Rename TEST_SHIFT
>         to TEST_OP, add tests for SDIV and UDIV.
>         * g++.target/aarch64/sve/cond_arith_2.C: Likewise.
>         * g++.target/aarch64/sve/cond_arith_3.C: Likewise.
>         * g++.target/aarch64/sve/cond_arith_4.C: Likewise.
>         * gcc.target/aarch64/sve/div_2.c: New test.
> 
> ---
> 
> Bootstrapped & regtested on aarch64-linux-gnu.  OK for master?
> 
> Thanks,
> Spencer
> 
> ---
>  gcc/config/aarch64/aarch64-sve.md             | 64 +++++++++----------
>  gcc/config/aarch64/iterators.md               |  7 ++
>  .../g++.target/aarch64/sve/cond_arith_1.C     | 25 +++++---
>  .../g++.target/aarch64/sve/cond_arith_2.C     | 25 +++++---
>  .../g++.target/aarch64/sve/cond_arith_3.C     | 27 +++++---
>  .../g++.target/aarch64/sve/cond_arith_4.C     | 27 +++++---
>  gcc/testsuite/gcc.target/aarch64/sve/div_2.c  | 22 +++++++
>  7 files changed, 127 insertions(+), 70 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/div_2.c
> 
> diff --git a/gcc/config/aarch64/aarch64-sve.md 
> b/gcc/config/aarch64/aarch64-sve.md
> index 6b5113eb70f..871b31623bb 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -4712,12 +4712,12 @@
>  ;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
>  ;; optabs to the midend.
>  (define_expand "<optab><mode>3"
> -  [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
> -       (unspec:SVE_FULL_SDI_SIMD
> +  [(set (match_operand:SVE_SDI_SIMD 0 "register_operand")
> +       (unspec:SVE_SDI_SIMD
>           [(match_dup 3)
> -          (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
> -            (match_operand:SVE_FULL_SDI_SIMD 1 "register_operand")
> -            (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand"))]
> +          (SVE_INT_BINARY_SD:SVE_SDI_SIMD
> +            (match_operand:SVE_SDI_SIMD 1 "register_operand")
> +            (match_operand:SVE_SDI_SIMD 2 "register_operand"))]
>           UNSPEC_PRED_X))]
>    "TARGET_SVE"
>    {
> @@ -4727,12 +4727,12 @@
> 
>  ;; Integer division predicated with a PTRUE.
>  (define_insn "@aarch64_pred_<optab><mode>"
> -  [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
> -       (unspec:SVE_FULL_SDI_SIMD
> +  [(set (match_operand:SVE_SDI_SIMD 0 "register_operand")
> +       (unspec:SVE_SDI_SIMD
>           [(match_operand:<VPRED> 1 "register_operand")
> -          (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
> -            (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand")
> -            (match_operand:SVE_FULL_SDI_SIMD 3 "register_operand"))]
> +          (SVE_INT_BINARY_SD:SVE_SDI_SIMD
> +            (match_operand:SVE_SDI_SIMD 2 "register_operand")
> +            (match_operand:SVE_SDI_SIMD 3 "register_operand"))]
>           UNSPEC_PRED_X))]
>    "TARGET_SVE"
>    {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
> @@ -4744,25 +4744,25 @@
> 
>  ;; Predicated integer division with merging.
>  (define_expand "@cond_<optab><mode>"
> -  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
> -       (unspec:SVE_FULL_SDI
> +  [(set (match_operand:SVE_SDI 0 "register_operand")
> +       (unspec:SVE_SDI
>           [(match_operand:<VPRED> 1 "register_operand")
> -          (SVE_INT_BINARY_SD:SVE_FULL_SDI
> -            (match_operand:SVE_FULL_SDI 2 "register_operand")
> -            (match_operand:SVE_FULL_SDI 3 "register_operand"))
> -          (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
> +          (SVE_INT_BINARY_SD:SVE_SDI
> +            (match_operand:SVE_SDI 2 "register_operand")
> +            (match_operand:SVE_SDI 3 "register_operand"))
> +          (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
>           UNSPEC_SEL))]
>    "TARGET_SVE"
>  )
> 
>  ;; Predicated integer division, merging with the first input.
>  (define_insn "*cond_<optab><mode>_2"
> -  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
> -       (unspec:SVE_FULL_SDI
> +  [(set (match_operand:SVE_SDI 0 "register_operand")
> +       (unspec:SVE_SDI
>           [(match_operand:<VPRED> 1 "register_operand")
> -          (SVE_INT_BINARY_SD:SVE_FULL_SDI
> -            (match_operand:SVE_FULL_SDI 2 "register_operand")
> -            (match_operand:SVE_FULL_SDI 3 "register_operand"))
> +          (SVE_INT_BINARY_SD:SVE_SDI
> +            (match_operand:SVE_SDI 2 "register_operand")
> +            (match_operand:SVE_SDI 3 "register_operand"))
>            (match_dup 2)]
>           UNSPEC_SEL))]
>    "TARGET_SVE"
> @@ -4774,12 +4774,12 @@
> 
>  ;; Predicated integer division, merging with the second input.
>  (define_insn "*cond_<optab><mode>_3"
> -  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
> -       (unspec:SVE_FULL_SDI
> +  [(set (match_operand:SVE_SDI 0 "register_operand")
> +       (unspec:SVE_SDI
>           [(match_operand:<VPRED> 1 "register_operand")
> -          (SVE_INT_BINARY_SD:SVE_FULL_SDI
> -            (match_operand:SVE_FULL_SDI 2 "register_operand")
> -            (match_operand:SVE_FULL_SDI 3 "register_operand"))
> +          (SVE_INT_BINARY_SD:SVE_SDI
> +            (match_operand:SVE_SDI 2 "register_operand")
> +            (match_operand:SVE_SDI 3 "register_operand"))
>            (match_dup 3)]
>           UNSPEC_SEL))]
>    "TARGET_SVE"
> @@ -4791,13 +4791,13 @@
> 
>  ;; Predicated integer division, merging with an independent value.
>  (define_insn_and_rewrite "*cond_<optab><mode>_any"
> -  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
> -       (unspec:SVE_FULL_SDI
> +  [(set (match_operand:SVE_SDI 0 "register_operand")
> +       (unspec:SVE_SDI
>           [(match_operand:<VPRED> 1 "register_operand")
> -          (SVE_INT_BINARY_SD:SVE_FULL_SDI
> -            (match_operand:SVE_FULL_SDI 2 "register_operand")
> -            (match_operand:SVE_FULL_SDI 3 "register_operand"))
> -          (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
> +          (SVE_INT_BINARY_SD:SVE_SDI
> +            (match_operand:SVE_SDI 2 "register_operand")
> +            (match_operand:SVE_SDI 3 "register_operand"))
> +          (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
>           UNSPEC_SEL))]
>    "TARGET_SVE
>     && !rtx_equal_p (operands[2], operands[4])
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index c59fcd679d7..08ff6e42780 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -557,10 +557,17 @@
>  ;; Fully-packed SVE integer vector modes that have 32-bit or 64-bit elements.
>  (define_mode_iterator SVE_FULL_SDI [VNx4SI VNx2DI])
> 
> +;; SVE integer vector modes that have 32-bit or 64-bit elements.
> +(define_mode_iterator SVE_SDI [VNx2SI SVE_FULL_SDI])
> +
>  ;; Fully-packed SVE and Advanced SIMD integer vector modes that have 32-bit or
>  ;; 64-bit elements.
>  (define_mode_iterator SVE_FULL_SDI_SIMD [SVE_FULL_SDI V4SI V2DI])
> 
> +;; SVE and full Advanced SIMD integer vector modes that have 32-bit or
> +;; 64-bit elements.
> +(define_mode_iterator SVE_SDI_SIMD [VNx2SI SVE_FULL_SDI_SIMD])
> +
> 
> Hi Spencer,
> 
> Based on the definition of SVE_FULL_SDI_SIMD, I would have expected V2SI to 
> also be in SVE_SDI_SIMD. I assume it is excluded because it is taken care of 
> by another iterator already? If so it might be worth mentioning.
> 
> Looks good to me otherwise (but someone else needs to approve).
> 
Hi Remi,


Good point.  Perhaps SVE_SDI_SIMD isn't the best name...

This change needs an iterator to replace SVE_FULL_SDI_SIMD which includes
all full/partial SVE integer modes that have 32-bit or 64-bit elements, plus
V4SI and V2DI.  V2SI was intentionally excluded from this new iterator since
it is not part of SVE_FULL_SDI_SIMD and adding it would be a functional change
that I'm not intending to make here.

Perhaps a better name for this new iterator would be SVE_VQ_SDI?

Also, I've realised that this patch would remove the only existing uses of
SVE_FULL_SDI_SIMD, so I could remove it entirely if we want.

Thanks,
Spencer
> 
> 
>  ;; 2x and 4x tuples of the above, excluding 2x DI.
>  (define_mode_iterator SVE_FULL_SIx2_SDIx4 [VNx8SI VNx16SI VNx8DI])
> 
> diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_1.C 
> b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_1.C
> index 0c6f640e35b..40ecb3a012e 100644
> --- a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_1.C
> +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_1.C
> @@ -9,10 +9,11 @@
>  #define op_ior(A, B) ((A) | (B))
>  #define op_xor(A, B) ((A) ^ (B))
>  #define op_mul(A, B) ((A) * (B))
> +#define op_div(A, B) ((A) / (B))
>  #define op_max(A, B) ((A) > (B) ? (A) : (B))
>  #define op_min(A, B) ((A) < (B) ? (A) : (B))
> 
> -#define TEST_SHIFT(TYPE, NAME) \
> +#define TEST_OP(TYPE, NAME) \
>    TYPE \
>    NAME##_##TYPE##_reg (TYPE a, TYPE b, TYPE c) \
>    { \
> @@ -21,14 +22,14 @@
> 
>  #define TEST_TYPE(TYPE, SIZE) \
>    typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
> -  TEST_SHIFT (TYPE##SIZE, add) \
> -  TEST_SHIFT (TYPE##SIZE, sub) \
> -  TEST_SHIFT (TYPE##SIZE, and) \
> -  TEST_SHIFT (TYPE##SIZE, ior) \
> -  TEST_SHIFT (TYPE##SIZE, xor) \
> -  TEST_SHIFT (TYPE##SIZE, mul) \
> -  TEST_SHIFT (TYPE##SIZE, min) \
> -  TEST_SHIFT (TYPE##SIZE, max)
> +  TEST_OP (TYPE##SIZE, add) \
> +  TEST_OP (TYPE##SIZE, sub) \
> +  TEST_OP (TYPE##SIZE, and) \
> +  TEST_OP (TYPE##SIZE, ior) \
> +  TEST_OP (TYPE##SIZE, xor) \
> +  TEST_OP (TYPE##SIZE, mul) \
> +  TEST_OP (TYPE##SIZE, min) \
> +  TEST_OP (TYPE##SIZE, max)
> 
>  TEST_TYPE (int8_t, 32)
>  TEST_TYPE (uint8_t, 32)
> @@ -45,6 +46,9 @@ TEST_TYPE (uint16_t, 128)
>  TEST_TYPE (int32_t, 128)
>  TEST_TYPE (uint32_t, 128)
> 
> +TEST_OP (int32_t128, div)
> +TEST_OP (uint32_t128, div)
> +
>  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, 
> z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */
>  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
>  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
> @@ -85,5 +89,8 @@ TEST_TYPE (uint32_t, 128)
>  /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
>  /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> 
> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +
>  /* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
>  /* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_2.C 
> b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_2.C
> index 8965c949873..9e40249fc11 100644
> --- a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_2.C
> +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_2.C
> @@ -9,10 +9,11 @@
>  #define op_ior(A, B) ((A) | (B))
>  #define op_xor(A, B) ((A) ^ (B))
>  #define op_mul(A, B) ((A) * (B))
> +#define op_div(A, B) ((A) / (B))
>  #define op_max(A, B) ((A) > (B) ? (A) : (B))
>  #define op_min(A, B) ((A) < (B) ? (A) : (B))
> 
> -#define TEST_SHIFT(TYPE, NAME) \
> +#define TEST_OP(TYPE, NAME) \
>    TYPE \
>    NAME##_##TYPE##_reg (TYPE a, TYPE b, TYPE c) \
>    { \
> @@ -21,14 +22,14 @@
> 
>  #define TEST_TYPE(TYPE, SIZE) \
>    typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
> -  TEST_SHIFT (TYPE##SIZE, add) \
> -  TEST_SHIFT (TYPE##SIZE, sub) \
> -  TEST_SHIFT (TYPE##SIZE, and) \
> -  TEST_SHIFT (TYPE##SIZE, ior) \
> -  TEST_SHIFT (TYPE##SIZE, xor) \
> -  TEST_SHIFT (TYPE##SIZE, mul) \
> -  TEST_SHIFT (TYPE##SIZE, min) \
> -  TEST_SHIFT (TYPE##SIZE, max)
> +  TEST_OP (TYPE##SIZE, add) \
> +  TEST_OP (TYPE##SIZE, sub) \
> +  TEST_OP (TYPE##SIZE, and) \
> +  TEST_OP (TYPE##SIZE, ior) \
> +  TEST_OP (TYPE##SIZE, xor) \
> +  TEST_OP (TYPE##SIZE, mul) \
> +  TEST_OP (TYPE##SIZE, min) \
> +  TEST_OP (TYPE##SIZE, max)
> 
>  TEST_TYPE (int8_t, 32)
>  TEST_TYPE (uint8_t, 32)
> @@ -45,6 +46,9 @@ TEST_TYPE (uint16_t, 128)
>  TEST_TYPE (int32_t, 128)
>  TEST_TYPE (uint32_t, 128)
> 
> +TEST_OP (int32_t128, div)
> +TEST_OP (uint32_t128, div)
> +
>  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, 
> z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */
>  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
>  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
> @@ -85,5 +89,8 @@ TEST_TYPE (uint32_t, 128)
>  /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
>  /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> 
> +/* { dg-final { scan-assembler-times {\tsdivr\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tudivr\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +
>  /* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
>  /* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_3.C 
> b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_3.C
> index 3aa8669fe3e..bbc7cc331fb 100644
> --- a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_3.C
> +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_3.C
> @@ -9,10 +9,11 @@
>  #define op_ior(A, B) ((A) | (B))
>  #define op_xor(A, B) ((A) ^ (B))
>  #define op_mul(A, B) ((A) * (B))
> +#define op_div(A, B) ((A) / (B))
>  #define op_max(A, B) ((A) > (B) ? (A) : (B))
>  #define op_min(A, B) ((A) < (B) ? (A) : (B))
> 
> -#define TEST_SHIFT(TYPE, NAME) \
> +#define TEST_OP(TYPE, NAME) \
>    TYPE \
>    NAME##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \
>    { \
> @@ -21,14 +22,14 @@
> 
>  #define TEST_TYPE(TYPE, SIZE) \
>    typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
> -  TEST_SHIFT (TYPE##SIZE, add) \
> -  TEST_SHIFT (TYPE##SIZE, sub) \
> -  TEST_SHIFT (TYPE##SIZE, and) \
> -  TEST_SHIFT (TYPE##SIZE, ior) \
> -  TEST_SHIFT (TYPE##SIZE, xor) \
> -  TEST_SHIFT (TYPE##SIZE, mul) \
> -  TEST_SHIFT (TYPE##SIZE, min) \
> -  TEST_SHIFT (TYPE##SIZE, max)
> +  TEST_OP (TYPE##SIZE, add) \
> +  TEST_OP (TYPE##SIZE, sub) \
> +  TEST_OP (TYPE##SIZE, and) \
> +  TEST_OP (TYPE##SIZE, ior) \
> +  TEST_OP (TYPE##SIZE, xor) \
> +  TEST_OP (TYPE##SIZE, mul) \
> +  TEST_OP (TYPE##SIZE, min) \
> +  TEST_OP (TYPE##SIZE, max)
> 
>  TEST_TYPE (int8_t, 32)
>  TEST_TYPE (uint8_t, 32)
> @@ -45,6 +46,9 @@ TEST_TYPE (uint16_t, 128)
>  TEST_TYPE (int32_t, 128)
>  TEST_TYPE (uint32_t, 128)
> 
> +TEST_OP (int32_t128, div)
> +TEST_OP (uint32_t128, div)
> +
>  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, 
> z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */
>  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
>  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
> @@ -85,7 +89,10 @@ TEST_TYPE (uint32_t, 128)
>  /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
>  /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> 
> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +
>  /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, 
> z[0-9]+\.b\n} 48 } } */
>  /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 32 } } */
> -/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 16 } } */
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 18 } } */
>  /* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_4.C 
> b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_4.C
> index efa4b8953a3..fc799255e19 100644
> --- a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_4.C
> +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_4.C
> @@ -9,10 +9,11 @@
>  #define op_ior(A, B) ((A) | (B))
>  #define op_xor(A, B) ((A) ^ (B))
>  #define op_mul(A, B) ((A) * (B))
> +#define op_div(A, B) ((A) / (B))
>  #define op_max(A, B) ((A) > (B) ? (A) : (B))
>  #define op_min(A, B) ((A) < (B) ? (A) : (B))
> 
> -#define TEST_SHIFT(TYPE, NAME) \
> +#define TEST_OP(TYPE, NAME) \
>    TYPE \
>    NAME##_##TYPE##_reg (TYPE a, TYPE b, TYPE c) \
>    { \
> @@ -21,14 +22,14 @@
> 
>  #define TEST_TYPE(TYPE, SIZE) \
>    typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
> -  TEST_SHIFT (TYPE##SIZE, add) \
> -  TEST_SHIFT (TYPE##SIZE, sub) \
> -  TEST_SHIFT (TYPE##SIZE, and) \
> -  TEST_SHIFT (TYPE##SIZE, ior) \
> -  TEST_SHIFT (TYPE##SIZE, xor) \
> -  TEST_SHIFT (TYPE##SIZE, mul) \
> -  TEST_SHIFT (TYPE##SIZE, min) \
> -  TEST_SHIFT (TYPE##SIZE, max)
> +  TEST_OP (TYPE##SIZE, add) \
> +  TEST_OP (TYPE##SIZE, sub) \
> +  TEST_OP (TYPE##SIZE, and) \
> +  TEST_OP (TYPE##SIZE, ior) \
> +  TEST_OP (TYPE##SIZE, xor) \
> +  TEST_OP (TYPE##SIZE, mul) \
> +  TEST_OP (TYPE##SIZE, min) \
> +  TEST_OP (TYPE##SIZE, max)
> 
>  TEST_TYPE (int8_t, 32)
>  TEST_TYPE (uint8_t, 32)
> @@ -45,6 +46,9 @@ TEST_TYPE (uint16_t, 128)
>  TEST_TYPE (int32_t, 128)
>  TEST_TYPE (uint32_t, 128)
> 
> +TEST_OP (int32_t128, div)
> +TEST_OP (uint32_t128, div)
> +
>  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, 
> z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */
>  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
>  /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
> @@ -85,7 +89,10 @@ TEST_TYPE (uint32_t, 128)
>  /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
>  /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> 
> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +
>  /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, 
> z[0-9]+\.b\n} 48 } } */
>  /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, 
> z[0-9]+\.h\n} 32 } } */
> -/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, 
> z[0-9]+\.s\n} 16 } } */
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, 
> z[0-9]+\.s\n} 18 } } */
>  /* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/div_2.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/div_2.c
> new file mode 100644
> index 00000000000..02483ec47de
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/div_2.c
> @@ -0,0 +1,22 @@
> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
> +
> +#include <stdint.h>
> +
> +typedef int32_t v32si __attribute__((vector_size(128)));
> +typedef uint32_t v32usi __attribute__((vector_size(128)));
> +
> +v32si
> +test_sdiv (v32si x, v32si y)
> +{
> +    return x / y;
> +}
> +
> +v32usi
> +test_udiv (v32usi x, v32usi y)
> +{
> +    return x / y;
> +}
> +
> +/* { dg-final { scan-assembler-times {\tsdivr?\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tudivr?\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> --
> 2.34.1
> 
>

Re: [PATCH] aarch64: Support unpacked SVE integer division

Reply via email to