Re: [PATCH] aarch64: Support unpacked SVE integer division

Remi Machet Mon, 14 Jul 2025 07:30:27 -0700

On 7/14/25 06:35, Spencer Abson wrote:
> External email: Use caution opening links or attachments
>
>
> On Fri, Jul 11, 2025 at 02:40:46PM +0000, Remi Machet wrote:
>> On 7/11/25 08:21, Spencer Abson wrote:
>>
>> External email: Use caution opening links or attachments
>>
>>
>> This patch extends the existing patterns for SVE_INT_BINARY_SD to
>> support partial SVE integer modes, including those that implement the
>> conditional form.
>>
>> gcc/ChangeLog:
>>
>>          * config/aarch64/aarch64-sve.md (<optab><mode>3): Extend
>>          to SVE_SDI_SIMD.
>>          (@aarch64_pred_<optab><mode>): Likewise.
>>          (@cond_<optab><mode>): Extend to SVE_SDI.
>>          (*cond_<optab><mode>_2): Likewise.
>>          (*cond_<optab><mode>_3): Likewise.
>>          (*cond_<optab><mode>_any): Likewise.
>>          * config/aarch64/iterators.md (SVE_SDI): New iterator for
>>          all SVE vector modes with 32-bit or 64-bit elements.
>>          (SVE_SDI_SIMD): New iterator.  As above, but including
>>          V4SI and V2DI.
>>
>> gcc/testsuite/ChangeLog:
>>
>>          * g++.target/aarch64/sve/cond_arith_1.C: Rename TEST_SHIFT
>>          to TEST_OP, add tests for SDIV and UDIV.
>>          * g++.target/aarch64/sve/cond_arith_2.C: Likewise.
>>          * g++.target/aarch64/sve/cond_arith_3.C: Likewise.
>>          * g++.target/aarch64/sve/cond_arith_4.C: Likewise.
>>          * gcc.target/aarch64/sve/div_2.c: New test.
>>
>> ---
>>
>> Bootstrapped & regtested on aarch64-linux-gnu.  OK for master?
>>
>> Thanks,
>> Spencer
>>
>> ---
>>   gcc/config/aarch64/aarch64-sve.md             | 64 +++++++++----------
>>   gcc/config/aarch64/iterators.md               |  7 ++
>>   .../g++.target/aarch64/sve/cond_arith_1.C     | 25 +++++---
>>   .../g++.target/aarch64/sve/cond_arith_2.C     | 25 +++++---
>>   .../g++.target/aarch64/sve/cond_arith_3.C     | 27 +++++---
>>   .../g++.target/aarch64/sve/cond_arith_4.C     | 27 +++++---
>>   gcc/testsuite/gcc.target/aarch64/sve/div_2.c  | 22 +++++++
>>   7 files changed, 127 insertions(+), 70 deletions(-)
>>   create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/div_2.c
>>
>> diff --git a/gcc/config/aarch64/aarch64-sve.md 
>> b/gcc/config/aarch64/aarch64-sve.md
>> index 6b5113eb70f..871b31623bb 100644
>> --- a/gcc/config/aarch64/aarch64-sve.md
>> +++ b/gcc/config/aarch64/aarch64-sve.md
>> @@ -4712,12 +4712,12 @@
>>   ;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
>>   ;; optabs to the midend.
>>   (define_expand "<optab><mode>3"
>> -  [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
>> -       (unspec:SVE_FULL_SDI_SIMD
>> +  [(set (match_operand:SVE_SDI_SIMD 0 "register_operand")
>> +       (unspec:SVE_SDI_SIMD
>>            [(match_dup 3)
>> -          (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
>> -            (match_operand:SVE_FULL_SDI_SIMD 1 "register_operand")
>> -            (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand"))]
>> +          (SVE_INT_BINARY_SD:SVE_SDI_SIMD
>> +            (match_operand:SVE_SDI_SIMD 1 "register_operand")
>> +            (match_operand:SVE_SDI_SIMD 2 "register_operand"))]
>>            UNSPEC_PRED_X))]
>>     "TARGET_SVE"
>>     {
>> @@ -4727,12 +4727,12 @@
>>
>>   ;; Integer division predicated with a PTRUE.
>>   (define_insn "@aarch64_pred_<optab><mode>"
>> -  [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
>> -       (unspec:SVE_FULL_SDI_SIMD
>> +  [(set (match_operand:SVE_SDI_SIMD 0 "register_operand")
>> +       (unspec:SVE_SDI_SIMD
>>            [(match_operand:<VPRED> 1 "register_operand")
>> -          (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
>> -            (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand")
>> -            (match_operand:SVE_FULL_SDI_SIMD 3 "register_operand"))]
>> +          (SVE_INT_BINARY_SD:SVE_SDI_SIMD
>> +            (match_operand:SVE_SDI_SIMD 2 "register_operand")
>> +            (match_operand:SVE_SDI_SIMD 3 "register_operand"))]
>>            UNSPEC_PRED_X))]
>>     "TARGET_SVE"
>>     {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
>> @@ -4744,25 +4744,25 @@
>>
>>   ;; Predicated integer division with merging.
>>   (define_expand "@cond_<optab><mode>"
>> -  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
>> -       (unspec:SVE_FULL_SDI
>> +  [(set (match_operand:SVE_SDI 0 "register_operand")
>> +       (unspec:SVE_SDI
>>            [(match_operand:<VPRED> 1 "register_operand")
>> -          (SVE_INT_BINARY_SD:SVE_FULL_SDI
>> -            (match_operand:SVE_FULL_SDI 2 "register_operand")
>> -            (match_operand:SVE_FULL_SDI 3 "register_operand"))
>> -          (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
>> +          (SVE_INT_BINARY_SD:SVE_SDI
>> +            (match_operand:SVE_SDI 2 "register_operand")
>> +            (match_operand:SVE_SDI 3 "register_operand"))
>> +          (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
>>            UNSPEC_SEL))]
>>     "TARGET_SVE"
>>   )
>>
>>   ;; Predicated integer division, merging with the first input.
>>   (define_insn "*cond_<optab><mode>_2"
>> -  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
>> -       (unspec:SVE_FULL_SDI
>> +  [(set (match_operand:SVE_SDI 0 "register_operand")
>> +       (unspec:SVE_SDI
>>            [(match_operand:<VPRED> 1 "register_operand")
>> -          (SVE_INT_BINARY_SD:SVE_FULL_SDI
>> -            (match_operand:SVE_FULL_SDI 2 "register_operand")
>> -            (match_operand:SVE_FULL_SDI 3 "register_operand"))
>> +          (SVE_INT_BINARY_SD:SVE_SDI
>> +            (match_operand:SVE_SDI 2 "register_operand")
>> +            (match_operand:SVE_SDI 3 "register_operand"))
>>             (match_dup 2)]
>>            UNSPEC_SEL))]
>>     "TARGET_SVE"
>> @@ -4774,12 +4774,12 @@
>>
>>   ;; Predicated integer division, merging with the second input.
>>   (define_insn "*cond_<optab><mode>_3"
>> -  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
>> -       (unspec:SVE_FULL_SDI
>> +  [(set (match_operand:SVE_SDI 0 "register_operand")
>> +       (unspec:SVE_SDI
>>            [(match_operand:<VPRED> 1 "register_operand")
>> -          (SVE_INT_BINARY_SD:SVE_FULL_SDI
>> -            (match_operand:SVE_FULL_SDI 2 "register_operand")
>> -            (match_operand:SVE_FULL_SDI 3 "register_operand"))
>> +          (SVE_INT_BINARY_SD:SVE_SDI
>> +            (match_operand:SVE_SDI 2 "register_operand")
>> +            (match_operand:SVE_SDI 3 "register_operand"))
>>             (match_dup 3)]
>>            UNSPEC_SEL))]
>>     "TARGET_SVE"
>> @@ -4791,13 +4791,13 @@
>>
>>   ;; Predicated integer division, merging with an independent value.
>>   (define_insn_and_rewrite "*cond_<optab><mode>_any"
>> -  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
>> -       (unspec:SVE_FULL_SDI
>> +  [(set (match_operand:SVE_SDI 0 "register_operand")
>> +       (unspec:SVE_SDI
>>            [(match_operand:<VPRED> 1 "register_operand")
>> -          (SVE_INT_BINARY_SD:SVE_FULL_SDI
>> -            (match_operand:SVE_FULL_SDI 2 "register_operand")
>> -            (match_operand:SVE_FULL_SDI 3 "register_operand"))
>> -          (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
>> +          (SVE_INT_BINARY_SD:SVE_SDI
>> +            (match_operand:SVE_SDI 2 "register_operand")
>> +            (match_operand:SVE_SDI 3 "register_operand"))
>> +          (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
>>            UNSPEC_SEL))]
>>     "TARGET_SVE
>>      && !rtx_equal_p (operands[2], operands[4])
>> diff --git a/gcc/config/aarch64/iterators.md 
>> b/gcc/config/aarch64/iterators.md
>> index c59fcd679d7..08ff6e42780 100644
>> --- a/gcc/config/aarch64/iterators.md
>> +++ b/gcc/config/aarch64/iterators.md
>> @@ -557,10 +557,17 @@
>>   ;; Fully-packed SVE integer vector modes that have 32-bit or 64-bit 
>> elements.
>>   (define_mode_iterator SVE_FULL_SDI [VNx4SI VNx2DI])
>>
>> +;; SVE integer vector modes that have 32-bit or 64-bit elements.
>> +(define_mode_iterator SVE_SDI [VNx2SI SVE_FULL_SDI])
>> +
>>   ;; Fully-packed SVE and Advanced SIMD integer vector modes that have 
>> 32-bit or
>>   ;; 64-bit elements.
>>   (define_mode_iterator SVE_FULL_SDI_SIMD [SVE_FULL_SDI V4SI V2DI])
>>
>> +;; SVE and full Advanced SIMD integer vector modes that have 32-bit or
>> +;; 64-bit elements.
>> +(define_mode_iterator SVE_SDI_SIMD [VNx2SI SVE_FULL_SDI_SIMD])
>> +
>>
>> Hi Spencer,
>>
>> Based on the definition of SVE_FULL_SDI_SIMD, I would have expected V2SI to 
>> also be in SVE_SDI_SIMD. I assume it is excluded because it is taken care of 
>> by another iterator already? If so it might be worth mentioning.
>>
>> Looks good to me otherwise (but someone else needs to approve).
>>
> Hi Remi,
>
> Good point.  Perhaps SVE_SDI_SIMD isn't the best name...
>
> This change needs an iterator to replace SVE_FULL_SDI_SIMD which includes
> all full/partial SVE integer modes that have 32-bit or 64-bit elements, plus
> V4SI and V2DI.  V2SI was intentionally excluded from this new iterator since
> it is not part of SVE_FULL_SDI_SIMD and adding it would be a functional change
> that I'm not intending to make here.


Hi Spencer,

Thank you for the explanation.

>
> Perhaps a better name for this new iterator would be SVE_VQ_SDI?

Yes I do think that would make more sense as a name (to me at least).

>
> Also, I've realised that this patch would remove the only existing uses of
> SVE_FULL_SDI_SIMD, so I could remove it entirely if we want.
I am a big fan of cleaning up dead code :)
>
> Thanks,
> Spencer
>>
>>   ;; 2x and 4x tuples of the above, excluding 2x DI.
>>   (define_mode_iterator SVE_FULL_SIx2_SDIx4 [VNx8SI VNx16SI VNx8DI])
>>
>> diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_1.C 
>> b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_1.C
>> index 0c6f640e35b..40ecb3a012e 100644
>> --- a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_1.C
>> +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_1.C
>> @@ -9,10 +9,11 @@
>>   #define op_ior(A, B) ((A) | (B))
>>   #define op_xor(A, B) ((A) ^ (B))
>>   #define op_mul(A, B) ((A) * (B))
>> +#define op_div(A, B) ((A) / (B))
>>   #define op_max(A, B) ((A) > (B) ? (A) : (B))
>>   #define op_min(A, B) ((A) < (B) ? (A) : (B))
>>
>> -#define TEST_SHIFT(TYPE, NAME) \
>> +#define TEST_OP(TYPE, NAME) \
>>     TYPE \
>>     NAME##_##TYPE##_reg (TYPE a, TYPE b, TYPE c) \
>>     { \
>> @@ -21,14 +22,14 @@
>>
>>   #define TEST_TYPE(TYPE, SIZE) \
>>     typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
>> -  TEST_SHIFT (TYPE##SIZE, add) \
>> -  TEST_SHIFT (TYPE##SIZE, sub) \
>> -  TEST_SHIFT (TYPE##SIZE, and) \
>> -  TEST_SHIFT (TYPE##SIZE, ior) \
>> -  TEST_SHIFT (TYPE##SIZE, xor) \
>> -  TEST_SHIFT (TYPE##SIZE, mul) \
>> -  TEST_SHIFT (TYPE##SIZE, min) \
>> -  TEST_SHIFT (TYPE##SIZE, max)
>> +  TEST_OP (TYPE##SIZE, add) \
>> +  TEST_OP (TYPE##SIZE, sub) \
>> +  TEST_OP (TYPE##SIZE, and) \
>> +  TEST_OP (TYPE##SIZE, ior) \
>> +  TEST_OP (TYPE##SIZE, xor) \
>> +  TEST_OP (TYPE##SIZE, mul) \
>> +  TEST_OP (TYPE##SIZE, min) \
>> +  TEST_OP (TYPE##SIZE, max)
>>
>>   TEST_TYPE (int8_t, 32)
>>   TEST_TYPE (uint8_t, 32)
>> @@ -45,6 +46,9 @@ TEST_TYPE (uint16_t, 128)
>>   TEST_TYPE (int32_t, 128)
>>   TEST_TYPE (uint32_t, 128)
>>
>> +TEST_OP (int32_t128, div)
>> +TEST_OP (uint32_t128, div)
>> +
>>   /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, 
>> z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */
>>   /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, 
>> z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
>>   /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
>> @@ -85,5 +89,8 @@ TEST_TYPE (uint32_t, 128)
>>   /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, 
>> z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
>>   /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>>
>> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>> +
>>   /* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
>>   /* { dg-final { scan-assembler-not {\tsel\t} } } */
>> diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_2.C 
>> b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_2.C
>> index 8965c949873..9e40249fc11 100644
>> --- a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_2.C
>> +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_2.C
>> @@ -9,10 +9,11 @@
>>   #define op_ior(A, B) ((A) | (B))
>>   #define op_xor(A, B) ((A) ^ (B))
>>   #define op_mul(A, B) ((A) * (B))
>> +#define op_div(A, B) ((A) / (B))
>>   #define op_max(A, B) ((A) > (B) ? (A) : (B))
>>   #define op_min(A, B) ((A) < (B) ? (A) : (B))
>>
>> -#define TEST_SHIFT(TYPE, NAME) \
>> +#define TEST_OP(TYPE, NAME) \
>>     TYPE \
>>     NAME##_##TYPE##_reg (TYPE a, TYPE b, TYPE c) \
>>     { \
>> @@ -21,14 +22,14 @@
>>
>>   #define TEST_TYPE(TYPE, SIZE) \
>>     typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
>> -  TEST_SHIFT (TYPE##SIZE, add) \
>> -  TEST_SHIFT (TYPE##SIZE, sub) \
>> -  TEST_SHIFT (TYPE##SIZE, and) \
>> -  TEST_SHIFT (TYPE##SIZE, ior) \
>> -  TEST_SHIFT (TYPE##SIZE, xor) \
>> -  TEST_SHIFT (TYPE##SIZE, mul) \
>> -  TEST_SHIFT (TYPE##SIZE, min) \
>> -  TEST_SHIFT (TYPE##SIZE, max)
>> +  TEST_OP (TYPE##SIZE, add) \
>> +  TEST_OP (TYPE##SIZE, sub) \
>> +  TEST_OP (TYPE##SIZE, and) \
>> +  TEST_OP (TYPE##SIZE, ior) \
>> +  TEST_OP (TYPE##SIZE, xor) \
>> +  TEST_OP (TYPE##SIZE, mul) \
>> +  TEST_OP (TYPE##SIZE, min) \
>> +  TEST_OP (TYPE##SIZE, max)
>>
>>   TEST_TYPE (int8_t, 32)
>>   TEST_TYPE (uint8_t, 32)
>> @@ -45,6 +46,9 @@ TEST_TYPE (uint16_t, 128)
>>   TEST_TYPE (int32_t, 128)
>>   TEST_TYPE (uint32_t, 128)
>>
>> +TEST_OP (int32_t128, div)
>> +TEST_OP (uint32_t128, div)
>> +
>>   /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, 
>> z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */
>>   /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, 
>> z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
>>   /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
>> @@ -85,5 +89,8 @@ TEST_TYPE (uint32_t, 128)
>>   /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, 
>> z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
>>   /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>>
>> +/* { dg-final { scan-assembler-times {\tsdivr\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>> +/* { dg-final { scan-assembler-times {\tudivr\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>> +
>>   /* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
>>   /* { dg-final { scan-assembler-not {\tsel\t} } } */
>> diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_3.C 
>> b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_3.C
>> index 3aa8669fe3e..bbc7cc331fb 100644
>> --- a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_3.C
>> +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_3.C
>> @@ -9,10 +9,11 @@
>>   #define op_ior(A, B) ((A) | (B))
>>   #define op_xor(A, B) ((A) ^ (B))
>>   #define op_mul(A, B) ((A) * (B))
>> +#define op_div(A, B) ((A) / (B))
>>   #define op_max(A, B) ((A) > (B) ? (A) : (B))
>>   #define op_min(A, B) ((A) < (B) ? (A) : (B))
>>
>> -#define TEST_SHIFT(TYPE, NAME) \
>> +#define TEST_OP(TYPE, NAME) \
>>     TYPE \
>>     NAME##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \
>>     { \
>> @@ -21,14 +22,14 @@
>>
>>   #define TEST_TYPE(TYPE, SIZE) \
>>     typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
>> -  TEST_SHIFT (TYPE##SIZE, add) \
>> -  TEST_SHIFT (TYPE##SIZE, sub) \
>> -  TEST_SHIFT (TYPE##SIZE, and) \
>> -  TEST_SHIFT (TYPE##SIZE, ior) \
>> -  TEST_SHIFT (TYPE##SIZE, xor) \
>> -  TEST_SHIFT (TYPE##SIZE, mul) \
>> -  TEST_SHIFT (TYPE##SIZE, min) \
>> -  TEST_SHIFT (TYPE##SIZE, max)
>> +  TEST_OP (TYPE##SIZE, add) \
>> +  TEST_OP (TYPE##SIZE, sub) \
>> +  TEST_OP (TYPE##SIZE, and) \
>> +  TEST_OP (TYPE##SIZE, ior) \
>> +  TEST_OP (TYPE##SIZE, xor) \
>> +  TEST_OP (TYPE##SIZE, mul) \
>> +  TEST_OP (TYPE##SIZE, min) \
>> +  TEST_OP (TYPE##SIZE, max)
>>
>>   TEST_TYPE (int8_t, 32)
>>   TEST_TYPE (uint8_t, 32)
>> @@ -45,6 +46,9 @@ TEST_TYPE (uint16_t, 128)
>>   TEST_TYPE (int32_t, 128)
>>   TEST_TYPE (uint32_t, 128)
>>
>> +TEST_OP (int32_t128, div)
>> +TEST_OP (uint32_t128, div)
>> +
>>   /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, 
>> z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */
>>   /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, 
>> z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
>>   /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
>> @@ -85,7 +89,10 @@ TEST_TYPE (uint32_t, 128)
>>   /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, 
>> z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
>>   /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>>
>> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>> +
>>   /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, 
>> z[0-9]+\.b\n} 48 } } */
>>   /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
>> z[0-9]+\.h\n} 32 } } */
>> -/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s\n} 16 } } */
>> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s\n} 18 } } */
>>   /* { dg-final { scan-assembler-not {\tsel\t} } } */
>> diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_4.C 
>> b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_4.C
>> index efa4b8953a3..fc799255e19 100644
>> --- a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_4.C
>> +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_4.C
>> @@ -9,10 +9,11 @@
>>   #define op_ior(A, B) ((A) | (B))
>>   #define op_xor(A, B) ((A) ^ (B))
>>   #define op_mul(A, B) ((A) * (B))
>> +#define op_div(A, B) ((A) / (B))
>>   #define op_max(A, B) ((A) > (B) ? (A) : (B))
>>   #define op_min(A, B) ((A) < (B) ? (A) : (B))
>>
>> -#define TEST_SHIFT(TYPE, NAME) \
>> +#define TEST_OP(TYPE, NAME) \
>>     TYPE \
>>     NAME##_##TYPE##_reg (TYPE a, TYPE b, TYPE c) \
>>     { \
>> @@ -21,14 +22,14 @@
>>
>>   #define TEST_TYPE(TYPE, SIZE) \
>>     typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
>> -  TEST_SHIFT (TYPE##SIZE, add) \
>> -  TEST_SHIFT (TYPE##SIZE, sub) \
>> -  TEST_SHIFT (TYPE##SIZE, and) \
>> -  TEST_SHIFT (TYPE##SIZE, ior) \
>> -  TEST_SHIFT (TYPE##SIZE, xor) \
>> -  TEST_SHIFT (TYPE##SIZE, mul) \
>> -  TEST_SHIFT (TYPE##SIZE, min) \
>> -  TEST_SHIFT (TYPE##SIZE, max)
>> +  TEST_OP (TYPE##SIZE, add) \
>> +  TEST_OP (TYPE##SIZE, sub) \
>> +  TEST_OP (TYPE##SIZE, and) \
>> +  TEST_OP (TYPE##SIZE, ior) \
>> +  TEST_OP (TYPE##SIZE, xor) \
>> +  TEST_OP (TYPE##SIZE, mul) \
>> +  TEST_OP (TYPE##SIZE, min) \
>> +  TEST_OP (TYPE##SIZE, max)
>>
>>   TEST_TYPE (int8_t, 32)
>>   TEST_TYPE (uint8_t, 32)
>> @@ -45,6 +46,9 @@ TEST_TYPE (uint16_t, 128)
>>   TEST_TYPE (int32_t, 128)
>>   TEST_TYPE (uint32_t, 128)
>>
>> +TEST_OP (int32_t128, div)
>> +TEST_OP (uint32_t128, div)
>> +
>>   /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, 
>> z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */
>>   /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, 
>> z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
>>   /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
>> @@ -85,7 +89,10 @@ TEST_TYPE (uint32_t, 128)
>>   /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, 
>> z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
>>   /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>>
>> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>> +
>>   /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, 
>> z[0-9]+\.b\n} 48 } } */
>>   /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, 
>> z[0-9]+\.h\n} 32 } } */
>> -/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, 
>> z[0-9]+\.s\n} 16 } } */
>> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, 
>> z[0-9]+\.s\n} 18 } } */
>>   /* { dg-final { scan-assembler-not {\tsel\t} } } */
>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/div_2.c 
>> b/gcc/testsuite/gcc.target/aarch64/sve/div_2.c
>> new file mode 100644
>> index 00000000000..02483ec47de
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/div_2.c
>> @@ -0,0 +1,22 @@
>> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */
>> +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
>> +
>> +#include <stdint.h>
>> +
>> +typedef int32_t v32si __attribute__((vector_size(128)));
>> +typedef uint32_t v32usi __attribute__((vector_size(128)));
>> +
>> +v32si
>> +test_sdiv (v32si x, v32si y)
>> +{
>> +    return x / y;
>> +}
>> +
>> +v32usi
>> +test_udiv (v32usi x, v32usi y)
>> +{
>> +    return x / y;
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\tsdivr?\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>> +/* { dg-final { scan-assembler-times {\tudivr?\tz[0-9]+\.s, p[0-7]/m, 
>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>> --
>> 2.34.1
>>
>>

Re: [PATCH] aarch64: Support unpacked SVE integer division

Reply via email to