Re: [PATCH] aarch64: Support unpacked SVE integer division

Remi Machet Fri, 11 Jul 2025 07:41:10 -0700

On 7/11/25 08:21, Spencer Abson wrote:

External email: Use caution opening links or attachments



This patch extends the existing patterns for SVE_INT_BINARY_SD to
support partial SVE integer modes, including those that implement the
conditional form.

gcc/ChangeLog:

        * config/aarch64/aarch64-sve.md (<optab><mode>3): Extend
        to SVE_SDI_SIMD.
        (@aarch64_pred_<optab><mode>): Likewise.
        (@cond_<optab><mode>): Extend to SVE_SDI.
        (*cond_<optab><mode>_2): Likewise.
        (*cond_<optab><mode>_3): Likewise.
        (*cond_<optab><mode>_any): Likewise.
        * config/aarch64/iterators.md (SVE_SDI): New iterator for
        all SVE vector modes with 32-bit or 64-bit elements.
        (SVE_SDI_SIMD): New iterator.  As above, but including
        V4SI and V2DI.

gcc/testsuite/ChangeLog:

        * g++.target/aarch64/sve/cond_arith_1.C: Rename TEST_SHIFT
        to TEST_OP, add tests for SDIV and UDIV.
        * g++.target/aarch64/sve/cond_arith_2.C: Likewise.
        * g++.target/aarch64/sve/cond_arith_3.C: Likewise.
        * g++.target/aarch64/sve/cond_arith_4.C: Likewise.
        * gcc.target/aarch64/sve/div_2.c: New test.

---

Bootstrapped & regtested on aarch64-linux-gnu.  OK for master?

Thanks,
Spencer

---
 gcc/config/aarch64/aarch64-sve.md             | 64 +++++++++----------
 gcc/config/aarch64/iterators.md               |  7 ++
 .../g++.target/aarch64/sve/cond_arith_1.C     | 25 +++++---
 .../g++.target/aarch64/sve/cond_arith_2.C     | 25 +++++---
 .../g++.target/aarch64/sve/cond_arith_3.C     | 27 +++++---
 .../g++.target/aarch64/sve/cond_arith_4.C     | 27 +++++---
 gcc/testsuite/gcc.target/aarch64/sve/div_2.c  | 22 +++++++
 7 files changed, 127 insertions(+), 70 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/div_2.c

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index 6b5113eb70f..871b31623bb 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -4712,12 +4712,12 @@
 ;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
 ;; optabs to the midend.
 (define_expand "<optab><mode>3"
-  [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
-       (unspec:SVE_FULL_SDI_SIMD
+  [(set (match_operand:SVE_SDI_SIMD 0 "register_operand")
+       (unspec:SVE_SDI_SIMD
          [(match_dup 3)
-          (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
-            (match_operand:SVE_FULL_SDI_SIMD 1 "register_operand")
-            (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand"))]
+          (SVE_INT_BINARY_SD:SVE_SDI_SIMD
+            (match_operand:SVE_SDI_SIMD 1 "register_operand")
+            (match_operand:SVE_SDI_SIMD 2 "register_operand"))]
          UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
@@ -4727,12 +4727,12 @@

 ;; Integer division predicated with a PTRUE.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
-       (unspec:SVE_FULL_SDI_SIMD
+  [(set (match_operand:SVE_SDI_SIMD 0 "register_operand")
+       (unspec:SVE_SDI_SIMD
          [(match_operand:<VPRED> 1 "register_operand")
-          (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
-            (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand")
-            (match_operand:SVE_FULL_SDI_SIMD 3 "register_operand"))]
+          (SVE_INT_BINARY_SD:SVE_SDI_SIMD
+            (match_operand:SVE_SDI_SIMD 2 "register_operand")
+            (match_operand:SVE_SDI_SIMD 3 "register_operand"))]
          UNSPEC_PRED_X))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
@@ -4744,25 +4744,25 @@

 ;; Predicated integer division with merging.
 (define_expand "@cond_<optab><mode>"
-  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
-       (unspec:SVE_FULL_SDI
+  [(set (match_operand:SVE_SDI 0 "register_operand")
+       (unspec:SVE_SDI
          [(match_operand:<VPRED> 1 "register_operand")
-          (SVE_INT_BINARY_SD:SVE_FULL_SDI
-            (match_operand:SVE_FULL_SDI 2 "register_operand")
-            (match_operand:SVE_FULL_SDI 3 "register_operand"))
-          (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
+          (SVE_INT_BINARY_SD:SVE_SDI
+            (match_operand:SVE_SDI 2 "register_operand")
+            (match_operand:SVE_SDI 3 "register_operand"))
+          (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE"
 )

 ;; Predicated integer division, merging with the first input.
 (define_insn "*cond_<optab><mode>_2"
-  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
-       (unspec:SVE_FULL_SDI
+  [(set (match_operand:SVE_SDI 0 "register_operand")
+       (unspec:SVE_SDI
          [(match_operand:<VPRED> 1 "register_operand")
-          (SVE_INT_BINARY_SD:SVE_FULL_SDI
-            (match_operand:SVE_FULL_SDI 2 "register_operand")
-            (match_operand:SVE_FULL_SDI 3 "register_operand"))
+          (SVE_INT_BINARY_SD:SVE_SDI
+            (match_operand:SVE_SDI 2 "register_operand")
+            (match_operand:SVE_SDI 3 "register_operand"))
           (match_dup 2)]
          UNSPEC_SEL))]
   "TARGET_SVE"
@@ -4774,12 +4774,12 @@

 ;; Predicated integer division, merging with the second input.
 (define_insn "*cond_<optab><mode>_3"
-  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
-       (unspec:SVE_FULL_SDI
+  [(set (match_operand:SVE_SDI 0 "register_operand")
+       (unspec:SVE_SDI
          [(match_operand:<VPRED> 1 "register_operand")
-          (SVE_INT_BINARY_SD:SVE_FULL_SDI
-            (match_operand:SVE_FULL_SDI 2 "register_operand")
-            (match_operand:SVE_FULL_SDI 3 "register_operand"))
+          (SVE_INT_BINARY_SD:SVE_SDI
+            (match_operand:SVE_SDI 2 "register_operand")
+            (match_operand:SVE_SDI 3 "register_operand"))
           (match_dup 3)]
          UNSPEC_SEL))]
   "TARGET_SVE"
@@ -4791,13 +4791,13 @@

 ;; Predicated integer division, merging with an independent value.
 (define_insn_and_rewrite "*cond_<optab><mode>_any"
-  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
-       (unspec:SVE_FULL_SDI
+  [(set (match_operand:SVE_SDI 0 "register_operand")
+       (unspec:SVE_SDI
          [(match_operand:<VPRED> 1 "register_operand")
-          (SVE_INT_BINARY_SD:SVE_FULL_SDI
-            (match_operand:SVE_FULL_SDI 2 "register_operand")
-            (match_operand:SVE_FULL_SDI 3 "register_operand"))
-          (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
+          (SVE_INT_BINARY_SD:SVE_SDI
+            (match_operand:SVE_SDI 2 "register_operand")
+            (match_operand:SVE_SDI 3 "register_operand"))
+          (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE
    && !rtx_equal_p (operands[2], operands[4])
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index c59fcd679d7..08ff6e42780 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -557,10 +557,17 @@
 ;; Fully-packed SVE integer vector modes that have 32-bit or 64-bit elements.
 (define_mode_iterator SVE_FULL_SDI [VNx4SI VNx2DI])

+;; SVE integer vector modes that have 32-bit or 64-bit elements.
+(define_mode_iterator SVE_SDI [VNx2SI SVE_FULL_SDI])
+
 ;; Fully-packed SVE and Advanced SIMD integer vector modes that have 32-bit or
 ;; 64-bit elements.
 (define_mode_iterator SVE_FULL_SDI_SIMD [SVE_FULL_SDI V4SI V2DI])

+;; SVE and full Advanced SIMD integer vector modes that have 32-bit or
+;; 64-bit elements.
+(define_mode_iterator SVE_SDI_SIMD [VNx2SI SVE_FULL_SDI_SIMD])
+

Hi Spencer,

Based on the definition of SVE_FULL_SDI_SIMD, I would have expected V2SI to 
also be in SVE_SDI_SIMD. I assume it is excluded because it is taken care of by 
another iterator already? If so it might be worth mentioning.

Looks good to me otherwise (but someone else needs to approve).

Remi


 ;; 2x and 4x tuples of the above, excluding 2x DI.
 (define_mode_iterator SVE_FULL_SIx2_SDIx4 [VNx8SI VNx16SI VNx8DI])

diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_1.C 
b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_1.C
index 0c6f640e35b..40ecb3a012e 100644
--- a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_1.C
+++ b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_1.C
@@ -9,10 +9,11 @@
 #define op_ior(A, B) ((A) | (B))
 #define op_xor(A, B) ((A) ^ (B))
 #define op_mul(A, B) ((A) * (B))
+#define op_div(A, B) ((A) / (B))
 #define op_max(A, B) ((A) > (B) ? (A) : (B))
 #define op_min(A, B) ((A) < (B) ? (A) : (B))

-#define TEST_SHIFT(TYPE, NAME) \
+#define TEST_OP(TYPE, NAME) \
   TYPE \
   NAME##_##TYPE##_reg (TYPE a, TYPE b, TYPE c) \
   { \
@@ -21,14 +22,14 @@

 #define TEST_TYPE(TYPE, SIZE) \
   typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
-  TEST_SHIFT (TYPE##SIZE, add) \
-  TEST_SHIFT (TYPE##SIZE, sub) \
-  TEST_SHIFT (TYPE##SIZE, and) \
-  TEST_SHIFT (TYPE##SIZE, ior) \
-  TEST_SHIFT (TYPE##SIZE, xor) \
-  TEST_SHIFT (TYPE##SIZE, mul) \
-  TEST_SHIFT (TYPE##SIZE, min) \
-  TEST_SHIFT (TYPE##SIZE, max)
+  TEST_OP (TYPE##SIZE, add) \
+  TEST_OP (TYPE##SIZE, sub) \
+  TEST_OP (TYPE##SIZE, and) \
+  TEST_OP (TYPE##SIZE, ior) \
+  TEST_OP (TYPE##SIZE, xor) \
+  TEST_OP (TYPE##SIZE, mul) \
+  TEST_OP (TYPE##SIZE, min) \
+  TEST_OP (TYPE##SIZE, max)

 TEST_TYPE (int8_t, 32)
 TEST_TYPE (uint8_t, 32)
@@ -45,6 +46,9 @@ TEST_TYPE (uint16_t, 128)
 TEST_TYPE (int32_t, 128)
 TEST_TYPE (uint32_t, 128)

+TEST_OP (int32_t128, div)
+TEST_OP (uint32_t128, div)
+
 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, 
z[0-9]+\.b\n} 6 } } */
 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, 
z[0-9]+\.h\n} 4 } } */
 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, 
z[0-9]+\.s\n} 2 } } */
@@ -85,5 +89,8 @@ TEST_TYPE (uint32_t, 128)
 /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */

+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
 /* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
 /* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_2.C 
b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_2.C
index 8965c949873..9e40249fc11 100644
--- a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_2.C
+++ b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_2.C
@@ -9,10 +9,11 @@
 #define op_ior(A, B) ((A) | (B))
 #define op_xor(A, B) ((A) ^ (B))
 #define op_mul(A, B) ((A) * (B))
+#define op_div(A, B) ((A) / (B))
 #define op_max(A, B) ((A) > (B) ? (A) : (B))
 #define op_min(A, B) ((A) < (B) ? (A) : (B))

-#define TEST_SHIFT(TYPE, NAME) \
+#define TEST_OP(TYPE, NAME) \
   TYPE \
   NAME##_##TYPE##_reg (TYPE a, TYPE b, TYPE c) \
   { \
@@ -21,14 +22,14 @@

 #define TEST_TYPE(TYPE, SIZE) \
   typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
-  TEST_SHIFT (TYPE##SIZE, add) \
-  TEST_SHIFT (TYPE##SIZE, sub) \
-  TEST_SHIFT (TYPE##SIZE, and) \
-  TEST_SHIFT (TYPE##SIZE, ior) \
-  TEST_SHIFT (TYPE##SIZE, xor) \
-  TEST_SHIFT (TYPE##SIZE, mul) \
-  TEST_SHIFT (TYPE##SIZE, min) \
-  TEST_SHIFT (TYPE##SIZE, max)
+  TEST_OP (TYPE##SIZE, add) \
+  TEST_OP (TYPE##SIZE, sub) \
+  TEST_OP (TYPE##SIZE, and) \
+  TEST_OP (TYPE##SIZE, ior) \
+  TEST_OP (TYPE##SIZE, xor) \
+  TEST_OP (TYPE##SIZE, mul) \
+  TEST_OP (TYPE##SIZE, min) \
+  TEST_OP (TYPE##SIZE, max)

 TEST_TYPE (int8_t, 32)
 TEST_TYPE (uint8_t, 32)
@@ -45,6 +46,9 @@ TEST_TYPE (uint16_t, 128)
 TEST_TYPE (int32_t, 128)
 TEST_TYPE (uint32_t, 128)

+TEST_OP (int32_t128, div)
+TEST_OP (uint32_t128, div)
+
 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, 
z[0-9]+\.b\n} 6 } } */
 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, 
z[0-9]+\.h\n} 4 } } */
 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, 
z[0-9]+\.s\n} 2 } } */
@@ -85,5 +89,8 @@ TEST_TYPE (uint32_t, 128)
 /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */

+/* { dg-final { scan-assembler-times {\tsdivr\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tudivr\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
 /* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
 /* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_3.C 
b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_3.C
index 3aa8669fe3e..bbc7cc331fb 100644
--- a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_3.C
+++ b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_3.C
@@ -9,10 +9,11 @@
 #define op_ior(A, B) ((A) | (B))
 #define op_xor(A, B) ((A) ^ (B))
 #define op_mul(A, B) ((A) * (B))
+#define op_div(A, B) ((A) / (B))
 #define op_max(A, B) ((A) > (B) ? (A) : (B))
 #define op_min(A, B) ((A) < (B) ? (A) : (B))

-#define TEST_SHIFT(TYPE, NAME) \
+#define TEST_OP(TYPE, NAME) \
   TYPE \
   NAME##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \
   { \
@@ -21,14 +22,14 @@

 #define TEST_TYPE(TYPE, SIZE) \
   typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
-  TEST_SHIFT (TYPE##SIZE, add) \
-  TEST_SHIFT (TYPE##SIZE, sub) \
-  TEST_SHIFT (TYPE##SIZE, and) \
-  TEST_SHIFT (TYPE##SIZE, ior) \
-  TEST_SHIFT (TYPE##SIZE, xor) \
-  TEST_SHIFT (TYPE##SIZE, mul) \
-  TEST_SHIFT (TYPE##SIZE, min) \
-  TEST_SHIFT (TYPE##SIZE, max)
+  TEST_OP (TYPE##SIZE, add) \
+  TEST_OP (TYPE##SIZE, sub) \
+  TEST_OP (TYPE##SIZE, and) \
+  TEST_OP (TYPE##SIZE, ior) \
+  TEST_OP (TYPE##SIZE, xor) \
+  TEST_OP (TYPE##SIZE, mul) \
+  TEST_OP (TYPE##SIZE, min) \
+  TEST_OP (TYPE##SIZE, max)

 TEST_TYPE (int8_t, 32)
 TEST_TYPE (uint8_t, 32)
@@ -45,6 +46,9 @@ TEST_TYPE (uint16_t, 128)
 TEST_TYPE (int32_t, 128)
 TEST_TYPE (uint32_t, 128)

+TEST_OP (int32_t128, div)
+TEST_OP (uint32_t128, div)
+
 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, 
z[0-9]+\.b\n} 6 } } */
 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, 
z[0-9]+\.h\n} 4 } } */
 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, 
z[0-9]+\.s\n} 2 } } */
@@ -85,7 +89,10 @@ TEST_TYPE (uint32_t, 128)
 /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */

+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
 /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, 
z[0-9]+\.b\n} 48 } } */
 /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h\n} 32 } } */
-/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 16 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 18 } } */
 /* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_4.C 
b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_4.C
index efa4b8953a3..fc799255e19 100644
--- a/gcc/testsuite/g++.target/aarch64/sve/cond_arith_4.C
+++ b/gcc/testsuite/g++.target/aarch64/sve/cond_arith_4.C
@@ -9,10 +9,11 @@
 #define op_ior(A, B) ((A) | (B))
 #define op_xor(A, B) ((A) ^ (B))
 #define op_mul(A, B) ((A) * (B))
+#define op_div(A, B) ((A) / (B))
 #define op_max(A, B) ((A) > (B) ? (A) : (B))
 #define op_min(A, B) ((A) < (B) ? (A) : (B))

-#define TEST_SHIFT(TYPE, NAME) \
+#define TEST_OP(TYPE, NAME) \
   TYPE \
   NAME##_##TYPE##_reg (TYPE a, TYPE b, TYPE c) \
   { \
@@ -21,14 +22,14 @@

 #define TEST_TYPE(TYPE, SIZE) \
   typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
-  TEST_SHIFT (TYPE##SIZE, add) \
-  TEST_SHIFT (TYPE##SIZE, sub) \
-  TEST_SHIFT (TYPE##SIZE, and) \
-  TEST_SHIFT (TYPE##SIZE, ior) \
-  TEST_SHIFT (TYPE##SIZE, xor) \
-  TEST_SHIFT (TYPE##SIZE, mul) \
-  TEST_SHIFT (TYPE##SIZE, min) \
-  TEST_SHIFT (TYPE##SIZE, max)
+  TEST_OP (TYPE##SIZE, add) \
+  TEST_OP (TYPE##SIZE, sub) \
+  TEST_OP (TYPE##SIZE, and) \
+  TEST_OP (TYPE##SIZE, ior) \
+  TEST_OP (TYPE##SIZE, xor) \
+  TEST_OP (TYPE##SIZE, mul) \
+  TEST_OP (TYPE##SIZE, min) \
+  TEST_OP (TYPE##SIZE, max)

 TEST_TYPE (int8_t, 32)
 TEST_TYPE (uint8_t, 32)
@@ -45,6 +46,9 @@ TEST_TYPE (uint16_t, 128)
 TEST_TYPE (int32_t, 128)
 TEST_TYPE (uint32_t, 128)

+TEST_OP (int32_t128, div)
+TEST_OP (uint32_t128, div)
+
 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, 
z[0-9]+\.b\n} 6 } } */
 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, 
z[0-9]+\.h\n} 4 } } */
 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, 
z[0-9]+\.s\n} 2 } } */
@@ -85,7 +89,10 @@ TEST_TYPE (uint32_t, 128)
 /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */

+/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
 /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, 
z[0-9]+\.b\n} 48 } } */
 /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, 
z[0-9]+\.h\n} 32 } } */
-/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, 
z[0-9]+\.s\n} 16 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, 
z[0-9]+\.s\n} 18 } } */
 /* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/div_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/div_2.c
new file mode 100644
index 00000000000..02483ec47de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/div_2.c
@@ -0,0 +1,22 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
+
+#include <stdint.h>
+
+typedef int32_t v32si __attribute__((vector_size(128)));
+typedef uint32_t v32usi __attribute__((vector_size(128)));
+
+v32si
+test_sdiv (v32si x, v32si y)
+{
+    return x / y;
+}
+
+v32usi
+test_udiv (v32usi x, v32usi y)
+{
+    return x / y;
+}
+
+/* { dg-final { scan-assembler-times {\tsdivr?\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tudivr?\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
--
2.34.1

Re: [PATCH] aarch64: Support unpacked SVE integer division

Reply via email to