https://gcc.gnu.org/g:7cde140863edea536c676096cbc3d84a6d1424e4

commit r15-2405-g7cde140863edea536c676096cbc3d84a6d1424e4
Author: Jennifer Schmitz <jschm...@nvidia.com>
Date:   Tue Jul 16 01:59:50 2024 -0700

    SVE intrinsics: Add strength reduction for division by constant.
    
    This patch folds an SVE division whose divisor elements are all the same
    power of 2 to svasrd (signed) or svlsr (unsigned).
    Tests were added to check
    1) that the transform is applied (the existing test harness was amended), and
    2) correctness, using runtime tests for all input types of svdiv; for signed
    and unsigned integers, several corner cases are covered.
    
    The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
    OK for mainline?
    
    Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
    
    gcc/
    
            * config/aarch64/aarch64-sve-builtins-base.cc (svdiv_impl::fold):
            Implement strength reduction.
    
    gcc/testsuite/
    
            * gcc.target/aarch64/sve/div_const_run.c: New test.
            * gcc.target/aarch64/sve/acle/asm/div_s32.c: Likewise.
            * gcc.target/aarch64/sve/acle/asm/div_s64.c: Likewise.
            * gcc.target/aarch64/sve/acle/asm/div_u32.c: Likewise.
            * gcc.target/aarch64/sve/acle/asm/div_u64.c: Likewise.
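
    As a sketch of the effect (not part of the patch), a division by a
    uniform power-of-2 constant now folds to an immediate shift, per the
    asm tests below:

            #include <arm_sve.h>

            svint32_t f (svbool_t pg, svint32_t x) {
              return svdiv_n_s32_x (pg, x, 4);  /* folds to: asrd z0.s, p0/m, z0.s, #2 */
            }

            svuint64_t g (svbool_t pg, svuint64_t x) {
              return svdiv_n_u64_x (pg, x, 8);  /* folds to: lsr z0.d, z0.d, #3 */
            }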

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-base.cc    |  49 +++-
 .../gcc.target/aarch64/sve/acle/asm/div_s32.c      | 273 +++++++++++++++++++--
 .../gcc.target/aarch64/sve/acle/asm/div_s64.c      | 273 +++++++++++++++++++--
 .../gcc.target/aarch64/sve/acle/asm/div_u32.c      | 201 +++++++++++++--
 .../gcc.target/aarch64/sve/acle/asm/div_u64.c      | 201 +++++++++++++--
 .../gcc.target/aarch64/sve/div_const_run.c         |  91 +++++++
 6 files changed, 1031 insertions(+), 57 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index a2268353ae31..d55bee0b72fa 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -746,6 +746,53 @@ public:
   }
 };
 
+class svdiv_impl : public rtx_code_function
+{
+public:
+  CONSTEXPR svdiv_impl ()
+    : rtx_code_function (DIV, UDIV, UNSPEC_COND_FDIV) {}
+
+  gimple *
+  fold (gimple_folder &f) const override
+  {
+    tree divisor = gimple_call_arg (f.call, 2);
+    tree divisor_cst = uniform_integer_cst_p (divisor);
+
+    if (!divisor_cst || !integer_pow2p (divisor_cst))
+      return NULL;
+
+    tree new_divisor;
+    gcall *call;
+
+    if (f.type_suffix (0).unsigned_p && tree_to_uhwi (divisor_cst) != 1)
+      {
+       function_instance instance ("svlsr", functions::svlsr,
+                                   shapes::binary_uint_opt_n, MODE_n,
+                                   f.type_suffix_ids, GROUP_none, f.pred);
+       call = f.redirect_call (instance);
+       tree d = INTEGRAL_TYPE_P (TREE_TYPE (divisor)) ? divisor : divisor_cst;
+       new_divisor = wide_int_to_tree (TREE_TYPE (d), tree_log2 (d));
+      }
+    else
+      {
+       if (tree_int_cst_sign_bit (divisor_cst)
+           || tree_to_shwi (divisor_cst) == 1)
+         return NULL;
+
+       function_instance instance ("svasrd", functions::svasrd,
+                                   shapes::shift_right_imm, MODE_n,
+                                   f.type_suffix_ids, GROUP_none, f.pred);
+       call = f.redirect_call (instance);
+       new_divisor = wide_int_to_tree (scalar_types[VECTOR_TYPE_svuint64_t],
+                                       tree_log2 (divisor_cst));
+      }
+
+    gimple_call_set_arg (call, 2, new_divisor);
+    return call;
+  }
+};
+
+
 class svdot_impl : public function_base
 {
 public:
@@ -3043,7 +3090,7 @@ FUNCTION (svcreate3, svcreate_impl, (3))
 FUNCTION (svcreate4, svcreate_impl, (4))
 FUNCTION (svcvt, svcvt_impl,)
 FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),)
-FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV))
+FUNCTION (svdiv, svdiv_impl,)
 FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
 FUNCTION (svdot, svdot_impl,)
 FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT,
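
The fold above rejects a signed divisor of 1 (its log2 of 0 lies outside
svasrd's immediate range) and negative divisors such as INT_MIN (caught by
the sign-bit check), keeping the generic sdiv path for both.  For the
remaining signed powers of 2, ASRD matches C division because it biases
negative inputs before shifting, so the result rounds toward zero.  A
scalar sketch of that behaviour (not part of the patch; it assumes
arithmetic right shift of signed values, as GCC on AArch64 provides):

    #include <stdint.h>
    #include <stdio.h>

    /* Scalar model of ASRD: add 2^n - 1 to negative inputs before the
       arithmetic shift so the result rounds toward zero like C division.  */
    static int32_t
    asrd_model (int32_t x, unsigned n)
    {
      int32_t bias = x < 0 ? (int32_t) ((1u << n) - 1) : 0;
      return (x + bias) >> n;
    }

    int
    main (void)
    {
      /* A plain shift rounds toward -inf: -7 >> 2 == -2, but -7 / 4 == -1.  */
      printf ("%d %d\n", -7 / 4, asrd_model (-7, 2));  /* prints: -1 -1 */
      return 0;
    }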
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c
index c49ca1aa5243..d5a23bf07262 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c
@@ -2,6 +2,8 @@
 
 #include "test_sve_acle.h"
 
+#define MAXPOW 1<<30
+
 /*
 ** div_s32_m_tied1:
 **     sdiv    z0\.s, p0/m, z0\.s, z1\.s
@@ -53,10 +55,27 @@ TEST_UNIFORM_ZX (div_w0_s32_m_untied, svint32_t, int32_t,
                 z0 = svdiv_n_s32_m (p0, z1, x0),
                 z0 = svdiv_m (p0, z1, x0))
 
+/*
+** div_1_s32_m_tied1:
+**     sel     z0\.s, p0, z0\.s, z0\.s
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_s32_m_tied1, svint32_t,
+               z0 = svdiv_n_s32_m (p0, z0, 1),
+               z0 = svdiv_m (p0, z0, 1))
+
+/*
+** div_1_s32_m_untied:
+**     sel     z0\.s, p0, z1\.s, z1\.s
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_s32_m_untied, svint32_t,
+               z0 = svdiv_n_s32_m (p0, z1, 1),
+               z0 = svdiv_m (p0, z1, 1))
+
 /*
 ** div_2_s32_m_tied1:
-**     mov     (z[0-9]+\.s), #2
-**     sdiv    z0\.s, p0/m, z0\.s, \1
+**     asrd    z0\.s, p0/m, z0\.s, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_s32_m_tied1, svint32_t,
@@ -65,15 +84,75 @@ TEST_UNIFORM_Z (div_2_s32_m_tied1, svint32_t,
 
 /*
 ** div_2_s32_m_untied:
-**     mov     (z[0-9]+\.s), #2
 **     movprfx z0, z1
-**     sdiv    z0\.s, p0/m, z0\.s, \1
+**     asrd    z0\.s, p0/m, z0\.s, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_s32_m_untied, svint32_t,
                z0 = svdiv_n_s32_m (p0, z1, 2),
                z0 = svdiv_m (p0, z1, 2))
 
+/*
+** div_3_s32_m_tied1:
+**     mov     (z[0-9]+\.s), #3
+**     sdiv    z0\.s, p0/m, z0\.s, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_s32_m_tied1, svint32_t,
+               z0 = svdiv_n_s32_m (p0, z0, 3),
+               z0 = svdiv_m (p0, z0, 3))
+
+/*
+** div_3_s32_m_untied:
+**     mov     (z[0-9]+\.s), #3
+**     movprfx z0, z1
+**     sdiv    z0\.s, p0/m, z0\.s, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_s32_m_untied, svint32_t,
+               z0 = svdiv_n_s32_m (p0, z1, 3),
+               z0 = svdiv_m (p0, z1, 3))
+
+/*
+** div_maxpow_s32_m_tied1:
+**     asrd    z0\.s, p0/m, z0\.s, #30
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s32_m_tied1, svint32_t,
+               z0 = svdiv_n_s32_m (p0, z0, MAXPOW),
+               z0 = svdiv_m (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_s32_m_untied:
+**     movprfx z0, z1
+**     asrd    z0\.s, p0/m, z0\.s, #30
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s32_m_untied, svint32_t,
+               z0 = svdiv_n_s32_m (p0, z1, MAXPOW),
+               z0 = svdiv_m (p0, z1, MAXPOW))
+
+/*
+** div_intmin_s32_m_tied1:
+**     mov     (z[0-9]+\.s), #-2147483648
+**     sdiv    z0\.s, p0/m, z0\.s, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_intmin_s32_m_tied1, svint32_t,
+               z0 = svdiv_n_s32_m (p0, z0, INT32_MIN),
+               z0 = svdiv_m (p0, z0, INT32_MIN))
+
+/*
+** div_intmin_s32_m_untied:
+**     mov     (z[0-9]+\.s), #-2147483648
+**     movprfx z0, z1
+**     sdiv    z0\.s, p0/m, z0\.s, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_intmin_s32_m_untied, svint32_t,
+               z0 = svdiv_n_s32_m (p0, z1, INT32_MIN),
+               z0 = svdiv_m (p0, z1, INT32_MIN))
+
 /*
 ** div_s32_z_tied1:
 **     movprfx z0\.s, p0/z, z0\.s
@@ -137,19 +216,61 @@ TEST_UNIFORM_ZX (div_w0_s32_z_untied, svint32_t, int32_t,
                 z0 = svdiv_z (p0, z1, x0))
 
 /*
-** div_2_s32_z_tied1:
-**     mov     (z[0-9]+\.s), #2
+** div_1_s32_z_tied1:
+**     mov     (z[0-9]+\.s), #1
 **     movprfx z0\.s, p0/z, z0\.s
 **     sdiv    z0\.s, p0/m, z0\.s, \1
 **     ret
 */
+TEST_UNIFORM_Z (div_1_s32_z_tied1, svint32_t,
+               z0 = svdiv_n_s32_z (p0, z0, 1),
+               z0 = svdiv_z (p0, z0, 1))
+
+/*
+** div_1_s32_z_untied:
+**     mov     z0\.s, #1
+**     movprfx z0\.s, p0/z, z0\.s
+**     sdivr   z0\.s, p0/m, z0\.s, z1\.s
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_s32_z_untied, svint32_t,
+               z0 = svdiv_n_s32_z (p0, z1, 1),
+               z0 = svdiv_z (p0, z1, 1))
+
+/*
+** div_2_s32_z_tied1:
+**     movprfx z0\.s, p0/z, z0\.s
+**     asrd    z0\.s, p0/m, z0\.s, #1
+**     ret
+*/
 TEST_UNIFORM_Z (div_2_s32_z_tied1, svint32_t,
                z0 = svdiv_n_s32_z (p0, z0, 2),
                z0 = svdiv_z (p0, z0, 2))
 
 /*
 ** div_2_s32_z_untied:
-**     mov     (z[0-9]+\.s), #2
+**     movprfx z0\.s, p0/z, z1\.s
+**     asrd    z0\.s, p0/m, z0\.s, #1
+**     ret
+*/
+TEST_UNIFORM_Z (div_2_s32_z_untied, svint32_t,
+               z0 = svdiv_n_s32_z (p0, z1, 2),
+               z0 = svdiv_z (p0, z1, 2))
+
+/*
+** div_3_s32_z_tied1:
+**     mov     (z[0-9]+\.s), #3
+**     movprfx z0\.s, p0/z, z0\.s
+**     sdiv    z0\.s, p0/m, z0\.s, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_s32_z_tied1, svint32_t,
+               z0 = svdiv_n_s32_z (p0, z0, 3),
+               z0 = svdiv_z (p0, z0, 3))
+
+/*
+** div_3_s32_z_untied:
+**     mov     (z[0-9]+\.s), #3
 ** (
 **     movprfx z0\.s, p0/z, z1\.s
 **     sdiv    z0\.s, p0/m, z0\.s, \1
@@ -159,9 +280,56 @@ TEST_UNIFORM_Z (div_2_s32_z_tied1, svint32_t,
 ** )
 **     ret
 */
-TEST_UNIFORM_Z (div_2_s32_z_untied, svint32_t,
-               z0 = svdiv_n_s32_z (p0, z1, 2),
-               z0 = svdiv_z (p0, z1, 2))
+TEST_UNIFORM_Z (div_3_s32_z_untied, svint32_t,
+               z0 = svdiv_n_s32_z (p0, z1, 3),
+               z0 = svdiv_z (p0, z1, 3))
+
+/*
+** div_maxpow_s32_z_tied1:
+**     movprfx z0\.s, p0/z, z0\.s
+**     asrd    z0\.s, p0/m, z0\.s, #30
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s32_z_tied1, svint32_t,
+               z0 = svdiv_n_s32_z (p0, z0, MAXPOW),
+               z0 = svdiv_z (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_s32_z_untied:
+**     movprfx z0\.s, p0/z, z1\.s
+**     asrd    z0\.s, p0/m, z0\.s, #30
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s32_z_untied, svint32_t,
+               z0 = svdiv_n_s32_z (p0, z1, MAXPOW),
+               z0 = svdiv_z (p0, z1, MAXPOW))
+
+/*
+** div_intmin_s32_z_tied1:
+**     mov     (z[0-9]+\.s), #-2147483648
+**     movprfx z0\.s, p0/z, z0\.s
+**     sdiv    z0\.s, p0/m, z0\.s, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_intmin_s32_z_tied1, svint32_t,
+               z0 = svdiv_n_s32_z (p0, z0, INT32_MIN),
+               z0 = svdiv_z (p0, z0, INT32_MIN))
+
+/*
+** div_intmin_s32_z_untied:
+**     mov     (z[0-9]+\.s), #-2147483648
+** (
+**     movprfx z0\.s, p0/z, z1\.s
+**     sdiv    z0\.s, p0/m, z0\.s, \1
+** |
+**     movprfx z0\.s, p0/z, \1
+**     sdivr   z0\.s, p0/m, z0\.s, z1\.s
+** )
+**     ret
+*/
+TEST_UNIFORM_Z (div_intmin_s32_z_untied, svint32_t,
+               z0 = svdiv_n_s32_z (p0, z1, INT32_MIN),
+               z0 = svdiv_z (p0, z1, INT32_MIN))
 
 /*
 ** div_s32_x_tied1:
@@ -216,10 +384,26 @@ TEST_UNIFORM_ZX (div_w0_s32_x_untied, svint32_t, int32_t,
                 z0 = svdiv_n_s32_x (p0, z1, x0),
                 z0 = svdiv_x (p0, z1, x0))
 
+/*
+** div_1_s32_x_tied1:
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_s32_x_tied1, svint32_t,
+               z0 = svdiv_n_s32_x (p0, z0, 1),
+               z0 = svdiv_x (p0, z0, 1))
+
+/*
+** div_1_s32_x_untied:
+**     mov     z0\.d, z1\.d 
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_s32_x_untied, svint32_t,
+               z0 = svdiv_n_s32_x (p0, z1, 1),
+               z0 = svdiv_x (p0, z1, 1))
+
 /*
 ** div_2_s32_x_tied1:
-**     mov     (z[0-9]+\.s), #2
-**     sdiv    z0\.s, p0/m, z0\.s, \1
+**     asrd    z0\.s, p0/m, z0\.s, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_s32_x_tied1, svint32_t,
@@ -228,10 +412,71 @@ TEST_UNIFORM_Z (div_2_s32_x_tied1, svint32_t,
 
 /*
 ** div_2_s32_x_untied:
-**     mov     z0\.s, #2
-**     sdivr   z0\.s, p0/m, z0\.s, z1\.s
+**     movprfx z0, z1 
+**     asrd    z0\.s, p0/m, z0\.s, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_s32_x_untied, svint32_t,
                z0 = svdiv_n_s32_x (p0, z1, 2),
                z0 = svdiv_x (p0, z1, 2))
+
+/*
+** div_3_s32_x_tied1:
+**     mov     (z[0-9]+\.s), #3
+**     sdiv    z0\.s, p0/m, z0\.s, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_s32_x_tied1, svint32_t,
+               z0 = svdiv_n_s32_x (p0, z0, 3),
+               z0 = svdiv_x (p0, z0, 3))
+
+/*
+** div_3_s32_x_untied:
+**     mov     z0\.s, #3
+**     sdivr   z0\.s, p0/m, z0\.s, z1\.s
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_s32_x_untied, svint32_t,
+               z0 = svdiv_n_s32_x (p0, z1, 3),
+               z0 = svdiv_x (p0, z1, 3))
+
+/*
+** div_maxpow_s32_x_tied1:
+**     asrd    z0\.s, p0/m, z0\.s, #30
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s32_x_tied1, svint32_t,
+               z0 = svdiv_n_s32_x (p0, z0, MAXPOW),
+               z0 = svdiv_x (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_s32_x_untied:
+**     movprfx z0, z1
+**     asrd    z0\.s, p0/m, z0\.s, #30
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s32_x_untied, svint32_t,
+               z0 = svdiv_n_s32_x (p0, z1, MAXPOW),
+               z0 = svdiv_x (p0, z1, MAXPOW))
+
+/*
+** div_intmin_s32_x_tied1:
+**     mov     (z[0-9]+\.s), #-2147483648
+**     sdiv    z0\.s, p0/m, z0\.s, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_intmin_s32_x_tied1, svint32_t,
+               z0 = svdiv_n_s32_x (p0, z0, INT32_MIN),
+               z0 = svdiv_x (p0, z0, INT32_MIN))
+
+/*
+** div_intmin_s32_x_untied:
+**     mov     z0\.s, #-2147483648
+**     sdivr   z0\.s, p0/m, z0\.s, z1\.s
+**     ret
+*/
+TEST_UNIFORM_Z (div_intmin_s32_x_untied, svint32_t,
+               z0 = svdiv_n_s32_x (p0, z1, INT32_MIN),
+               z0 = svdiv_x (p0, z1, INT32_MIN))
+
+
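
The _m, _z and _x suffixes exercised above select the ACLE predication
modes, which is why svdiv by 1 folds differently per mode (sel, mov plus
sdivr, or a bare ret).  A sketch of the three forms (not part of the
patch):

    #include <arm_sve.h>

    /* _m: inactive lanes keep the first operand's value.  */
    svint32_t div_m (svbool_t pg, svint32_t a) { return svdiv_n_s32_m (pg, a, 2); }
    /* _z: inactive lanes are set to zero.  */
    svint32_t div_z (svbool_t pg, svint32_t a) { return svdiv_n_s32_z (pg, a, 2); }
    /* _x: inactive lanes have unspecified values.  */
    svint32_t div_x (svbool_t pg, svint32_t a) { return svdiv_n_s32_x (pg, a, 2); }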
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c
index 464dca28d747..cfed6f9c1b31 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c
@@ -2,6 +2,8 @@
 
 #include "test_sve_acle.h"
 
+#define MAXPOW 1ULL<<62
+
 /*
 ** div_s64_m_tied1:
 **     sdiv    z0\.d, p0/m, z0\.d, z1\.d
@@ -53,10 +55,27 @@ TEST_UNIFORM_ZX (div_x0_s64_m_untied, svint64_t, int64_t,
                 z0 = svdiv_n_s64_m (p0, z1, x0),
                 z0 = svdiv_m (p0, z1, x0))
 
+/*
+** div_1_s64_m_tied1:
+**     sel     z0\.d, p0, z0\.d, z0\.d
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_s64_m_tied1, svint64_t,
+               z0 = svdiv_n_s64_m (p0, z0, 1),
+               z0 = svdiv_m (p0, z0, 1))
+
+/*
+** div_1_s64_m_untied:
+**     sel     z0\.d, p0, z1\.d, z1\.d
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_s64_m_untied, svint64_t,
+               z0 = svdiv_n_s64_m (p0, z1, 1),
+               z0 = svdiv_m (p0, z1, 1))
+
 /*
 ** div_2_s64_m_tied1:
-**     mov     (z[0-9]+\.d), #2
-**     sdiv    z0\.d, p0/m, z0\.d, \1
+**     asrd    z0\.d, p0/m, z0\.d, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_s64_m_tied1, svint64_t,
@@ -65,15 +84,75 @@ TEST_UNIFORM_Z (div_2_s64_m_tied1, svint64_t,
 
 /*
 ** div_2_s64_m_untied:
-**     mov     (z[0-9]+\.d), #2
 **     movprfx z0, z1
-**     sdiv    z0\.d, p0/m, z0\.d, \1
+**     asrd    z0\.d, p0/m, z0\.d, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_s64_m_untied, svint64_t,
                z0 = svdiv_n_s64_m (p0, z1, 2),
                z0 = svdiv_m (p0, z1, 2))
 
+/*
+** div_3_s64_m_tied1:
+**     mov     (z[0-9]+\.d), #3
+**     sdiv    z0\.d, p0/m, z0\.d, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_s64_m_tied1, svint64_t,
+               z0 = svdiv_n_s64_m (p0, z0, 3),
+               z0 = svdiv_m (p0, z0, 3))
+
+/*
+** div_3_s64_m_untied:
+**     mov     (z[0-9]+\.d), #3
+**     movprfx z0, z1
+**     sdiv    z0\.d, p0/m, z0\.d, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_s64_m_untied, svint64_t,
+               z0 = svdiv_n_s64_m (p0, z1, 3),
+               z0 = svdiv_m (p0, z1, 3))
+
+/*
+** div_maxpow_s64_m_tied1:
+**     asrd    z0\.d, p0/m, z0\.d, #62
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s64_m_tied1, svint64_t,
+               z0 = svdiv_n_s64_m (p0, z0, MAXPOW),
+               z0 = svdiv_m (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_s64_m_untied:
+**     movprfx z0, z1
+**     asrd    z0\.d, p0/m, z0\.d, #62
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s64_m_untied, svint64_t,
+               z0 = svdiv_n_s64_m (p0, z1, MAXPOW),
+               z0 = svdiv_m (p0, z1, MAXPOW))
+
+/*
+** div_intmin_s64_m_tied1:
+**     mov     (z[0-9]+\.d), #-9223372036854775808
+**     sdiv    z0\.d, p0/m, z0\.d, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_intmin_s64_m_tied1, svint64_t,
+               z0 = svdiv_n_s64_m (p0, z0, INT64_MIN),
+               z0 = svdiv_m (p0, z0, INT64_MIN))
+
+/*
+** div_intmin_s64_m_untied:
+**     mov     (z[0-9]+\.d), #-9223372036854775808
+**     movprfx z0, z1
+**     sdiv    z0\.d, p0/m, z0\.d, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_intmin_s64_m_untied, svint64_t,
+               z0 = svdiv_n_s64_m (p0, z1, INT64_MIN),
+               z0 = svdiv_m (p0, z1, INT64_MIN))
+
 /*
 ** div_s64_z_tied1:
 **     movprfx z0\.d, p0/z, z0\.d
@@ -137,19 +216,61 @@ TEST_UNIFORM_ZX (div_x0_s64_z_untied, svint64_t, int64_t,
                 z0 = svdiv_z (p0, z1, x0))
 
 /*
-** div_2_s64_z_tied1:
-**     mov     (z[0-9]+\.d), #2
+** div_1_s64_z_tied1:
+**     mov     (z[0-9]+\.d), #1
 **     movprfx z0\.d, p0/z, z0\.d
 **     sdiv    z0\.d, p0/m, z0\.d, \1
 **     ret
 */
+TEST_UNIFORM_Z (div_1_s64_z_tied1, svint64_t,
+               z0 = svdiv_n_s64_z (p0, z0, 1),
+               z0 = svdiv_z (p0, z0, 1))
+
+/*
+** div_1_s64_z_untied:
+**     mov     z0\.d, #1
+**     movprfx z0\.d, p0/z, z0\.d
+**     sdivr   z0\.d, p0/m, z0\.d, z1\.d
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_s64_z_untied, svint64_t,
+               z0 = svdiv_n_s64_z (p0, z1, 1),
+               z0 = svdiv_z (p0, z1, 1))
+
+/*
+** div_2_s64_z_tied1:
+**     movprfx z0\.d, p0/z, z0\.d
+**     asrd    z0\.d, p0/m, z0\.d, #1
+**     ret
+*/
 TEST_UNIFORM_Z (div_2_s64_z_tied1, svint64_t,
                z0 = svdiv_n_s64_z (p0, z0, 2),
                z0 = svdiv_z (p0, z0, 2))
 
 /*
 ** div_2_s64_z_untied:
-**     mov     (z[0-9]+\.d), #2
+**     movprfx z0\.d, p0/z, z1\.d
+**     asrd    z0\.d, p0/m, z0\.d, #1
+**     ret
+*/
+TEST_UNIFORM_Z (div_2_s64_z_untied, svint64_t,
+               z0 = svdiv_n_s64_z (p0, z1, 2),
+               z0 = svdiv_z (p0, z1, 2))
+
+/*
+** div_3_s64_z_tied1:
+**     mov     (z[0-9]+\.d), #3
+**     movprfx z0\.d, p0/z, z0\.d
+**     sdiv    z0\.d, p0/m, z0\.d, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_s64_z_tied1, svint64_t,
+               z0 = svdiv_n_s64_z (p0, z0, 3),
+               z0 = svdiv_z (p0, z0, 3))
+
+/*
+** div_3_s64_z_untied:
+**     mov     (z[0-9]+\.d), #3
 ** (
 **     movprfx z0\.d, p0/z, z1\.d
 **     sdiv    z0\.d, p0/m, z0\.d, \1
@@ -159,9 +280,56 @@ TEST_UNIFORM_Z (div_2_s64_z_tied1, svint64_t,
 ** )
 **     ret
 */
-TEST_UNIFORM_Z (div_2_s64_z_untied, svint64_t,
-               z0 = svdiv_n_s64_z (p0, z1, 2),
-               z0 = svdiv_z (p0, z1, 2))
+TEST_UNIFORM_Z (div_3_s64_z_untied, svint64_t,
+               z0 = svdiv_n_s64_z (p0, z1, 3),
+               z0 = svdiv_z (p0, z1, 3))
+
+/*
+** div_maxpow_s64_z_tied1:
+**     movprfx z0\.d, p0/z, z0\.d
+**     asrd    z0\.d, p0/m, z0\.d, #62
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s64_z_tied1, svint64_t,
+               z0 = svdiv_n_s64_z (p0, z0, MAXPOW),
+               z0 = svdiv_z (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_s64_z_untied:
+**     movprfx z0\.d, p0/z, z1\.d
+**     asrd    z0\.d, p0/m, z0\.d, #62
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s64_z_untied, svint64_t,
+               z0 = svdiv_n_s64_z (p0, z1, MAXPOW),
+               z0 = svdiv_z (p0, z1, MAXPOW))
+
+/*
+** div_intmin_s64_z_tied1:
+**     mov     (z[0-9]+\.d), #-9223372036854775808
+**     movprfx z0\.d, p0/z, z0\.d
+**     sdiv    z0\.d, p0/m, z0\.d, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_intmin_s64_z_tied1, svint64_t,
+               z0 = svdiv_n_s64_z (p0, z0, INT64_MIN),
+               z0 = svdiv_z (p0, z0, INT64_MIN))
+
+/*
+** div_intmin_s64_z_untied:
+**     mov     (z[0-9]+\.d), #-9223372036854775808
+** (
+**     movprfx z0\.d, p0/z, z1\.d
+**     sdiv    z0\.d, p0/m, z0\.d, \1
+** |
+**     movprfx z0\.d, p0/z, \1
+**     sdivr   z0\.d, p0/m, z0\.d, z1\.d
+** )
+**     ret
+*/
+TEST_UNIFORM_Z (div_intmin_s64_z_untied, svint64_t,
+               z0 = svdiv_n_s64_z (p0, z1, INT64_MIN),
+               z0 = svdiv_z (p0, z1, INT64_MIN))
 
 /*
 ** div_s64_x_tied1:
@@ -216,10 +384,26 @@ TEST_UNIFORM_ZX (div_x0_s64_x_untied, svint64_t, int64_t,
                 z0 = svdiv_n_s64_x (p0, z1, x0),
                 z0 = svdiv_x (p0, z1, x0))
 
+/*
+** div_1_s64_x_tied1:
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_s64_x_tied1, svint64_t,
+               z0 = svdiv_n_s64_x (p0, z0, 1),
+               z0 = svdiv_x (p0, z0, 1))
+
+/*
+** div_1_s64_x_untied:
+**     mov     z0\.d, z1\.d 
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_s64_x_untied, svint64_t,
+               z0 = svdiv_n_s64_x (p0, z1, 1),
+               z0 = svdiv_x (p0, z1, 1))
+
 /*
 ** div_2_s64_x_tied1:
-**     mov     (z[0-9]+\.d), #2
-**     sdiv    z0\.d, p0/m, z0\.d, \1
+**     asrd    z0\.d, p0/m, z0\.d, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_s64_x_tied1, svint64_t,
@@ -228,10 +412,71 @@ TEST_UNIFORM_Z (div_2_s64_x_tied1, svint64_t,
 
 /*
 ** div_2_s64_x_untied:
-**     mov     z0\.d, #2
-**     sdivr   z0\.d, p0/m, z0\.d, z1\.d
+**     movprfx z0, z1 
+**     asrd    z0\.d, p0/m, z0\.d, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_s64_x_untied, svint64_t,
                z0 = svdiv_n_s64_x (p0, z1, 2),
                z0 = svdiv_x (p0, z1, 2))
+
+/*
+** div_3_s64_x_tied1:
+**     mov     (z[0-9]+\.d), #3
+**     sdiv    z0\.d, p0/m, z0\.d, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_s64_x_tied1, svint64_t,
+               z0 = svdiv_n_s64_x (p0, z0, 3),
+               z0 = svdiv_x (p0, z0, 3))
+
+/*
+** div_3_s64_x_untied:
+**     mov     z0\.d, #3
+**     sdivr   z0\.d, p0/m, z0\.d, z1\.d
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_s64_x_untied, svint64_t,
+               z0 = svdiv_n_s64_x (p0, z1, 3),
+               z0 = svdiv_x (p0, z1, 3))
+
+/*
+** div_maxpow_s64_x_tied1:
+**     asrd    z0\.d, p0/m, z0\.d, #62
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s64_x_tied1, svint64_t,
+               z0 = svdiv_n_s64_x (p0, z0, MAXPOW),
+               z0 = svdiv_x (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_s64_x_untied:
+**     movprfx z0, z1
+**     asrd    z0\.d, p0/m, z0\.d, #62
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_s64_x_untied, svint64_t,
+               z0 = svdiv_n_s64_x (p0, z1, MAXPOW),
+               z0 = svdiv_x (p0, z1, MAXPOW))
+
+/*
+** div_intmin_s64_x_tied1:
+**     mov     (z[0-9]+\.d), #-9223372036854775808
+**     sdiv    z0\.d, p0/m, z0\.d, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_intmin_s64_x_tied1, svint64_t,
+               z0 = svdiv_n_s64_x (p0, z0, INT64_MIN),
+               z0 = svdiv_x (p0, z0, INT64_MIN))
+
+/*
+** div_intmin_s64_x_untied:
+**     mov     z0\.d, #-9223372036854775808
+**     sdivr   z0\.d, p0/m, z0\.d, z1\.d
+**     ret
+*/
+TEST_UNIFORM_Z (div_intmin_s64_x_untied, svint64_t,
+               z0 = svdiv_n_s64_x (p0, z1, INT64_MIN),
+               z0 = svdiv_x (p0, z1, INT64_MIN))
+
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c
index 232ccacf524f..9707664caf4c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c
@@ -2,6 +2,8 @@
 
 #include "test_sve_acle.h"
 
+#define MAXPOW 1<<31
+
 /*
 ** div_u32_m_tied1:
 **     udiv    z0\.s, p0/m, z0\.s, z1\.s
@@ -53,10 +55,27 @@ TEST_UNIFORM_ZX (div_w0_u32_m_untied, svuint32_t, uint32_t,
                 z0 = svdiv_n_u32_m (p0, z1, x0),
                 z0 = svdiv_m (p0, z1, x0))
 
+/*
+** div_1_u32_m_tied1:
+**     sel     z0\.s, p0, z0\.s, z0\.s
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_u32_m_tied1, svuint32_t,
+               z0 = svdiv_n_u32_m (p0, z0, 1),
+               z0 = svdiv_m (p0, z0, 1))
+
+/*
+** div_1_u32_m_untied:
+**     sel     z0\.s, p0, z1\.s, z1\.s
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_u32_m_untied, svuint32_t,
+               z0 = svdiv_n_u32_m (p0, z1, 1),
+               z0 = svdiv_m (p0, z1, 1))
+
 /*
 ** div_2_u32_m_tied1:
-**     mov     (z[0-9]+\.s), #2
-**     udiv    z0\.s, p0/m, z0\.s, \1
+**     lsr     z0\.s, p0/m, z0\.s, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_u32_m_tied1, svuint32_t,
@@ -65,15 +84,54 @@ TEST_UNIFORM_Z (div_2_u32_m_tied1, svuint32_t,
 
 /*
 ** div_2_u32_m_untied:
-**     mov     (z[0-9]+\.s), #2
 **     movprfx z0, z1
-**     udiv    z0\.s, p0/m, z0\.s, \1
+**     lsr     z0\.s, p0/m, z0\.s, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_u32_m_untied, svuint32_t,
                z0 = svdiv_n_u32_m (p0, z1, 2),
                z0 = svdiv_m (p0, z1, 2))
 
+/*
+** div_3_u32_m_tied1:
+**     mov     (z[0-9]+\.s), #3
+**     udiv    z0\.s, p0/m, z0\.s, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_u32_m_tied1, svuint32_t,
+               z0 = svdiv_n_u32_m (p0, z0, 3),
+               z0 = svdiv_m (p0, z0, 3))
+
+/*
+** div_3_u32_m_untied:
+**     mov     (z[0-9]+\.s), #3
+**     movprfx z0, z1
+**     udiv    z0\.s, p0/m, z0\.s, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_u32_m_untied, svuint32_t,
+               z0 = svdiv_n_u32_m (p0, z1, 3),
+               z0 = svdiv_m (p0, z1, 3))
+
+/*
+** div_maxpow_u32_m_tied1:
+**     lsr     z0\.s, p0/m, z0\.s, #31
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u32_m_tied1, svuint32_t,
+               z0 = svdiv_n_u32_m (p0, z0, MAXPOW),
+               z0 = svdiv_m (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_u32_m_untied:
+**     movprfx z0, z1
+**     lsr     z0\.s, p0/m, z0\.s, #31
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u32_m_untied, svuint32_t,
+               z0 = svdiv_n_u32_m (p0, z1, MAXPOW),
+               z0 = svdiv_m (p0, z1, MAXPOW))
+
 /*
 ** div_u32_z_tied1:
 **     movprfx z0\.s, p0/z, z0\.s
@@ -137,19 +195,61 @@ TEST_UNIFORM_ZX (div_w0_u32_z_untied, svuint32_t, uint32_t,
                 z0 = svdiv_z (p0, z1, x0))
 
 /*
-** div_2_u32_z_tied1:
-**     mov     (z[0-9]+\.s), #2
+** div_1_u32_z_tied1:
+**     mov     (z[0-9]+\.s), #1
 **     movprfx z0\.s, p0/z, z0\.s
 **     udiv    z0\.s, p0/m, z0\.s, \1
 **     ret
 */
+TEST_UNIFORM_Z (div_1_u32_z_tied1, svuint32_t,
+               z0 = svdiv_n_u32_z (p0, z0, 1),
+               z0 = svdiv_z (p0, z0, 1))
+
+/*
+** div_1_u32_z_untied:
+**     mov     z0\.s, #1
+**     movprfx z0\.s, p0/z, z0\.s
+**     udivr   z0\.s, p0/m, z0\.s, z1\.s
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_u32_z_untied, svuint32_t,
+               z0 = svdiv_n_u32_z (p0, z1, 1),
+               z0 = svdiv_z (p0, z1, 1))
+
+/*
+** div_2_u32_z_tied1:
+**     movprfx z0\.s, p0/z, z0\.s
+**     lsr     z0\.s, p0/m, z0\.s, #1
+**     ret
+*/
 TEST_UNIFORM_Z (div_2_u32_z_tied1, svuint32_t,
                z0 = svdiv_n_u32_z (p0, z0, 2),
                z0 = svdiv_z (p0, z0, 2))
 
 /*
 ** div_2_u32_z_untied:
-**     mov     (z[0-9]+\.s), #2
+**     movprfx z0\.s, p0/z, z1\.s
+**     lsr     z0\.s, p0/m, z0\.s, #1
+**     ret
+*/
+TEST_UNIFORM_Z (div_2_u32_z_untied, svuint32_t,
+               z0 = svdiv_n_u32_z (p0, z1, 2),
+               z0 = svdiv_z (p0, z1, 2))
+
+/*
+** div_3_u32_z_tied1:
+**     mov     (z[0-9]+\.s), #3
+**     movprfx z0\.s, p0/z, z0\.s
+**     udiv    z0\.s, p0/m, z0\.s, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_u32_z_tied1, svuint32_t,
+               z0 = svdiv_n_u32_z (p0, z0, 3),
+               z0 = svdiv_z (p0, z0, 3))
+
+/*
+** div_3_u32_z_untied:
+**     mov     (z[0-9]+\.s), #3
 ** (
 **     movprfx z0\.s, p0/z, z1\.s
 **     udiv    z0\.s, p0/m, z0\.s, \1
@@ -159,9 +259,29 @@ TEST_UNIFORM_Z (div_2_u32_z_tied1, svuint32_t,
 ** )
 **     ret
 */
-TEST_UNIFORM_Z (div_2_u32_z_untied, svuint32_t,
-               z0 = svdiv_n_u32_z (p0, z1, 2),
-               z0 = svdiv_z (p0, z1, 2))
+TEST_UNIFORM_Z (div_3_u32_z_untied, svuint32_t,
+               z0 = svdiv_n_u32_z (p0, z1, 3),
+               z0 = svdiv_z (p0, z1, 3))
+
+/*
+** div_maxpow_u32_z_tied1:
+**     movprfx z0\.s, p0/z, z0\.s
+**     lsr     z0\.s, p0/m, z0\.s, #31
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u32_z_tied1, svuint32_t,
+               z0 = svdiv_n_u32_z (p0, z0, MAXPOW),
+               z0 = svdiv_z (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_u32_z_untied:
+**     movprfx z0\.s, p0/z, z1\.s
+**     lsr     z0\.s, p0/m, z0\.s, #31
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u32_z_untied, svuint32_t,
+               z0 = svdiv_n_u32_z (p0, z1, MAXPOW),
+               z0 = svdiv_z (p0, z1, MAXPOW))
 
 /*
 ** div_u32_x_tied1:
@@ -216,10 +336,26 @@ TEST_UNIFORM_ZX (div_w0_u32_x_untied, svuint32_t, uint32_t,
                 z0 = svdiv_n_u32_x (p0, z1, x0),
                 z0 = svdiv_x (p0, z1, x0))
 
+/*
+** div_1_u32_x_tied1:
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_u32_x_tied1, svuint32_t,
+               z0 = svdiv_n_u32_x (p0, z0, 1),
+               z0 = svdiv_x (p0, z0, 1))
+
+/*
+** div_1_u32_x_untied:
+**     mov     z0\.d, z1\.d
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_u32_x_untied, svuint32_t,
+               z0 = svdiv_n_u32_x (p0, z1, 1),
+               z0 = svdiv_x (p0, z1, 1))
+
 /*
 ** div_2_u32_x_tied1:
-**     mov     (z[0-9]+\.s), #2
-**     udiv    z0\.s, p0/m, z0\.s, \1
+**     lsr     z0\.s, z0\.s, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_u32_x_tied1, svuint32_t,
@@ -228,10 +364,47 @@ TEST_UNIFORM_Z (div_2_u32_x_tied1, svuint32_t,
 
 /*
 ** div_2_u32_x_untied:
-**     mov     z0\.s, #2
-**     udivr   z0\.s, p0/m, z0\.s, z1\.s
+**     lsr     z0\.s, z1\.s, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_u32_x_untied, svuint32_t,
                z0 = svdiv_n_u32_x (p0, z1, 2),
                z0 = svdiv_x (p0, z1, 2))
+
+/*
+** div_3_u32_x_tied1:
+**     mov     (z[0-9]+\.s), #3
+**     udiv    z0\.s, p0/m, z0\.s, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_u32_x_tied1, svuint32_t,
+               z0 = svdiv_n_u32_x (p0, z0, 3),
+               z0 = svdiv_x (p0, z0, 3))
+
+/*
+** div_3_u32_x_untied:
+**     mov     z0\.s, #3
+**     udivr   z0\.s, p0/m, z0\.s, z1\.s
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_u32_x_untied, svuint32_t,
+               z0 = svdiv_n_u32_x (p0, z1, 3),
+               z0 = svdiv_x (p0, z1, 3))
+
+/*
+** div_maxpow_u32_x_tied1:
+**     lsr     z0\.s, z0\.s, #31
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u32_x_tied1, svuint32_t,
+               z0 = svdiv_n_u32_x (p0, z0, MAXPOW),
+               z0 = svdiv_x (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_u32_x_untied:
+**     lsr     z0\.s, z1\.s, #31
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u32_x_untied, svuint32_t,
+               z0 = svdiv_n_u32_x (p0, z1, MAXPOW),
+               z0 = svdiv_x (p0, z1, MAXPOW))
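
For the unsigned tests above, no rounding correction is needed: x / 2^n
equals x >> n for every unsigned x, which is why the predicated forms use
LSR and the unpredicated _x forms collapse to a bare immediate LSR.  A
scalar sketch of that identity (not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      /* Unsigned division by 2^n is exactly a logical right shift.  */
      uint32_t vals[] = { 0u, 7u, 0x80000000u, UINT32_MAX };
      for (unsigned i = 0; i < 4; i++)
        for (unsigned n = 0; n < 32; n++)
          assert (vals[i] / (1u << n) == vals[i] >> n);
      return 0;
    }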
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c
index ac7c026eea37..5247ebdac7ae 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c
@@ -2,6 +2,8 @@
 
 #include "test_sve_acle.h"
 
+#define MAXPOW 1ULL<<63
+
 /*
 ** div_u64_m_tied1:
 **     udiv    z0\.d, p0/m, z0\.d, z1\.d
@@ -53,10 +55,27 @@ TEST_UNIFORM_ZX (div_x0_u64_m_untied, svuint64_t, uint64_t,
                 z0 = svdiv_n_u64_m (p0, z1, x0),
                 z0 = svdiv_m (p0, z1, x0))
 
+/*
+** div_1_u64_m_tied1:
+**     sel     z0\.d, p0, z0\.d, z0\.d
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_u64_m_tied1, svuint64_t,
+               z0 = svdiv_n_u64_m (p0, z0, 1),
+               z0 = svdiv_m (p0, z0, 1))
+
+/*
+** div_1_u64_m_untied:
+**     sel     z0\.d, p0, z1\.d, z1\.d
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_u64_m_untied, svuint64_t,
+               z0 = svdiv_n_u64_m (p0, z1, 1),
+               z0 = svdiv_m (p0, z1, 1))
+
 /*
 ** div_2_u64_m_tied1:
-**     mov     (z[0-9]+\.d), #2
-**     udiv    z0\.d, p0/m, z0\.d, \1
+**     lsr     z0\.d, p0/m, z0\.d, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_u64_m_tied1, svuint64_t,
@@ -65,15 +84,54 @@ TEST_UNIFORM_Z (div_2_u64_m_tied1, svuint64_t,
 
 /*
 ** div_2_u64_m_untied:
-**     mov     (z[0-9]+\.d), #2
 **     movprfx z0, z1
-**     udiv    z0\.d, p0/m, z0\.d, \1
+**     lsr     z0\.d, p0/m, z0\.d, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_u64_m_untied, svuint64_t,
                z0 = svdiv_n_u64_m (p0, z1, 2),
                z0 = svdiv_m (p0, z1, 2))
 
+/*
+** div_3_u64_m_tied1:
+**     mov     (z[0-9]+\.d), #3
+**     udiv    z0\.d, p0/m, z0\.d, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_u64_m_tied1, svuint64_t,
+               z0 = svdiv_n_u64_m (p0, z0, 3),
+               z0 = svdiv_m (p0, z0, 3))
+
+/*
+** div_3_u64_m_untied:
+**     mov     (z[0-9]+\.d), #3
+**     movprfx z0, z1
+**     udiv    z0\.d, p0/m, z0\.d, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_u64_m_untied, svuint64_t,
+               z0 = svdiv_n_u64_m (p0, z1, 3),
+               z0 = svdiv_m (p0, z1, 3))
+
+/*
+** div_maxpow_u64_m_tied1:
+**     lsr     z0\.d, p0/m, z0\.d, #63
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u64_m_tied1, svuint64_t,
+               z0 = svdiv_n_u64_m (p0, z0, MAXPOW),
+               z0 = svdiv_m (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_u64_m_untied:
+**     movprfx z0, z1
+**     lsr     z0\.d, p0/m, z0\.d, #63
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u64_m_untied, svuint64_t,
+               z0 = svdiv_n_u64_m (p0, z1, MAXPOW),
+               z0 = svdiv_m (p0, z1, MAXPOW))
+
 /*
 ** div_u64_z_tied1:
 **     movprfx z0\.d, p0/z, z0\.d
@@ -137,19 +195,61 @@ TEST_UNIFORM_ZX (div_x0_u64_z_untied, svuint64_t, uint64_t,
                 z0 = svdiv_z (p0, z1, x0))
 
 /*
-** div_2_u64_z_tied1:
-**     mov     (z[0-9]+\.d), #2
+** div_1_u64_z_tied1:
+**     mov     (z[0-9]+\.d), #1
 **     movprfx z0\.d, p0/z, z0\.d
 **     udiv    z0\.d, p0/m, z0\.d, \1
 **     ret
 */
+TEST_UNIFORM_Z (div_1_u64_z_tied1, svuint64_t,
+               z0 = svdiv_n_u64_z (p0, z0, 1),
+               z0 = svdiv_z (p0, z0, 1))
+
+/*
+** div_1_u64_z_untied:
+**     mov     z0\.d, #1
+**     movprfx z0\.d, p0/z, z0\.d
+**     udivr   z0\.d, p0/m, z0\.d, z1\.d
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_u64_z_untied, svuint64_t,
+               z0 = svdiv_n_u64_z (p0, z1, 1),
+               z0 = svdiv_z (p0, z1, 1))
+
+/*
+** div_2_u64_z_tied1:
+**     movprfx z0\.d, p0/z, z0\.d
+**     lsr     z0\.d, p0/m, z0\.d, #1
+**     ret
+*/
 TEST_UNIFORM_Z (div_2_u64_z_tied1, svuint64_t,
                z0 = svdiv_n_u64_z (p0, z0, 2),
                z0 = svdiv_z (p0, z0, 2))
 
 /*
 ** div_2_u64_z_untied:
-**     mov     (z[0-9]+\.d), #2
+**     movprfx z0\.d, p0/z, z1\.d
+**     lsr     z0\.d, p0/m, z0\.d, #1
+**     ret
+*/
+TEST_UNIFORM_Z (div_2_u64_z_untied, svuint64_t,
+               z0 = svdiv_n_u64_z (p0, z1, 2),
+               z0 = svdiv_z (p0, z1, 2))
+
+/*
+** div_3_u64_z_tied1:
+**     mov     (z[0-9]+\.d), #3
+**     movprfx z0\.d, p0/z, z0\.d
+**     udiv    z0\.d, p0/m, z0\.d, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_u64_z_tied1, svuint64_t,
+               z0 = svdiv_n_u64_z (p0, z0, 3),
+               z0 = svdiv_z (p0, z0, 3))
+
+/*
+** div_3_u64_z_untied:
+**     mov     (z[0-9]+\.d), #3
 ** (
 **     movprfx z0\.d, p0/z, z1\.d
 **     udiv    z0\.d, p0/m, z0\.d, \1
@@ -159,9 +259,29 @@ TEST_UNIFORM_Z (div_2_u64_z_tied1, svuint64_t,
 ** )
 **     ret
 */
-TEST_UNIFORM_Z (div_2_u64_z_untied, svuint64_t,
-               z0 = svdiv_n_u64_z (p0, z1, 2),
-               z0 = svdiv_z (p0, z1, 2))
+TEST_UNIFORM_Z (div_3_u64_z_untied, svuint64_t,
+               z0 = svdiv_n_u64_z (p0, z1, 3),
+               z0 = svdiv_z (p0, z1, 3))
+
+/*
+** div_maxpow_u64_z_tied1:
+**     movprfx z0\.d, p0/z, z0\.d
+**     lsr     z0\.d, p0/m, z0\.d, #63
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u64_z_tied1, svuint64_t,
+               z0 = svdiv_n_u64_z (p0, z0, MAXPOW),
+               z0 = svdiv_z (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_u64_z_untied:
+**     movprfx z0\.d, p0/z, z1\.d
+**     lsr     z0\.d, p0/m, z0\.d, #63
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u64_z_untied, svuint64_t,
+               z0 = svdiv_n_u64_z (p0, z1, MAXPOW),
+               z0 = svdiv_z (p0, z1, MAXPOW))
 
 /*
 ** div_u64_x_tied1:
@@ -216,10 +336,26 @@ TEST_UNIFORM_ZX (div_x0_u64_x_untied, svuint64_t, uint64_t,
                 z0 = svdiv_n_u64_x (p0, z1, x0),
                 z0 = svdiv_x (p0, z1, x0))
 
+/*
+** div_1_u64_x_tied1:
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_u64_x_tied1, svuint64_t,
+               z0 = svdiv_n_u64_x (p0, z0, 1),
+               z0 = svdiv_x (p0, z0, 1))
+
+/*
+** div_1_u64_x_untied:
+**     mov     z0\.d, z1\.d
+**     ret
+*/
+TEST_UNIFORM_Z (div_1_u64_x_untied, svuint64_t,
+               z0 = svdiv_n_u64_x (p0, z1, 1),
+               z0 = svdiv_x (p0, z1, 1))
+
 /*
 ** div_2_u64_x_tied1:
-**     mov     (z[0-9]+\.d), #2
-**     udiv    z0\.d, p0/m, z0\.d, \1
+**     lsr     z0\.d, z0\.d, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_u64_x_tied1, svuint64_t,
@@ -228,10 +364,47 @@ TEST_UNIFORM_Z (div_2_u64_x_tied1, svuint64_t,
 
 /*
 ** div_2_u64_x_untied:
-**     mov     z0\.d, #2
-**     udivr   z0\.d, p0/m, z0\.d, z1\.d
+**     lsr     z0\.d, z1\.d, #1
 **     ret
 */
 TEST_UNIFORM_Z (div_2_u64_x_untied, svuint64_t,
                z0 = svdiv_n_u64_x (p0, z1, 2),
                z0 = svdiv_x (p0, z1, 2))
+
+/*
+** div_3_u64_x_tied1:
+**     mov     (z[0-9]+\.d), #3
+**     udiv    z0\.d, p0/m, z0\.d, \1
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_u64_x_tied1, svuint64_t,
+               z0 = svdiv_n_u64_x (p0, z0, 3),
+               z0 = svdiv_x (p0, z0, 3))
+
+/*
+** div_3_u64_x_untied:
+**     mov     z0\.d, #3
+**     udivr   z0\.d, p0/m, z0\.d, z1\.d
+**     ret
+*/
+TEST_UNIFORM_Z (div_3_u64_x_untied, svuint64_t,
+               z0 = svdiv_n_u64_x (p0, z1, 3),
+               z0 = svdiv_x (p0, z1, 3))
+
+/*
+** div_maxpow_u64_x_tied1:
+**     lsr     z0\.d, z0\.d, #63
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u64_x_tied1, svuint64_t,
+               z0 = svdiv_n_u64_x (p0, z0, MAXPOW),
+               z0 = svdiv_x (p0, z0, MAXPOW))
+
+/*
+** div_maxpow_u64_x_untied:
+**     lsr     z0\.d, z1\.d, #63
+**     ret
+*/
+TEST_UNIFORM_Z (div_maxpow_u64_x_untied, svuint64_t,
+               z0 = svdiv_n_u64_x (p0, z1, MAXPOW),
+               z0 = svdiv_x (p0, z1, MAXPOW))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c b/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c
new file mode 100644
index 000000000000..c96bb2763dce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c
@@ -0,0 +1,91 @@
+/* { dg-do run { target aarch64_sve128_hw } } */
+/* { dg-options "-O2 -msve-vector-bits=128" } */
+
+#include <arm_sve.h>
+#include <stdint.h>
+
+typedef svbool_t pred __attribute__((arm_sve_vector_bits(128)));
+typedef svfloat16_t svfloat16_ __attribute__((arm_sve_vector_bits(128)));
+typedef svfloat32_t svfloat32_ __attribute__((arm_sve_vector_bits(128)));
+typedef svfloat64_t svfloat64_ __attribute__((arm_sve_vector_bits(128)));
+typedef svint32_t svint32_ __attribute__((arm_sve_vector_bits(128)));
+typedef svint64_t svint64_ __attribute__((arm_sve_vector_bits(128)));
+typedef svuint32_t svuint32_ __attribute__((arm_sve_vector_bits(128)));
+typedef svuint64_t svuint64_ __attribute__((arm_sve_vector_bits(128)));
+
+#define F(T, TS, P, OP1, OP2)                                          \
+{                                                                      \
+  T##_t op1 = (T##_t) OP1;                                             \
+  T##_t op2 = (T##_t) OP2;                                             \
+  sv##T##_ res = svdiv_##P (pg, svdup_##TS (op1), svdup_##TS (op2));   \
+  sv##T##_ exp = svdup_##TS (op1 / op2);                               \
+  if (svptest_any (pg, svcmpne (pg, exp, res)))                        \
+    __builtin_abort ();                                                \
+                                                                       \
+  sv##T##_ res_n = svdiv_##P (pg, svdup_##TS (op1), op2);              \
+  if (svptest_any (pg, svcmpne (pg, exp, res_n)))                      \
+    __builtin_abort ();                                                \
+}
+
+#define TEST_TYPES_1(T, TS)                                            \
+  F (T, TS, m, 79, 16)                                                 \
+  F (T, TS, z, 79, 16)                                                 \
+  F (T, TS, x, 79, 16)
+
+#define TEST_TYPES                                                     \
+  TEST_TYPES_1 (float16, f16)                                          \
+  TEST_TYPES_1 (float32, f32)                                          \
+  TEST_TYPES_1 (float64, f64)                                          \
+  TEST_TYPES_1 (int32, s32)                                            \
+  TEST_TYPES_1 (int64, s64)                                            \
+  TEST_TYPES_1 (uint32, u32)                                           \
+  TEST_TYPES_1 (uint64, u64)
+
+#define TEST_VALUES_S_1(B, OP1, OP2)                                   \
+  F (int##B, s##B, x, OP1, OP2)
+
+#define TEST_VALUES_S                                                  \
+  TEST_VALUES_S_1 (32, INT32_MIN, INT32_MIN)                           \
+  TEST_VALUES_S_1 (64, INT64_MIN, INT64_MIN)                           \
+  TEST_VALUES_S_1 (32, -7, 4)                                          \
+  TEST_VALUES_S_1 (64, -7, 4)                                          \
+  TEST_VALUES_S_1 (32, INT32_MAX, (1 << 30))                           \
+  TEST_VALUES_S_1 (64, INT64_MAX, (1ULL << 62))                        \
+  TEST_VALUES_S_1 (32, INT32_MIN, (1 << 30))                           \
+  TEST_VALUES_S_1 (64, INT64_MIN, (1ULL << 62))                        \
+  TEST_VALUES_S_1 (32, INT32_MAX, 1)                                   \
+  TEST_VALUES_S_1 (64, INT64_MAX, 1)                                   \
+  TEST_VALUES_S_1 (32, INT32_MIN, 16)                                  \
+  TEST_VALUES_S_1 (64, INT64_MIN, 16)                                  \
+  TEST_VALUES_S_1 (32, INT32_MAX, -5)                                  \
+  TEST_VALUES_S_1 (64, INT64_MAX, -5)                                  \
+  TEST_VALUES_S_1 (32, INT32_MIN, -4)                                  \
+  TEST_VALUES_S_1 (64, INT64_MIN, -4)
+
+#define TEST_VALUES_U_1(B, OP1, OP2)                                   \
+  F (uint##B, u##B, x, OP1, OP2)
+
+#define TEST_VALUES_U                                                  \
+  TEST_VALUES_U_1 (32, UINT32_MAX, UINT32_MAX)                         \
+  TEST_VALUES_U_1 (64, UINT64_MAX, UINT64_MAX)                         \
+  TEST_VALUES_U_1 (32, UINT32_MAX, (1 << 31))                          \
+  TEST_VALUES_U_1 (64, UINT64_MAX, (1ULL << 63))                       \
+  TEST_VALUES_U_1 (32, 7, 4)                                           \
+  TEST_VALUES_U_1 (64, 7, 4)                                           \
+  TEST_VALUES_U_1 (32, 7, 3)                                           \
+  TEST_VALUES_U_1 (64, 7, 3)                                           \
+  TEST_VALUES_U_1 (32, 11, 1)                                          \
+  TEST_VALUES_U_1 (64, 11, 1)
+
+#define TEST_VALUES                                                    \
+  TEST_VALUES_S                                                        \
+  TEST_VALUES_U
+
+int
+main (void)
+{
+  const pred pg = svptrue_b8 ();
+  TEST_TYPES
+  TEST_VALUES
+  return 0;
+}
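
For reference, one expansion of the F macro above, F (int32, s32, x, -7, 4),
reduces to the following block (pg as defined in main):

    {
      int32_t op1 = (int32_t) -7;
      int32_t op2 = (int32_t) 4;
      svint32_ res = svdiv_x (pg, svdup_s32 (op1), svdup_s32 (op2));
      svint32_ exp = svdup_s32 (op1 / op2);
      if (svptest_any (pg, svcmpne (pg, exp, res)))
        __builtin_abort ();

      svint32_ res_n = svdiv_x (pg, svdup_s32 (op1), op2);
      if (svptest_any (pg, svcmpne (pg, exp, res_n)))
        __builtin_abort ();
    }

Each vector-by-vector check is thus paired with its scalar-operand (_n)
twin, both compared against the same scalar-computed expected value.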
