The SVE ASRD instruction performs an arithmetic shift right by an immediate
with rounding toward zero, i.e. a signed division by a power of two.
This patch enables the use of ASRD with Neon (Advanced SIMD) modes when SVE is
available.
For example:
int8_t in[N], out[N];
void
foo (void)
{
for (int i = 0; i < N; i++)
out[i] = in[i] / 4;
}
currently compiles to:
ldr q31, [x1, x0]
cmlt v30.16b, v31.16b, #0
and z30.b, z30.b, 3
add v30.16b, v30.16b, v31.16b
sshr v30.16b, v30.16b, 2
str q30, [x0, x2]
add x0, x0, 16
cmp x0, 1024
but with this patch it can instead be:
ldp q30, q31, [x0], 32
asrd z31.b, p7/m, z31.b, #2
asrd z30.b, p7/m, z30.b, #2
stp q30, q31, [x1], 32
cmp x0, x2
The patch was bootstrapped and regression-tested on aarch64-linux-gnu with no
regressions.
OK for mainline?
Signed-off-by: Soumya AR <[email protected]>
gcc/ChangeLog:
* config/aarch64/aarch64-sve.md (sdiv_pow2<mode>3): Extend to support
Neon modes.
(*sdiv_pow2<mode>3): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/sve-asrd.c: New test.
---
gcc/config/aarch64/aarch64-sve.md | 25 ++++-----
.../gcc.target/aarch64/sve/sve-asrd.c | 54 +++++++++++++++++++
2 files changed, 67 insertions(+), 12 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c
diff --git a/gcc/config/aarch64/aarch64-sve.md
b/gcc/config/aarch64/aarch64-sve.md
index affdb24a93d..96effe4abed 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -4972,34 +4972,35 @@
;; Unpredicated ASRD.
(define_expand "sdiv_pow2<mode>3"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
+ [(set (match_operand:SVE_VDQ_I 0 "register_operand")
+ (unspec:SVE_VDQ_I
[(match_dup 3)
- (unspec:SVE_I
- [(match_operand:SVE_I 1 "register_operand")
+ (unspec:SVE_VDQ_I
+ [(match_operand:SVE_VDQ_I 1 "register_operand")
(match_operand 2 "aarch64_simd_rshift_imm")]
UNSPEC_ASRD)]
UNSPEC_PRED_X))]
"TARGET_SVE"
{
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode,
+ GET_MODE_UNIT_SIZE (<MODE>mode));
}
)
;; Predicated ASRD.
(define_insn "*sdiv_pow2<mode>3"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
+ [(set (match_operand:SVE_VDQ_I 0 "register_operand")
+ (unspec:SVE_VDQ_I
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_I
- [(match_operand:SVE_I 2 "register_operand")
- (match_operand:SVE_I 3 "aarch64_simd_rshift_imm")]
+ (unspec:SVE_VDQ_I
+ [(match_operand:SVE_VDQ_I 2 "register_operand")
+ (match_operand:SVE_VDQ_I 3 "aarch64_simd_rshift_imm")]
UNSPEC_ASRD)]
UNSPEC_PRED_X))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
- [ w , Upl , 0 ; * ] asrd\t%0.<Vetype>, %1/m,
%0.<Vetype>, #%3
- [ ?&w , Upl , w ; yes ] movprfx\t%0,
%2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ [ w , Upl , 0 ; * ] asrd\t%Z0.<Vetype>, %1/m,
%Z0.<Vetype>, #%3
+ [ ?&w , Upl , w ; yes ] movprfx\t%Z0,
%Z2\;asrd\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, #%3
}
)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c
b/gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c
new file mode 100644
index 00000000000..00aa8b2380d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <stdint.h>
+#define N 1024
+
+#define FUNC(M) \
+M in_##M[N]; \
+M out_##M[N]; \
+void asrd_##M() { \
+ for (int i = 0; i < N; i++) \
+ out_##M[i] = in_##M[i] / 4; \
+}
+
+/*
+** asrd_int8_t:
+** ...
+** ptrue (p[0-7]).b, vl1
+** ...
+** asrd z[0-9]+\.b, \1/m, z[0-9]+\.b, #2
+** ...
+*/
+FUNC(int8_t)
+
+/*
+** asrd_int16_t:
+** ...
+** ptrue (p[0-7]).b, vl2
+** ...
+** asrd z[0-9]+\.h, \1/m, z[0-9]+\.h, #2
+** ...
+*/
+FUNC(int16_t)
+
+/*
+** asrd_int32_t:
+** ...
+** ptrue (p[0-7]).b, vl4
+** ...
+** asrd z[0-9]+\.s, \1/m, z[0-9]+\.s, #2
+** ...
+*/
+FUNC(int32_t)
+
+/*
+** asrd_int64_t:
+** ...
+** ptrue (p[0-7]).b, vl8
+** ...
+** asrd z[0-9]+\.d, \1/m, z[0-9]+\.d, #2
+** ...
+*/
+FUNC(int64_t)
--
2.43.2