...Patch attached...
Alan Lawrence wrote:
Following recent vectorizer changes to reductions via shifts, AArch64 will now
reduce loops such as this
unsigned char in[8] = {1, 3, 5, 7, 9, 11, 13, 15};
int
main (unsigned char argc, char **argv)
{
  unsigned char prod = 1;
  /* Prevent constant propagation of the entire loop below.  */
  asm volatile ("" : : : "memory");
  for (unsigned char i = 0; i < 8; i++)
    prod *= in[i];
  if (prod != 17)
    __builtin_printf ("Failed %d\n", prod);
  return 0;
}
using an 'ext' instruction from aarch64_expand_vec_perm_const:
main:
adrp x0, .LANCHOR0
movi v2.2s, 0        <=== note reg used here
ldr d1, [x0, #:lo12:.LANCHOR0]
ext v0.8b, v1.8b, v2.8b, #4
mul v1.8b, v1.8b, v0.8b
ext v0.8b, v1.8b, v2.8b, #2
mul v0.8b, v1.8b, v0.8b
ext v2.8b, v0.8b, v2.8b, #1
mul v0.8b, v0.8b, v2.8b
umov w1, v0.b[0]
The 'ext' works for both 64-bit and 128-bit vectors, but for 64-bit
vectors we can do slightly better using ushr: a scalar ushr of the D
register shifts the whole 64 bits at once, moving elements across lanes
and shifting in the zeroes itself, so no zero register is needed (there
is no equivalent whole-register shift for 128-bit vectors). This patch
improves the above to:
main:
adrp x0, .LANCHOR0
ldr d0, [x0, #:lo12:.LANCHOR0]
ushr d1, d0, 32
mul v0.8b, v0.8b, v1.8b
ushr d1, d0, 16
mul v0.8b, v0.8b, v1.8b
ushr d1, d0, 8
mul v0.8b, v0.8b, v1.8b
umov w1, v0.b[0]
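
For reference, here is a minimal C model (my sketch, not part of the
patch) of what both sequences compute: the eight byte lanes live in a
uint64_t, each step shifts right by half the remaining width and
multiplies lane-wise, so after three steps the full product sits in
lane 0.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Model of the shift-based reduction for 8 x u8 lanes packed
   little-endian into a uint64_t.  Each step halves the number of
   live lanes: shift right by half the remaining width (the ext/ushr
   step), then multiply lane-wise (the mul step).  */
static unsigned char
reduce_prod_u8x8 (const unsigned char in[8])
{
  uint64_t v;
  memcpy (&v, in, 8);
  for (unsigned shift = 32; shift >= 8; shift >>= 1)
    {
      uint64_t s = v >> shift;              /* ushr d1, d0, shift */
      uint64_t r = 0;
      for (int lane = 0; lane < 8; lane++)  /* mul v0.8b, v0.8b, v1.8b */
        {
          unsigned char a = v >> (lane * 8);
          unsigned char b = s >> (lane * 8);
          r |= (uint64_t) (unsigned char) (a * b) << (lane * 8);
        }
      v = r;
    }
  return (unsigned char) v;                 /* umov w1, v0.b[0] */
}

int
main (void)
{
  const unsigned char in[8] = {1, 3, 5, 7, 9, 11, 13, 15};
  printf ("%d\n", reduce_prod_u8x8 (in));   /* prints 17 */
  return 0;
}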
...
Tested with bootstrap + check-gcc on aarch64-none-linux-gnu.
Cross-testing of check-gcc on aarch64_be-none-elf in progress.
Ok if no regressions on big-endian?
Cheers,
--Alan
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (vec_shr_<mode>): New.
gcc/testsuite/ChangeLog:
* lib/target-supports.exp
(check_effective_target_whole_vector_shift): Add aarch64{,_be}.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index ef196e4b6fb39c0d2fd9ebfee76abab8369b1e92..397cb5186dd4ff000307f3b14bb4964d84c79469 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -779,6 +779,21 @@
}
)
+;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
+(define_insn "vec_shr_<mode>"
+  [(set (match_operand:VD 0 "register_operand" "=w")
+       (lshiftrt:VD (match_operand:VD 1 "register_operand" "w")
+                    (match_operand:SI 2 "immediate_operand" "i")))]
+  "TARGET_SIMD"
+  {
+    if (BYTES_BIG_ENDIAN)
+      return "ushl %d0, %d1, %2";
+    else
+      return "ushr %d0, %d1, %2";
+  }
+  [(set_attr "type" "neon_shift_imm")]
+)
+
 (define_insn "aarch64_simd_vec_setv2di"
   [(set (match_operand:V2DI 0 "register_operand" "=w,w")
        (vec_merge:V2DI
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 3361c2f9e8d98c5d1cc194617db6281127db2277..464c910777a53867110b462f121c02525d8dd140 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3335,6 +3335,7 @@ proc check_effective_target_vect_shift { } {
proc check_effective_target_whole_vector_shift { } {
if { [istarget i?86-*-*] || [istarget x86_64-*-*]
|| [istarget ia64-*-*]
+ || [istarget aarch64*-*-*]
|| ([check_effective_target_arm32]
&& [check_effective_target_arm_little_endian])
|| ([istarget mips*-*-*]
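
For context, once the effective target is enabled, gcc.dg/vect-style
scans along the lines below start applying on AArch64. The testcase
and scan here are an illustrative sketch, not files added by this
patch:

/* { dg-do compile } */
/* { dg-additional-options "-fdump-tree-vect-details" } */

unsigned char
reduc_mul (unsigned char *in)
{
  unsigned char prod = 1;
  for (int i = 0; i < 8; i++)
    prod *= in[i];
  return prod;
}

/* Shift-based reduction is only reported on targets that can shift a
   whole vector, hence the selector.  */
/* { dg-final { scan-tree-dump "Reduce using vector shifts" "vect" { target whole_vector_shift } } } */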