Re: [PATCH][AArch64]Add vec_shr pattern for 64-bit vectors using ush{l,r}; enable tests.

2014-11-21 Thread Marcus Shawcroft
On 14 November 2014 15:46, Alan Lawrence <alan.lawre...@arm.com> wrote:

 gcc/ChangeLog:

 * config/aarch64/aarch64-simd.md (vec_shr<mode>): New.

 gcc/testsuite/ChangeLog:

 * lib/target-supports.exp
 (check_effective_target_whole_vector_shift): Add aarch64{,_be}.

OK /Marcus


Re: [PATCH][AArch64]Add vec_shr pattern for 64-bit vectors using ush{l,r}; enable tests.

2014-11-17 Thread Alan Lawrence

I confirm no regressions on aarch64_be-none-elf.

--Alan

Alan Lawrence wrote:

...Patch attached...


Re: [PATCH][AArch64]Add vec_shr pattern for 64-bit vectors using ush{l,r}; enable tests.

2014-11-14 Thread Alan Lawrence


...Patch attached...

Alan Lawrence wrote:
Following recent vectorizer changes to reductions via shifts, AArch64 will now 
reduce loops such as this


unsigned char in[8] = {1, 3, 5, 7, 9, 11, 13, 15};

int
main (unsigned char argc, char **argv)
{
   unsigned char prod = 1;

   /* Prevent constant propagation of the entire loop below.  */
   asm volatile ("" : : : "memory");

   for (unsigned char i = 0; i < 8; i++)
 prod *= in[i];

   if (prod != 17)
     __builtin_printf ("Failed %d\n", prod);

   return 0;
}

using an 'ext' instruction from aarch64_expand_vec_perm_const:

main:
 adrp x0, .LANCHOR0
 movi v2.2s, 0    <=== note reg used here
 ldr d1, [x0, #:lo12:.LANCHOR0]
 ext v0.8b, v1.8b, v2.8b, #4
 mul v1.8b, v1.8b, v0.8b
 ext v0.8b, v1.8b, v2.8b, #2
 mul v0.8b, v1.8b, v0.8b
 ext v2.8b, v0.8b, v2.8b, #1
 mul v0.8b, v0.8b, v2.8b
 umov w1, v0.b[0]
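
For reference, each 'ext'/'mul' pair above shifts the whole vector down by
half of the remaining width, padding with the zero vector, and multiplies
element-wise.  A rough intrinsics sketch of the first step (my illustration
only, not part of the patch; ext_step is a made-up name):

#include <arm_neon.h>

/* One 'ext'-based reduction step: bytes 4..7 of v followed by four zero
   bytes (this is what needs the extra zero register), multiplied
   element-wise with the original vector.  */
uint8x8_t
ext_step (uint8x8_t v)
{
  uint8x8_t zero = vdup_n_u8 (0);
  uint8x8_t hi = vext_u8 (v, zero, 4);
  return vmul_u8 (v, hi);
}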

The 'ext' works for both 64-bit and 128-bit vectors, but for 64-bit vectors
we can do slightly better using ushr; this patch improves the above to:


main:
 adrp x0, .LANCHOR0
 ldr d0, [x0, #:lo12:.LANCHOR0]
 ushr d1, d0, 32
 mul v0.8b, v0.8b, v1.8b
 ushr d1, d0, 16
 mul v0.8b, v0.8b, v1.8b
 ushr d1, d0, 8
 mul v0.8b, v0.8b, v1.8b
 umov w1, v0.b[0]
...
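
The corresponding sketch for the ushr form (again my own illustration with a
made-up name, assuming little-endian lane order): the 64-bit vector is
reinterpreted as a single scalar lane and shifted right, so no zero register
is needed.

#include <arm_neon.h>

/* One ushr-based reduction step: reinterpret the vector as one 64-bit lane,
   shift it right by 32 bits, and multiply element-wise.  */
uint8x8_t
ushr_step (uint8x8_t v)
{
  uint64x1_t hi = vshr_n_u64 (vreinterpret_u64_u8 (v), 32);
  return vmul_u8 (v, vreinterpret_u8_u64 (hi));
}

(The new pattern below switches mnemonic under BYTES_BIG_ENDIAN, where the
in-register lane order is reversed; the sketch only covers the little-endian
case.)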

Tested with bootstrap + check-gcc on aarch64-none-linux-gnu.
Cross-testing of check-gcc on aarch64_be-none-elf in progress.

Ok if no regressions on big-endian?

Cheers,
--Alan

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (vec_shr<mode>): New.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp
(check_effective_target_whole_vector_shift): Add aarch64{,_be}.



diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index ef196e4b6fb39c0d2fd9ebfee76abab8369b1e92..397cb5186dd4ff000307f3b14bb4964d84c79469 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -779,6 +779,21 @@
   }
 )
 
+;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
+(define_insn "vec_shr_<mode>"
+  [(set (match_operand:VD 0 "register_operand" "=w")
+        (lshiftrt:VD (match_operand:VD 1 "register_operand" "w")
+                     (match_operand:SI 2 "immediate_operand" "i")))]
+  "TARGET_SIMD"
+  {
+    if (BYTES_BIG_ENDIAN)
+      return "ushl %d0, %d1, %2";
+    else
+      return "ushr %d0, %d1, %2";
+  }
+  [(set_attr "type" "neon_shift_imm")]
+)
+
 (define_insn aarch64_simd_vec_setv2di
   [(set (match_operand:V2DI 0 register_operand =w,w)
 (vec_merge:V2DI
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 3361c2f9e8d98c5d1cc194617db6281127db2277..464c910777a53867110b462f121c02525d8dd140 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3335,6 +3335,7 @@ proc check_effective_target_vect_shift { } {
 proc check_effective_target_whole_vector_shift { } {
 if { [istarget i?86-*-*] || [istarget x86_64-*-*]
 	 || [istarget ia64-*-*]
+	 || [istarget aarch64*-*-*]
 	 || ([check_effective_target_arm32]
	     && [check_effective_target_arm_little_endian])
 	 || ([istarget mips*-*-*]
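
As a usage note (my sketch, not part of the patch): with the effective-target
keyword above enabled for aarch64, a vectorizer test can be gated on it with
the standard dejagnu directive.  A hypothetical stub might look like:

/* { dg-do compile } */
/* { dg-require-effective-target whole_vector_shift } */
/* { dg-options "-O2 -ftree-vectorize" } */

unsigned char
reduce_mul (unsigned char *in)
{
  unsigned char prod = 1;
  for (int i = 0; i < 8; i++)
    prod *= in[i];
  return prod;
}

(Whether a particular loop ends up using the shift-based reduction still
depends on the vectorizer, so this only illustrates the directive.)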