Re: [AArch64] Logical vector shift right conformance

2014-03-24 Thread James Greenhalgh
On Thu, Mar 20, 2014 at 12:59:27PM +, Marcus Shawcroft wrote:
 On 25 February 2014 11:58, Alex Velenko alex.vele...@arm.com wrote:
  Hi,
 
  This patch fixes a bug in the behavior of the vshr_n_u64 and vshrd_n_u64
  intrinsics in the case of a shift by 64. A shift by 64 is strictly defined
  in ACLE to use the ushr instruction intended for those intrinsics.
 
  The testcase provided also tests the behavior of the intrinsics mentioned
  above with values other than 64. In addition, the test checks that an
  illegal ushr shift by 0 is not generated; the test is expected to compile
  and run correctly, generating instructions other than ushr.
 
  The patch was tested for LE and BE with no regressions.
 
  Is the given patch OK for stage-4?
 
 I think this is OK for stage-4, but leave 24h before committing to
 allow the RMs an opportunity to object / comment.
 

I've committed this on Alex's behalf as revision 208789.

Thanks,
James



Re: [AArch64] Logical vector shift right conformance

2014-03-20 Thread Marcus Shawcroft
On 25 February 2014 11:58, Alex Velenko alex.vele...@arm.com wrote:
 Hi,

 This patch fixes a bug in the behavior of the vshr_n_u64 and vshrd_n_u64
 intrinsics in the case of a shift by 64. A shift by 64 is strictly defined
 in ACLE to use the ushr instruction intended for those intrinsics.

 The testcase provided also tests the behavior of the intrinsics mentioned
 above with values other than 64. In addition, the test checks that an
 illegal ushr shift by 0 is not generated; the test is expected to compile
 and run correctly, generating instructions other than ushr.

 The patch was tested for LE and BE with no regressions.

 Is the given patch OK for stage-4?

I think this is OK for stage-4, but leave 24h before committing to
allow the RMs an opportunity to object / comment.

Cheers
/Marcus


[AArch64] Logical vector shift right conformance

2014-02-25 Thread Alex Velenko

Hi,

This patch fixes a bug in the behavior of the vshr_n_u64 and vshrd_n_u64
intrinsics in the case of a shift by 64. A shift by 64 is strictly defined
in ACLE to use the ushr instruction intended for those intrinsics.
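
(For illustration only, not part of the patch: a minimal use of the affected
intrinsic. Per ACLE the shift amount for vshr_n_u64 may range from 1 to 64,
and an unsigned shift right by 64 should yield zero, so the function below is
expected to assemble to a single ushr by 64.)

#include <arm_neon.h>

uint64x1_t
shift_right_by_64 (uint64x1_t in)
{
  /* With this patch, expected to map to "ushr dN, dM, 64" and return 0.  */
  return vshr_n_u64 (in, 64);
}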


The testcase provided also tests the behavior of the intrinsics mentioned
above with values other than 64. In addition, the test checks that an
illegal ushr shift by 0 is not generated; the test is expected to compile
and run correctly, generating instructions other than ushr.
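
(A rough sketch of what such a test could look like; this is illustrative and
not the committed gcc.target/aarch64/ushr64_1.c, so the options, directives
and constants below are assumptions.)

/* { dg-do run } */
/* { dg-options "-O2 --save-temps" } */

#include <arm_neon.h>

extern void abort (void);

/* noinline keeps the shifts from being folded away at compile time.  */
uint64x1_t __attribute__ ((noinline))
shift_by_64 (uint64x1_t a)
{
  /* Expected to assemble to "ushr dN, dM, 64".  */
  return vshr_n_u64 (a, 64);
}

uint64x1_t __attribute__ ((noinline))
shift_by_4 (uint64x1_t a)
{
  /* A shift by a value other than 64 takes the ordinary path.  */
  return vshr_n_u64 (a, 4);
}

int
main (void)
{
  uint64x1_t arg = vcreate_u64 (0x8000000000000000ULL);

  if (vget_lane_u64 (shift_by_64 (arg), 0) != 0)
    abort ();
  if (vget_lane_u64 (shift_by_4 (arg), 0) != 0x0800000000000000ULL)
    abort ();
  return 0;
}

/* { dg-final { scan-assembler "ushr\\td\[0-9\]+, d\[0-9\]+, 64" } } */
/* { dg-final { scan-assembler-not "ushr\\td\[0-9\]+, d\[0-9\]+, 0" } } */
/* { dg-final { cleanup-saved-temps } } */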


The patch was tested for LE and BE with no regressions.

Is the given patch OK for stage-4?

Thanks,
Alex

gcc/

2014-02-25  Alex Velenko  alex.vele...@arm.com

* config/aarch64/aarch64-simd-builtins.def (lshr): DI mode excluded.
(lshr_simd): DI mode added.
* config/aarch64/aarch64-simd.md (aarch64_lshr_simddi): New pattern.
(aarch64_ushr_simddi): Likewise.
* config/aarch64/aarch64.md (UNSPEC_USHR64): New unspec.
* config/aarch64/arm_neon.h (vshr_n_u64): Intrinsic fixed.
(vshrd_n_u64): Likewise.

gcc/testsuite/

2014-02-25  Alex Velenko  alex.vele...@arm.com

* gcc.target/aarch64/ushr64_1.c: New testcase.
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index ebab2ce8347a4425977c5cbd0f285c3ff1d9f2f1..ac5522cac00e6dd8a808ac3c68b4fa8cc15d9120 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -183,6 +183,10 @@ aarch64_types_getlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 #define TYPES_GETLANE (aarch64_types_getlane_qualifiers)
 #define TYPES_SHIFTIMM (aarch64_types_getlane_qualifiers)
 static enum aarch64_type_qualifiers
+aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
+#define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
+static enum aarch64_type_qualifiers
 aarch64_types_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
 #define TYPES_SETLANE (aarch64_types_setlane_qualifiers)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index e5f71b479ccfd1a9cbf84aed0f96b49762053f59..c9b7570e565979cb454d594c84e625380419d0e6 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -192,7 +192,8 @@
 
   BUILTIN_VDQ_I (SHIFTIMM, ashr, 3)
   VAR1 (SHIFTIMM, ashr_simd, 0, di)
-  BUILTIN_VSDQ_I_DI (SHIFTIMM, lshr, 3)
+  BUILTIN_VDQ_I (SHIFTIMM, lshr, 3)
+  VAR1 (USHIFTIMM, lshr_simd, 0, di)
   /* Implemented by aarch64_<sur>shr_n<mode>.  */
   BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0)
   BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 4dffb59e856aeaafb79007255d3b91a73ef1ef13..6048d605c72e6a43b9a004a8bc89dbfa89f3ed5b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -724,6 +724,31 @@
   DONE;
 })
 
+(define_expand "aarch64_lshr_simddi"
+  [(match_operand:DI 0 "register_operand" "=w")
+   (match_operand:DI 1 "register_operand" "w")
+   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
+  "TARGET_SIMD"
+  {
+    if (INTVAL (operands[2]) == 64)
+      emit_insn (gen_aarch64_ushr_simddi (operands[0], operands[1]));
+    else
+      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
+    DONE;
+  }
+)
+
+;; SIMD shift by 64.  This pattern is a special case as standard pattern does
+;; not handle NEON shifts by 64.
+(define_insn "aarch64_ushr_simddi"
+  [(set (match_operand:DI 0 "register_operand" "=w")
+        (unspec:DI
+          [(match_operand:DI 1 "register_operand" "w")] UNSPEC_USHR64))]
+  "TARGET_SIMD"
+  "ushr\t%d0, %d1, 64"
+  [(set_attr "type" "neon_shift_imm")]
+)
+
 (define_expand "vec_set<mode>"
   [(match_operand:VQ_S 0 "register_operand")
    (match_operand:VEL 1 "register_operand")
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 99a6ac8fcbdcd24a0ea18cc037bef9cf72070281..c86a29d8e7f8df21f25e14d22df1c3e8c37c907f 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -101,6 +101,7 @@
 UNSPEC_TLS
 UNSPEC_TLSDESC
 UNSPEC_USHL_2S
+UNSPEC_USHR64
 UNSPEC_VSTRUCTDUMMY
 ])
 
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 6af99361b8e265f66026dc506cfc23f044d153b4..612b899f31584378844f1b82353e8d1dd3d5ec61 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -23364,7 +23364,7 @@ vshr_n_u32 (uint32x2_t __a, const int __b)
 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
 vshr_n_u64 (uint64x1_t __a, const int __b)
 {
-  return (uint64x1_t) __builtin_aarch64_lshrdi ((int64x1_t) __a, __b);
+  return __builtin_aarch64_lshr_simddi_uus ( __a, __b);
 }
 
 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
@@ -23421,10 +23421,10 @@ vshrd_n_s64 (int64x1_t __a, const int