Re: [AArch64] Implement vcopy intrinsics.

2013-09-16 Thread Marcus Shawcroft

On 13/09/13 19:39, James Greenhalgh wrote:


Hi,

This patch adds intrinsics for vcopyq_laneq_<pfsu><8,16,32,64>.

These are implemented in an optimal way using the vget_lane and vset_lane
intrinsics and a combine pattern.

I've added a testcase and run a full regression run for aarch64-none-elf.

OK?

Thanks,
James


OK
/Marcus




[AArch64] Implement vcopy intrinsics.

2013-09-13 Thread James Greenhalgh

Hi,

This patch adds intrinsics for vcopyq_laneq_<pfsu><8,16,32,64>.

These are implemented in an optimal way using the vget_lane and vset_lane
intrinsics and a combine pattern.

I've added a testcase and run a full regression run for aarch64-none-elf.

OK?

Thanks,
James

---
gcc/

2013-09-13  James Greenhalgh  james.greenha...@arm.com

* config/aarch64/aarch64-simd.md
(*aarch64_simd_vec_copy_lane<mode>): New.
(*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>): Likewise.
* config/aarch64/arm_neon.h
(vcopyq_lane_<pfsu><8,16,32,64>): Remove asm implementations.
(vcopyq_laneq_<pfsu><8,16,32,64>): Implement optimally.

gcc/testsuite

2013-09-13  James Greenhalgh  james.greenha...@arm.com

* gcc.target/aarch64/vect_copy_lane_1.c: New.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index f13cd5b7cdbdff95bbc378a76a6dd05de031487d..9703dd934a2f8335ffc5086e8a421db609fe0236 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -750,6 +750,54 @@ (define_insn "aarch64_simd_vec_set<mode>"
(set_attr "simd_mode" "<MODE>")]
 )
 
+(define_insn_and_split "*aarch64_simd_vec_copy_lane<mode>"
+  [(set (match_operand:VALL 0 "register_operand" "=w")
+	(vec_merge:VALL
+	    (vec_duplicate:VALL
+	      (vec_select:<VEL>
+		(match_operand:VALL 3 "register_operand" "w")
+		(parallel
+		  [(match_operand:SI 4 "immediate_operand" "i")])))
+	    (match_operand:VALL 1 "register_operand" "0")
+	    (match_operand:SI 2 "immediate_operand" "i")))]
+  "TARGET_SIMD"
+  "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]"
+  "&& reload_completed
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && (exact_log2 (INTVAL (operands[2])) == INTVAL (operands[4]))"
+  [(const_int 0)]
+  {
+    emit_note (NOTE_INSN_DELETED);
+    DONE;
+  }
+  [(set_attr "simd_type" "simd_ins")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn_and_split "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
+  [(set (match_operand:VALL 0 "register_operand" "=w")
+	(vec_merge:VALL
+	    (vec_duplicate:VALL
+	      (vec_select:<VEL>
+		(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
+		(parallel
+		  [(match_operand:SI 4 "immediate_operand" "i")])))
+	    (match_operand:VALL 1 "register_operand" "0")
+	    (match_operand:SI 2 "immediate_operand" "i")))]
+  "TARGET_SIMD"
+  "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]"
+  "&& reload_completed
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && (exact_log2 (INTVAL (operands[2])) == INTVAL (operands[4]))"
+  [(const_int 0)]
+  {
+    emit_note (NOTE_INSN_DELETED);
+    DONE;
+  }
+  [(set_attr "simd_type" "simd_ins")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ 0 "register_operand" "=w")
	(lshiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w")
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 6335ddf..64f8825 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -5538,162 +5538,6 @@ vcntq_u8 (uint8x16_t a)
   return result;
 }
 
-#define vcopyq_lane_f32(a, b, c, d) \
-  __extension__ \
-({  \
-   float32x4_t c_ = (c);\
-   float32x4_t a_ = (a);\
-   float32x4_t result;  \
-   __asm__ ("ins %0.s[%2], %3.s[%4]"\
-: "=w"(result)  \
-: "0"(a_), "i"(b), "w"(c_), "i"(d)  \
-: /* No clobbers */);   \
-   result;  \
- })
-
-#define vcopyq_lane_f64(a, b, c, d) \
-  __extension__ \
-({  \
-   float64x2_t c_ = (c);\
-   float64x2_t a_ = (a);\
-   float64x2_t result;  \
-   __asm__ ("ins %0.d[%2], %3.d[%4]"\
-: "=w"(result)  \
-: "0"(a_), "i"(b), "w"(c_), "i"(d)  \
-: /* No clobbers */);   \
-   result;  \
- })
-
-#define vcopyq_lane_p8(a, b, c, d)  \
-  __extension__ \
-({  \
-   poly8x16_t c_ = (c); \
-   poly8x16_t a_ = (a);