Hi,
This patch adds intrinsics for vcopyq_laneq_pfsu8,16,32,64.
These are implemented in an optimal way using the vget_lane and vset_lane
intrinsics and a combine pattern.
I've added a testcase and run a full regression run for aarch64-none-elf.
OK?
Thanks,
James
---
gcc/
2013-09-13  James Greenhalgh  <james.greenhalgh@arm.com>
	* config/aarch64/aarch64-simd.md
	(*aarch64_simd_vec_copy_lane<mode>): New.
	(*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>): Likewise.
	* config/aarch64/arm_neon.h
	(vcopyq_lane_<pfsu><8,16,32,64>): Remove asm implementations.
	(vcopyq_laneq_<pfsu><8,16,32,64>): Implement optimally.
gcc/testsuite/
2013-09-13  James Greenhalgh  <james.greenhalgh@arm.com>

	* gcc.target/aarch64/vect_copy_lane_1.c: New.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index f13cd5b7cdbdff95bbc378a76a6dd05de031487d..9703dd934a2f8335ffc5086e8a421db609fe0236 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -750,6 +750,54 @@ (define_insn "aarch64_simd_vec_set<mode>"
    (set_attr "simd_mode" "<MODE>")]
 )
+(define_insn_and_split "*aarch64_simd_vec_copy_lane<mode>"
+  [(set (match_operand:VALL 0 "register_operand" "=w")
+	(vec_merge:VALL
+	    (vec_duplicate:VALL
+	      (vec_select:<VEL>
+		(match_operand:VALL 3 "register_operand" "w")
+		(parallel
+		  [(match_operand:SI 4 "immediate_operand" "i")])))
+	    (match_operand:VALL 1 "register_operand" "0")
+	    (match_operand:SI 2 "immediate_operand" "i")))]
+  "TARGET_SIMD"
+  "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]"
+  "&& reload_completed
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && (exact_log2 (INTVAL (operands[2])) == INTVAL (operands[4]))"
+  [(const_int 0)]
+  {
+    emit_note (NOTE_INSN_DELETED);
+    DONE;
+  }
+  [(set_attr "simd_type" "simd_ins")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn_and_split "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
+  [(set (match_operand:VALL 0 "register_operand" "=w")
+	(vec_merge:VALL
+	    (vec_duplicate:VALL
+	      (vec_select:<VEL>
+		(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
+		(parallel
+		  [(match_operand:SI 4 "immediate_operand" "i")])))
+	    (match_operand:VALL 1 "register_operand" "0")
+	    (match_operand:SI 2 "immediate_operand" "i")))]
+  "TARGET_SIMD"
+  "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]"
+  "&& reload_completed
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && (exact_log2 (INTVAL (operands[2])) == INTVAL (operands[4]))"
+  [(const_int 0)]
+  {
+    emit_note (NOTE_INSN_DELETED);
+    DONE;
+  }
+  [(set_attr "simd_type" "simd_ins")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
 (define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ 0 "register_operand" "=w")
        (lshiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w")
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 6335ddf..64f8825 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -5538,162 +5538,6 @@ vcntq_u8 (uint8x16_t a)
return result;
}
-#define vcopyq_lane_f32(a, b, c, d)                                     \
-  __extension__                                                         \
-    ({                                                                  \
-       float32x4_t c_ = (c);                                            \
-       float32x4_t a_ = (a);                                            \
-       float32x4_t result;                                              \
-       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
-                : "=w"(result)                                          \
-                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vcopyq_lane_f64(a, b, c, d)                                     \
-  __extension__                                                         \
-    ({                                                                  \
-       float64x2_t c_ = (c);                                            \
-       float64x2_t a_ = (a);                                            \
-       float64x2_t result;                                              \
-       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
-                : "=w"(result)                                          \
-                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
-                : /* No clobbers */);                                   \
-       result;                                                          \
-     })
-
-#define vcopyq_lane_p8(a, b, c, d)                                      \
-  __extension__                                                         \
-    ({                                                                  \
-       poly8x16_t c_ = (c);                                             \
-       poly8x16_t a_ = (a);                                             \