Hi,
This patch adds a pattern to support the FADDP (scalar) instruction.
Before the patch, the C code
typedef double v2df __attribute__((vector_size (16)));
double
foo (v2df x)
{
return x[1] + x[0];
}
generated:
foo:
dup d1, v0.d[0]
dup d0, v0.d[1]
fadd d0, d1, d0
ret
After the patch:
foo:
faddp d0, v0.2d
ret
Bootstrapped and regression tested on aarch64-none-linux-gnu;
no issues found.
Best wishes,
Elen
gcc/ChangeLog:
2019-04-24 Elen Kalda <elen.ka...@arm.com>
* config/aarch64/aarch64-simd.md (*aarch64_faddp<mode>): New.
gcc/testsuite/ChangeLog:
2019-04-24 Elen Kalda <elen.ka...@arm.com>
* gcc.target/aarch64/simd/scalar_faddp.c: New test.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e3852c5d182b70978d7603225fce55c0b8ee2894..89fedc6cb3f0c6eb74c6f8d0b21cedb5ae20a095 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2372,6 +2372,28 @@
[(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)
+;; Scalar FADDP: add two elements of a floating-point vector register and
+;; write the scalar sum to operand 0.  Operands 2 and 3 are the GCC lane
+;; numbers of the two selected elements.  The instruction always reads
+;; architectural lanes 0 and 1, so the lane numbers are converted with
+;; ENDIAN_LANE_N before being checked (the two numberings differ on
+;; big-endian targets).  Either operand order is accepted.
+(define_insn "*aarch64_faddp<mode>"
+  [(set (match_operand:<VEL> 0 "register_operand" "=w")
+        (plus:<VEL>
+          (vec_select:<VEL> (match_operand:VHSDF 1 "register_operand" "w")
+            (parallel [(match_operand 2 "const_int_operand" "n")]))
+          (vec_select:<VEL> (match_dup:VHSDF 1)
+            (parallel [(match_operand 3 "const_int_operand" "n")]))))]
+  "TARGET_SIMD
+   && ((ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0
+        && ENDIAN_LANE_N (<nunits>, INTVAL (operands[3])) == 1)
+       || (ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1
+           && ENDIAN_LANE_N (<nunits>, INTVAL (operands[3])) == 0))"
+  "faddp\t%<Vetype>0, %1.2<Vetype>"
+  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
+)
+
(define_insn "aarch64_reduc_plus_internal<mode>"
[(set (match_operand:VDQV 0 "register_operand" "=w")
(unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp.c b/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp.c
new file mode 100644
index 0000000000000000000000000000000000000000..2396286d483c16c8b70b16fa08bffeb15f034a93
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp.c
@@ -0,0 +1,33 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-additional-options "-save-temps -O1" } */
+/* { dg-final { scan-assembler-not "dup" } } */
+
+/* Check that adding elements 0 and 1 of a floating-point vector is done
+   with one scalar FADDP and without any DUP lane extraction.  */
+
+typedef double v2df __attribute__((vector_size (16)));
+typedef float v4sf __attribute__((vector_size (16)));
+typedef __fp16 v8hf __attribute__((vector_size (16)));
+
+double
+foo (v2df x)
+{
+  return x[1] + x[0];
+}
+/* { dg-final { scan-assembler-times {faddp\td[0-9]+, v[0-9]+\.2d} 1 } } */
+
+float
+foo1 (v4sf x)
+{
+  return x[0] + x[1];
+}
+/* { dg-final { scan-assembler-times {faddp\ts[0-9]+, v[0-9]+\.2s} 1 } } */
+
+__fp16
+foo2 (v8hf x)
+{
+  return x[0] + x[1];
+}
+/* { dg-final { scan-assembler-times {faddp\th[0-9]+, v[0-9]+\.2h} 1 } } */