[PATCH][AArch64] Make use of FADDP in simple reductions

Elen Kalda Wed, 08 May 2019 06:38:10 -0700

Hi,

This patch adds a pattern that lets GCC use the FADDP (scalar) instruction
when elements 0 and 1 of a floating-point vector are added together.


Before the patch, the C code

typedef double v2df __attribute__((vector_size (16)));

double
foo (v2df x)
{
  return x[1] + x[0];
}

generated:
foo:
        dup     d1, v0.d[0]
        dup     d0, v0.d[1]
        fadd    d0, d1, d0
        ret

After the patch:
foo:
        faddp   d0, v0.2d
        ret


Bootstrapped and regression-tested on aarch64-none-linux-gnu;
no issues found.

Best wishes,
Elen


gcc/ChangeLog:

2019-04-24  Elen Kalda  <elen.ka...@arm.com>

        * config/aarch64/aarch64-simd.md (*aarch64_faddp<mode>): New.

gcc/testsuite/ChangeLog:

2019-04-24  Elen Kalda  <elen.ka...@arm.com>

        * gcc.target/aarch64/simd/scalar_faddp.c: New test.

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e3852c5d182b70978d7603225fce55c0b8ee2894..89fedc6cb3f0c6eb74c6f8d0b21cedb5ae20a095 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2372,6 +2372,21 @@
   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
 )
 
+(define_insn "*aarch64_faddp<mode>"
+  [(set (match_operand:<VEL> 0 "register_operand" "=w")
+    (plus:<VEL>
+      (vec_select:<VEL> (match_operand:VHSDF 1 "register_operand" "w")
+		(parallel[(match_operand 2 "const_int_operand" "n")]))
+      (vec_select:<VEL> (match_dup:VHSDF 1)
+		(parallel[(match_operand 3 "const_int_operand" "n")]))))]
+  "TARGET_SIMD
+  && ((INTVAL (operands[2]) == 0 && INTVAL (operands[3]) == 1)
+    || (INTVAL (operands[2]) == 1 && INTVAL (operands[3]) == 0))"
+  "faddp\t%<Vetype>0, %1.2<Vetype>"
+  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
+)
+
+
 (define_insn "aarch64_reduc_plus_internal<mode>"
  [(set (match_operand:VDQV 0 "register_operand" "=w")
        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp.c b/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp.c
new file mode 100644
index 0000000000000000000000000000000000000000..2396286d483c16c8b70b16fa08bffeb15f034a93
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp.c
@@ -0,0 +1,31 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-additional-options "-save-temps -O1" } */
+/* { dg-final { scan-assembler-not "dup" } } */
+
+
+typedef double v2df __attribute__((vector_size (16)));
+typedef float v4sf __attribute__((vector_size (16)));
+typedef __fp16 v8hf __attribute__((vector_size (16)));
+
+double
+foo (v2df x)
+{
+  return x[1] + x[0];
+}
+/* { dg-final { scan-assembler-times {faddp\td[0-9]+, v[0-9]+.2d} 1 } } */
+
+float
+foo1 (v4sf x)
+{
+  return x[0] + x[1];
+}
+/* { dg-final { scan-assembler-times {faddp\ts[0-9]+, v[0-9]+.2s} 1 } } */
+
+__fp16
+foo2 (v8hf x)
+{
+  return x[0] + x[1];
+}
+/* { dg-final { scan-assembler-times {faddp\th[0-9]+, v[0-9]+.2h} 1 } } */

[PATCH][AArch64] Make use of FADDP in simple reductions

Reply via email to