On 25/06/12 15:59, Matthew Gretton-Dann wrote: > All, > > This patch adds vectoriser support for VFMA to the ARM Neon backend. > > Note that the VFP VFNMA and VFNMS instructions do not have Neon > equivalents. > > OK?
Sorry, no. The neon versions of FMA do not handle denormalized values, so this needs to reject vectorization unless flag_unsafe_math_optimizations is true. R. > > gcc/ChangeLog: > > 2012-06-25 Matthew Gretton-Dann <matthew.gretton-d...@arm.com> > > * config/arm/neon.md (fma<mode>4): New pattern. > (*fmsub<mode>4): Likewise. > > 2012-06-25 Matthew Gretton-Dann <matthew.gretton-d...@arm.com> > > * gcc.target/arm/neon-vfma-1.c: New testcase. > * gcc.target/arm/neon-vfms-1.c: Likewise. > * lib/target-supports.exp (add_options_for_arm_neonv2): New > function. > (check_effective_target_arm_neonv2_ok_nocache): Likewise. > (check_effective_target_arm_neonv2_ok): Likewise. > (check_effective_target_arm_neonv2_hw): Likewise. > (check_effective_target_arm_neonv2): Likewise. > > Thanks, > > Matt > > > 0002-Add-vectorizer-support-for-VFMA.txt > > > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md > index 4568dea..4d12fb3 100644 > --- a/gcc/config/arm/neon.md > +++ b/gcc/config/arm/neon.md > @@ -711,6 +711,33 @@ > (const_string > "neon_mla_qqq_32_qqd_32_scalar")))))] > ) > > +;; Fused multiply-accumulate > +(define_insn "fma<mode>4" > + [(set (match_operand:VCVTF 0 "register_operand" "=w") > + (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") > + (match_operand:VCVTF 2 "register_operand" "w") > + (match_operand:VCVTF 3 "register_operand" "0")))] > + "TARGET_NEON && TARGET_FMA" > + "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" > + [(set (attr "neon_type") > + (if_then_else (match_test "<Is_d_reg>") > + (const_string "neon_fp_vmla_ddd") > + (const_string "neon_fp_vmla_qqq")))] > +) > + > +(define_insn "*fmsub<mode>4" > + [(set (match_operand:VCVTF 0 "register_operand" "=w") > + (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) > + (match_operand:VCVTF 2 "register_operand" "w") > + (match_operand:VCVTF 3 "register_operand" "0")))] > + "TARGET_NEON && TARGET_FMA" > + "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" > + [(set (attr "neon_type") > + (if_then_else (match_test "<Is_d_reg>") > + (const_string "neon_fp_vmla_ddd") > + (const_string "neon_fp_vmla_qqq")))] > +) > + > (define_insn "ior<mode>3" > [(set (match_operand:VDQ 0 "s_register_operand" "=w,w") > (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") > diff --git a/gcc/testsuite/gcc.target/arm/neon-vfma-1.c > b/gcc/testsuite/gcc.target/arm/neon-vfma-1.c > new file mode 100644 > index 0000000..a003a82 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/neon-vfma-1.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_neonv2_ok } */ > +/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ > +/* { dg-add-options arm_neonv2 } */ > +/* { dg-final { scan-assembler "vfma\\.f32\[ \]+\[dDqQ]" } } */ > + > +/* Verify that VFMA is used. */ > +void f1(int n, float a, float x[], float y[]) { > + int i; > + for (i = 0; i < n; ++i) > + y[i] = a * x[i] + y[i]; > +} > diff --git a/gcc/testsuite/gcc.target/arm/neon-vfms-1.c > b/gcc/testsuite/gcc.target/arm/neon-vfms-1.c > new file mode 100644 > index 0000000..8cefd8a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/neon-vfms-1.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_neonv2_ok } */ > +/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ > +/* { dg-add-options arm_neonv2 } */ > +/* { dg-final { scan-assembler "vfms\\.f32\[ \]+\[dDqQ]" } } */ > + > +/* Verify that VFMS is used. */ > +void f1(int n, float a, float x[], float y[]) { > + int i; > + for (i = 0; i < n; ++i) > + y[i] = a * -x[i] + y[i]; > +} > diff --git a/gcc/testsuite/lib/target-supports.exp > b/gcc/testsuite/lib/target-supports.exp > index bc5baa7..9fc8a5c 100644 > --- a/gcc/testsuite/lib/target-supports.exp > +++ b/gcc/testsuite/lib/target-supports.exp > @@ -2082,6 +2082,19 @@ proc add_options_for_arm_neon { flags } { > return "$flags $et_arm_neon_flags" > } > > +# Add the options needed for NEON. We need either -mfloat-abi=softfp > +# or -mfloat-abi=hard, but if one is already specified by the > +# multilib, use it. Similarly, if a -mfpu option already enables > +# NEON, do not add -mfpu=neon. > + > +proc add_options_for_arm_neonv2 { flags } { > + if { ! [check_effective_target_arm_neonv2_ok] } { > + return "$flags" > + } > + global et_arm_neonv2_flags > + return "$flags $et_arm_neonv2_flags" > +} > + > # Return 1 if this is an ARM target supporting -mfpu=neon > # -mfloat-abi=softfp or equivalent options. Some multilibs may be > # incompatible with these options. Also set et_arm_neon_flags to the > @@ -2110,6 +2123,38 @@ proc check_effective_target_arm_neon_ok { } { > check_effective_target_arm_neon_ok_nocache] > } > > +# Return 1 if this is an ARM target supporting -mfpu=neon-vfpv4 > +# -mfloat-abi=softfp or equivalent options. Some multilibs may be > +# incompatible with these options. Also set et_arm_neonv2_flags to the > +# best options to add. > + > +proc check_effective_target_arm_neonv2_ok_nocache { } { > + global et_arm_neonv2_flags > + set et_arm_neonv2_flags "" > + if { [check_effective_target_arm32] } { > + foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-vfpv4" > "-mfpu=neon-vfpv4 -mfloat-abi=softfp"} { > + if { [check_no_compiler_messages_nocache arm_neonv2_ok object { > + #include "arm_neon.h" > + float32x2_t > + foo (float32x2_t a, float32x2_t b, float32x2_t c) > + { > + return vfma_f32 (a, b, c); > + } > + } "$flags"] } { > + set et_arm_neonv2_flags $flags > + return 1 > + } > + } > + } > + > + return 0 > +} > + > +proc check_effective_target_arm_neonv2_ok { } { > + return [check_cached_effective_target arm_neonv2_ok \ > + check_effective_target_arm_neonv2_ok_nocache] > +} > + > # Add the options needed for NEON. We need either -mfloat-abi=softfp > # or -mfloat-abi=hard, but if one is already specified by the > # multilib, use it. > @@ -2301,6 +2346,21 @@ proc check_effective_target_arm_neon_hw { } { > } [add_options_for_arm_neon ""]] > } > > +proc check_effective_target_arm_neonv2_hw { } { > + return [check_runtime arm_neon_hwv2_available { > + #include "arm_neon.h" > + int > + main (void) > + { > + float32x2_t a, b, c; > + asm ("vfma.f32 %P0, %P1, %P2" > + : "=w" (a) > + : "w" (b), "w" (c)); > + return 0; > + } > + } [add_options_for_arm_neonv2 ""]] > +} > + > # Return 1 if this is a ARM target with NEON enabled. > > proc check_effective_target_arm_neon { } { > @@ -2317,6 +2377,24 @@ proc check_effective_target_arm_neon { } { > } > } > > +proc check_effective_target_arm_neonv2 { } { > + if { [check_effective_target_arm32] } { > + return [check_no_compiler_messages arm_neon object { > + #ifndef __ARM_NEON__ > + #error not NEON > + #else > + #ifndef __ARM_FEATURE_FMA > + #error not NEONv2 > + #else > + int dummy; > + #endif > + #endif > + }] > + } else { > + return 0 > + } > +} > + > # Return 1 if this a Loongson-2E or -2F target using an ABI that supports > # the Loongson vector modes. > >