On Thu, May 24, 2018 at 10:07 AM Richard Sandiford <richard.sandif...@linaro.org> wrote:

> The folds in r260348 kicked in before vectorisation, which hurts
> for two reasons:

> (1) the current suboptimal handling of nothrow meant that we could
>      drop the flag early and so prevent if-conversion

> (2) some architectures provide more scalar forms than vector forms
>      (true for Advanced SIMD)

> (1) is a bug in itself that needs to be fixed eventually, but delaying
> the folds is still needed for (2).

> Tested on aarch64-linux-gnu (with and without SVE), aarch64_be-elf
> and x86_64-linux-gnu.  OK to install?

OK.

Richard.

> (Patch is mostly just reindent.)

> Richard


> 2018-05-24  Richard Sandiford  <richard.sandif...@linaro.org>

> gcc/
>          * match.pd: Delay FMA folds until after vectorization.

> gcc/testsuite/
>          * gcc.dg/vect/vect-fma-1.c: New test.

> Index: gcc/match.pd
> ===================================================================
> --- gcc/match.pd        2018-05-18 09:26:37.735714314 +0100
> +++ gcc/match.pd        2018-05-24 09:05:10.432158893 +0100
> @@ -4703,59 +4703,60 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>                        wi::to_wide (@ipos) + isize))
>       (BIT_FIELD_REF @0 @rsize @rpos)))))

> -(for fmas (FMA)
> +(if (canonicalize_math_after_vectorization_p ())
> + (for fmas (FMA)
> +  (simplify
> +   (fmas:c (negate @0) @1 @2)
> +   (IFN_FNMA @0 @1 @2))
> +  (simplify
> +   (fmas @0 @1 (negate @2))
> +   (IFN_FMS @0 @1 @2))
> +  (simplify
> +   (fmas:c (negate @0) @1 (negate @2))
> +   (IFN_FNMS @0 @1 @2))
> +  (simplify
> +   (negate (fmas@3 @0 @1 @2))
> +   (if (single_use (@3))
> +    (IFN_FNMS @0 @1 @2))))
> +
> + (simplify
> +  (IFN_FMS:c (negate @0) @1 @2)
> +  (IFN_FNMS @0 @1 @2))
>    (simplify
> -  (fmas:c (negate @0) @1 @2)
> +  (IFN_FMS @0 @1 (negate @2))
> +  (IFN_FMA @0 @1 @2))
> + (simplify
> +  (IFN_FMS:c (negate @0) @1 (negate @2))
>     (IFN_FNMA @0 @1 @2))
>    (simplify
> -  (fmas @0 @1 (negate @2))
> -  (IFN_FMS @0 @1 @2))
> +  (negate (IFN_FMS@3 @0 @1 @2))
> +   (if (single_use (@3))
> +    (IFN_FNMA @0 @1 @2)))
> +
> + (simplify
> +  (IFN_FNMA:c (negate @0) @1 @2)
> +  (IFN_FMA @0 @1 @2))
>    (simplify
> -  (fmas:c (negate @0) @1 (negate @2))
> +  (IFN_FNMA @0 @1 (negate @2))
>     (IFN_FNMS @0 @1 @2))
>    (simplify
> -  (negate (fmas@3 @0 @1 @2))
> +  (IFN_FNMA:c (negate @0) @1 (negate @2))
> +  (IFN_FMS @0 @1 @2))
> + (simplify
> +  (negate (IFN_FNMA@3 @0 @1 @2))
>     (if (single_use (@3))
> -   (IFN_FNMS @0 @1 @2))))
> +   (IFN_FMS @0 @1 @2)))

> -(simplify
> - (IFN_FMS:c (negate @0) @1 @2)
> - (IFN_FNMS @0 @1 @2))
> -(simplify
> - (IFN_FMS @0 @1 (negate @2))
> - (IFN_FMA @0 @1 @2))
> -(simplify
> - (IFN_FMS:c (negate @0) @1 (negate @2))
> - (IFN_FNMA @0 @1 @2))
> -(simplify
> - (negate (IFN_FMS@3 @0 @1 @2))
> + (simplify
> +  (IFN_FNMS:c (negate @0) @1 @2)
> +  (IFN_FMS @0 @1 @2))
> + (simplify
> +  (IFN_FNMS @0 @1 (negate @2))
> +  (IFN_FNMA @0 @1 @2))
> + (simplify
> +  (IFN_FNMS:c (negate @0) @1 (negate @2))
> +  (IFN_FMA @0 @1 @2))
> + (simplify
> +  (negate (IFN_FNMS@3 @0 @1 @2))
>     (if (single_use (@3))
> -   (IFN_FNMA @0 @1 @2)))
> -
> -(simplify
> - (IFN_FNMA:c (negate @0) @1 @2)
> - (IFN_FMA @0 @1 @2))
> -(simplify
> - (IFN_FNMA @0 @1 (negate @2))
> - (IFN_FNMS @0 @1 @2))
> -(simplify
> - (IFN_FNMA:c (negate @0) @1 (negate @2))
> - (IFN_FMS @0 @1 @2))
> -(simplify
> - (negate (IFN_FNMA@3 @0 @1 @2))
> - (if (single_use (@3))
> -  (IFN_FMS @0 @1 @2)))
> -
> -(simplify
> - (IFN_FNMS:c (negate @0) @1 @2)
> - (IFN_FMS @0 @1 @2))
> -(simplify
> - (IFN_FNMS @0 @1 (negate @2))
> - (IFN_FNMA @0 @1 @2))
> -(simplify
> - (IFN_FNMS:c (negate @0) @1 (negate @2))
> - (IFN_FMA @0 @1 @2))
> -(simplify
> - (negate (IFN_FNMS@3 @0 @1 @2))
> - (if (single_use (@3))
> -  (IFN_FMA @0 @1 @2)))
> +   (IFN_FMA @0 @1 @2))))
> Index: gcc/testsuite/gcc.dg/vect/vect-fma-1.c
> ===================================================================
> --- /dev/null   2018-04-20 16:19:46.369131350 +0100
> +++ gcc/testsuite/gcc.dg/vect/vect-fma-1.c      2018-05-24 09:05:10.432158893 +0100
> @@ -0,0 +1,58 @@
> +/* { dg-require-effective-target scalar_all_fma } */
> +
> +#include "tree-vect.h"
> +
> +#define N (VECTOR_BITS * 11 / 64 + 3)
> +
> +#define DEF(INV)                                       \
> +  void __attribute__ ((noipa))                         \
> +  f_##INV (double *restrict a, double *restrict b,     \
> +          double *restrict c, double *restrict d)      \
> +  {                                                    \
> +    for (int i = 0; i < N; ++i)                                \
> +      {                                                        \
> +       double mb = (INV & 1 ? -b[i] : b[i]);           \
> +       double mc = c[i];                               \
> +       double md = (INV & 2 ? -d[i] : d[i]);           \
> +       double fma = __builtin_fma (mb, mc, md);        \
> +       a[i] = (INV & 4 ? -fma : fma);                  \
> +      }                                                        \
> +  }
> +
> +#define TEST(INV)                                      \
> +  {                                                    \
> +    f_##INV (a, b, c, d);                              \
> +    for (int i = 0; i < N; ++i)                                \
> +      {                                                        \
> +       double mb = (INV & 1 ? -b[i] : b[i]);           \
> +       double mc = c[i];                               \
> +       double md = (INV & 2 ? -d[i] : d[i]);           \
> +       double fma = __builtin_fma (mb, mc, md);        \
> +       double expected = (INV & 4 ? -fma : fma);       \
> +       if (a[i] != expected)                           \
> +         __builtin_abort ();                           \
> +       asm volatile ("" ::: "memory");                 \
> +      }                                                        \
> +  }
> +
> +#define FOR_EACH_INV(T)        \
> +  T (0) T (1) T (2) T (3) T (4) T (5) T (6) T (7)
> +
> +FOR_EACH_INV (DEF)
> +
> +int
> +main (void)
> +{
> +  double a[N], b[N], c[N], d[N];
> +  for (int i = 0; i < N; ++i)
> +    {
> +      b[i] = i % 17;
> +      c[i] = i % 9 + 11;
> +      d[i] = i % 13 + 14;
> +      asm volatile ("" ::: "memory");
> +    }
> +  FOR_EACH_INV (TEST)
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 8 "vect" { target vect_double } } } */

Reply via email to