Hi!

As mentioned in the PR, sw_absorption.fppized.f90 relies on pow in
x = log10 (something) - y;
for (...)
  {
    x = x + y;
    z = pow (10.0, x);
  }
where x + y in the first iteration is exactly -3, expecting z to be
>= 0.001 there.  Unfortunately, with the pow (cst, x) -> exp (log (cst) * x)
optimization under -Ofast and -flto, this returns something a few ulps
smaller than that and the benchmark fails.

In the PR I've attached quite a large patch that attempts to optimize the
case using x = x * cst;, but unfortunately, even by -Ofast standards, that
generates quite big relative errors when the loop has 400 iterations.

So, instead, this simple patch just tries to detect the case where we
have pow (10.0, integer) on some edge, and in that case simply doesn't
attempt to optimize it into exp.  If glibc folks add an optimized exp10
eventually, we can switch it later on to emitting exp10 instead.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2018-03-27  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/82004
        * generic-match-head.c (optimize_pow_to_exp): New function.
        * gimple-match-head.c (optimize_pow_to_exp): New function.
        * match.pd (pow(C,x) -> exp(log(C)*x)): Don't fold if
        optimize_pow_to_exp is false.

        * gcc.dg/pr82004.c: New test.

--- gcc/generic-match-head.c.jj 2018-02-13 09:33:31.089560180 +0100
+++ gcc/generic-match-head.c    2018-03-27 18:28:36.663913272 +0200
@@ -77,3 +77,11 @@ canonicalize_math_after_vectorization_p
 {
   return false;
 }
+
+/* Return true to fold pow(cst, x) into exp(log(cst) * x); never done here.  */
+
+static bool
+optimize_pow_to_exp (tree arg0, tree arg1)
+{
+  return false;
+}
--- gcc/gimple-match-head.c.jj  2018-02-13 09:33:31.107560174 +0100
+++ gcc/gimple-match-head.c     2018-03-27 18:48:21.205369113 +0200
@@ -840,3 +840,67 @@ canonicalize_math_after_vectorization_p
 {
   return !cfun || (cfun->curr_properties & PROP_gimple_lvec) != 0;
 }
+
+/* Return true if pow(cst, x) should be optimized into exp(log(cst) * x).
+   As a workaround for SPEC CPU2017 628.pop2_s, don't do it if arg0
+   is 10.0, arg1 = phi_res + cst1 and phi_res = PHI <cst2, ...>
+   where cst1 + cst2 is an exact integer, because then pow (10.0, arg1)
+   will likely be exact, while exp (log (10.0) * arg1) might be not.
+   ARG0 must be a REAL_CST.  If the PHI has no REAL_CST argument, or has
+   differing REAL_CST arguments, the workaround does not apply and true
+   is returned.  */
+
+static bool
+optimize_pow_to_exp (tree arg0, tree arg1)
+{
+  gcc_assert (TREE_CODE (arg0) == REAL_CST);
+
+  /* The workaround only concerns a base of exactly 10.0.  */
+  REAL_VALUE_TYPE ten;
+  real_from_integer (&ten, TYPE_MODE (TREE_TYPE (arg0)), 10, SIGNED);
+  if (!real_identical (TREE_REAL_CST_PTR (arg0), &ten))
+    return true;
+
+  if (TREE_CODE (arg1) != SSA_NAME)
+    return true;
+
+  /* Match arg1 = phi_res + cst1.  */
+  gimple *def = SSA_NAME_DEF_STMT (arg1);
+  if (!is_gimple_assign (def)
+      || gimple_assign_rhs_code (def) != PLUS_EXPR
+      || TREE_CODE (gimple_assign_rhs1 (def)) != SSA_NAME
+      || TREE_CODE (gimple_assign_rhs2 (def)) != REAL_CST)
+    return true;
+
+  gphi *phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def)));
+  if (!phi)
+    return true;
+
+  /* Find the single REAL_CST value (cst2) among the PHI arguments.  */
+  tree cst = NULL_TREE;
+  int n = gimple_phi_num_args (phi);
+  for (int i = 0; i < n; i++)
+    {
+      tree arg = PHI_ARG_DEF (phi, i);
+      if (TREE_CODE (arg) != REAL_CST)
+       continue;
+      else if (cst == NULL_TREE)
+       cst = arg;
+      else if (!operand_equal_p (cst, arg, 0))
+       return true;
+    }
+
+  /* No PHI argument is a REAL_CST: there is no cst2 to add cst1 to, and
+     TREE_TYPE (cst) below must not be evaluated on NULL_TREE.  */
+  if (cst == NULL_TREE)
+    return true;
+
+  tree cst2 = const_binop (PLUS_EXPR, TREE_TYPE (cst), cst,
+                          gimple_assign_rhs2 (def));
+  if (cst2
+      && TREE_CODE (cst2) == REAL_CST
+      && real_isinteger (TREE_REAL_CST_PTR (cst2),
+                        TYPE_MODE (TREE_TYPE (cst2))))
+    return false;
+  return true;
+}
--- gcc/match.pd.jj     2018-03-13 09:12:29.579110925 +0100
+++ gcc/match.pd        2018-03-27 18:29:38.292936995 +0200
@@ -4016,7 +4016,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
           because exp(log(C)*x), while faster, will have worse precision
           and if x folds into a constant too, that is unnecessary
           pessimization.  */
-       && canonicalize_math_after_vectorization_p ())
+       && canonicalize_math_after_vectorization_p ()
+       && optimize_pow_to_exp (@0, @1))
     (with {
        const REAL_VALUE_TYPE *const value = TREE_REAL_CST_PTR (@0);
        bool use_exp2 = false;
--- gcc/testsuite/gcc.dg/pr82004.c.jj   2018-03-27 18:02:27.135309786 +0200
+++ gcc/testsuite/gcc.dg/pr82004.c      2018-03-27 17:10:49.070052010 +0200
@@ -0,0 +1,32 @@
+/* PR tree-optimization/82004 */
+/* { dg-do run } */
+/* { dg-options "-Ofast" } */
+
+extern double log10 (double);
+extern double pow (double, double);
+
+__attribute__((noipa)) void
+bar (double x)
+{
+  if (x < 0.001)  /* The PR's failure mode: a result below 0.001.  */
+    __builtin_abort ();
+  asm volatile ("" : : : "memory");
+}
+
+int
+main ()
+{
+  double d = 0.001;
+  double e = 10.0;
+  double f = (log10 (e) - log10 (d)) / 400.0;  /* Exponent step.  */
+  double g = log10 (d) - f;  /* Starts one step below log10 (d).  */
+  volatile int q = 0;
+  int i;
+  if (__builtin_expect (q == 0, 0))
+    for (i = 0; i < 400; ++i)
+      {
+        g = g + f;  /* Per the PR, meant to be exactly -3 at i == 0.  */
+        bar (pow (10.0, g));
+      }
+  return 0;  
+}

        Jakub

Reply via email to