On Wed, Mar 28, 2018 at 09:46:52AM +0200, Richard Biener wrote:
> > +/* Return true if pow(cst, x) should be optimized into exp(log(cst) * x).  */
> > +
> > +static bool
> > +optimize_pow_to_exp (tree arg0, tree arg1)
> > +{
> > +  return false;
> > +}
>
> So instead of this simply guard the pattern with #if GIMPLE as we already
> do for some.  That saves in generic-match.c code size.
Ok.

> > --- gcc/gimple-match-head.c.jj	2018-02-13 09:33:31.107560174 +0100
> > +++ gcc/gimple-match-head.c	2018-03-27 18:48:21.205369113 +0200
> > @@ -840,3 +840,55 @@ canonicalize_math_after_vectorization_p
> >  {
> >    return !cfun || (cfun->curr_properties & PROP_gimple_lvec) != 0;
> >  }
> > +
> > +/* Return true if pow(cst, x) should be optimized into exp(log(cst) * x).
> > +   As a workaround for SPEC CPU2017 628.pop2_s, don't do it if arg0
> > +   is 10.0, arg1 = phi_res + cst1 and phi_res = PHI <cst2, ...>
> > +   where cst1 + cst2 is an exact integer, because then pow (10.0, arg1)
> > +   will likely be exact, while exp (log (10.0) * arg1) might be not.  */
>
> So this looks somewhat like a very specific SPEC hack to me.
> Can we make this slightly more generic and consider
>
>   pow (integer, cst1 [+- cst2])
>
> where the 2nd arg is an exact integer?  Basically allow not only
> 10 for arg0 but any real_is_integer plus allow arg1 to be directly
> fed by the PHI, not only via an intermediate plus (here also consider
> a minus, negate or other binary op we can feed to const_binop).

Like this?  I'm just handling PLUS_EXPR, MINUS_EXPR and the case of no
+/- at all.  Furthermore, I've moved the check to the exp replacement
only: if we use exp2, it is fine to optimize even when cst1 + cst2 is
an integer, because it is just exp and log that aren't exact for
originally integral values.
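To make the exactness issue concrete, here is a minimal standalone
example (not part of the patch; the exact values printed are of course
libm dependent):

  #include <stdio.h>
  #include <math.h>

  int
  main ()
  {
    /* pow with an integral base and exponent is typically computed
       exactly, so this prints 1000.  */
    printf ("%.17g\n", pow (10.0, 3.0));
    /* The exp/log form rounds at every step, so this can print
       something like 999.99999999999989 instead.  */
    printf ("%.17g\n", exp (log (10.0) * 3.0));
    return 0;
  }

The testcase below relies on exactly this: at the integral steps
pow (10.0, g) stays at or above 0.001, while the exp (log (10.0) * g)
form can come out just below it and trigger the abort in bar ().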
> Btw, did anybody file a defect with SPEC for this?

I don't really have any SPEC experience, so will defer that to those
involved with SPEC benchmarking.  The fix would be to add some small
epsilon to the

          if (chlcnc(m) .le. chlamnt .and.  &
              chlamnt .le. chlcnc(m+1) ) then

comparisons, but I bet they'll argue that the upstream pop does it that
way.

2018-03-28  Jakub Jelinek  <ja...@redhat.com>

	PR tree-optimization/82004
	* gimple-match-head.c (optimize_pow_to_exp): New function.
	* match.pd (pow(C,x) -> exp(log(C)*x)): Wrap with #if GIMPLE.
	Don't fold to exp if optimize_pow_to_exp is false.

	* gcc.dg/pr82004.c: New test.

--- gcc/gimple-match-head.c.jj	2018-03-27 19:08:32.047824741 +0200
+++ gcc/gimple-match-head.c	2018-03-28 15:30:42.687565271 +0200
@@ -840,3 +840,71 @@ canonicalize_math_after_vectorization_p
 {
   return !cfun || (cfun->curr_properties & PROP_gimple_lvec) != 0;
 }
+
+/* Return true if pow(cst, x) should be optimized into exp(log(cst) * x).
+   As a workaround for SPEC CPU2017 628.pop2_s, don't do it if arg0
+   is an exact integer, arg1 = phi_res +/- cst1 and phi_res = PHI <cst2, ...>
+   where cst2 +/- cst1 is an exact integer, because then pow (arg0, arg1)
+   will likely be exact, while exp (log (arg0) * arg1) might be not.
+   Also don't do it if arg1 is phi_res above and cst2 is an exact integer.  */
+
+static bool
+optimize_pow_to_exp (tree arg0, tree arg1)
+{
+  gcc_assert (TREE_CODE (arg0) == REAL_CST);
+  if (!real_isinteger (TREE_REAL_CST_PTR (arg0), TYPE_MODE (TREE_TYPE (arg0))))
+    return true;
+
+  if (TREE_CODE (arg1) != SSA_NAME)
+    return true;
+
+  gimple *def = SSA_NAME_DEF_STMT (arg1);
+  gphi *phi = dyn_cast <gphi *> (def);
+  tree cst1 = NULL_TREE;
+  enum tree_code code = ERROR_MARK;
+  if (!phi)
+    {
+      if (!is_gimple_assign (def))
+	return true;
+      code = gimple_assign_rhs_code (def);
+      switch (code)
+	{
+	case PLUS_EXPR:
+	case MINUS_EXPR:
+	  break;
+	default:
+	  return true;
+	}
+      if (TREE_CODE (gimple_assign_rhs1 (def)) != SSA_NAME
+	  || TREE_CODE (gimple_assign_rhs2 (def)) != REAL_CST)
+	return true;
+
+      cst1 = gimple_assign_rhs2 (def);
+
+      phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def)));
+      if (!phi)
+	return true;
+    }
+
+  tree cst2 = NULL_TREE;
+  int n = gimple_phi_num_args (phi);
+  for (int i = 0; i < n; i++)
+    {
+      tree arg = PHI_ARG_DEF (phi, i);
+      if (TREE_CODE (arg) != REAL_CST)
+	continue;
+      else if (cst2 == NULL_TREE)
+	cst2 = arg;
+      else if (!operand_equal_p (cst2, arg, 0))
+	return true;
+    }
+
+  if (cst1 && cst2)
+    cst2 = const_binop (code, TREE_TYPE (cst2), cst2, cst1);
+  if (cst2
+      && TREE_CODE (cst2) == REAL_CST
+      && real_isinteger (TREE_REAL_CST_PTR (cst2),
+			 TYPE_MODE (TREE_TYPE (cst2))))
+    return false;
+  return true;
+}
--- gcc/match.pd.jj	2018-03-27 19:08:36.336826318 +0200
+++ gcc/match.pd	2018-03-28 15:19:50.208231654 +0200
@@ -4006,6 +4006,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* pow(C,x) -> exp(log(C)*x) if C > 0,
    or if C is a positive power of 2,
    pow(C,x) -> exp2(log2(C)*x).  */
+#if GIMPLE
 (for pows (POW)
      exps (EXP)
      logs (LOG)
@@ -4035,8 +4036,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       }
    }
    (if (!use_exp2)
-    (exps (mult (logs @0) @1))
+    (if (optimize_pow_to_exp (@0, @1))
+     (exps (mult (logs @0) @1)))
     (exp2s (mult (log2s @0) @1)))))))
+#endif
 
 /* pow(C,x)*expN(y) -> expN(logN(C)*x+y) if C > 0.  */
 (for pows (POW)
--- gcc/testsuite/gcc.dg/pr82004.c.jj	2018-03-28 15:16:09.417118760 +0200
+++ gcc/testsuite/gcc.dg/pr82004.c	2018-03-28 15:16:09.417118760 +0200
@@ -0,0 +1,32 @@
+/* PR tree-optimization/82004 */
+/* { dg-do run } */
+/* { dg-options "-Ofast" } */
+
+extern double log10 (double);
+extern double pow (double, double);
+
+__attribute__((noipa)) void
+bar (double x)
+{
+  if (x < 0.001)
+    __builtin_abort ();
+  asm volatile ("" : : : "memory");
+}
+
+int
+main ()
+{
+  double d = 0.001;
+  double e = 10.0;
+  double f = (log10 (e) - log10 (d)) / 400.0;
+  double g = log10 (d) - f;
+  volatile int q = 0;
+  int i;
+  if (__builtin_expect (q == 0, 0))
+    for (i = 0; i < 400; ++i)
+      {
+	g = g + f;
+	bar (pow (10.0, g));
+      }
+  return 0;
+}

	Jakub