On Mon, 12 Feb 2018, Jakub Jelinek wrote: > On Sat, Feb 10, 2018 at 03:26:46PM +0100, Jakub Jelinek wrote: > > If use_exp2 is true and (cfun->curr_properties & PROP_gimple_lvec) == 0, > > don't fold it? Then I guess if we vectorize or slp vectorize the pow > > as vector pow, we'd need to match.pd it into the exp (log (vec_cst) * x). > > Here is an updated patch, that defers it for pow (0x2.0pN, x) until after > vectorization and adds tree-vect-patterns.c matcher that will handle it > during vectorization (that one using exp, because we don't have exp2 > vectorized). > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok. Thanks, Richard. > 2018-02-12 Jakub Jelinek <ja...@redhat.com> > > PR middle-end/84309 > * match.pd (pow(C,x) -> exp(log(C)*x)): Optimize instead into > exp2(log2(C)*x) if C is a power of 2 and c99 runtime is available. > * generic-match-head.c (canonicalize_math_after_vectorization_p): New > inline function. > * gimple-match-head.c (canonicalize_math_after_vectorization_p): New > inline function. > * omp-simd-clone.h: New file. > * omp-simd-clone.c: Include omp-simd-clone.h. > (expand_simd_clones): No longer static. > * tree-vect-patterns.c: Include fold-const-call.h, attribs.h, > cgraph.h and omp-simd-clone.h. > (vect_recog_pow_pattern): Optimize pow(C,x) to exp(log(C)*x). > (vect_recog_widen_shift_pattern): Formatting fix. > (vect_pattern_recog_1): Don't check optab for calls. > > * gcc.dg/pr84309.c: New test. > * gcc.target/i386/pr84309.c: New test. > > --- gcc/match.pd.jj 2018-02-09 19:11:26.910070491 +0100 > +++ gcc/match.pd 2018-02-12 14:15:05.653779352 +0100 > @@ -3992,15 +3992,36 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (logs (pows @0 @1)) > (mult @1 (logs @0)))) > > - /* pow(C,x) -> exp(log(C)*x) if C > 0. */ > + /* pow(C,x) -> exp(log(C)*x) if C > 0, > + or if C is a positive power of 2, > + pow(C,x) -> exp2(log2(C)*x). 
*/ > (for pows (POW) > exps (EXP) > logs (LOG) > + exp2s (EXP2) > + log2s (LOG2) > (simplify > (pows REAL_CST@0 @1) > - (if (real_compare (GT_EXPR, TREE_REAL_CST_PTR (@0), &dconst0) > - && real_isfinite (TREE_REAL_CST_PTR (@0))) > - (exps (mult (logs @0) @1))))) > + (if (real_compare (GT_EXPR, TREE_REAL_CST_PTR (@0), &dconst0) > + && real_isfinite (TREE_REAL_CST_PTR (@0))) > + (with { > + const REAL_VALUE_TYPE *const value = TREE_REAL_CST_PTR (@0); > + bool use_exp2 = false; > + if (targetm.libc_has_function (function_c99_misc) > + && value->cl == rvc_normal) > + { > + REAL_VALUE_TYPE frac_rvt = *value; > + SET_REAL_EXP (&frac_rvt, 1); > + if (real_equal (&frac_rvt, &dconst1)) > + use_exp2 = true; > + } > + } > + (if (!use_exp2) > + (exps (mult (logs @0) @1)) > + /* As libmvec doesn't have a vectorized exp2, defer optimizing > + this until after vectorization. */ > + (if (canonicalize_math_after_vectorization_p ()) > + (exp2s (mult (log2s @0) @1)))))))) > > (for sqrts (SQRT) > cbrts (CBRT) > --- gcc/generic-match-head.c.jj 2018-01-03 10:19:55.454534005 +0100 > +++ gcc/generic-match-head.c 2018-02-12 14:13:27.088784495 +0100 > @@ -68,3 +68,12 @@ canonicalize_math_p () > { > return true; > } > + > +/* Return true if math operations that are beneficial only after > + vectorization should be canonicalized. 
*/ > + > +static inline bool > +canonicalize_math_after_vectorization_p () > +{ > + return !cfun || (cfun->curr_properties & PROP_gimple_lvec) != 0; > +} > --- gcc/omp-simd-clone.h.jj 2018-02-12 18:11:01.843931808 +0100 > +++ gcc/omp-simd-clone.h 2018-02-12 18:12:13.901948041 +0100 > @@ -0,0 +1,26 @@ > +/* OMP constructs' SIMD clone supporting code. > + > + Copyright (C) 2005-2018 Free Software Foundation, Inc. > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it under > +the terms of the GNU General Public License as published by the Free > +Software Foundation; either version 3, or (at your option) any later > +version. > + > +GCC is distributed in the hope that it will be useful, but WITHOUT ANY > +WARRANTY; without even the implied warranty of MERCHANTABILITY or > +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > +for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>. */ > + > +#ifndef GCC_OMP_SIMD_CLONE_H > +#define GCC_OMP_SIMD_CLONE_H > + > +extern void expand_simd_clones (struct cgraph_node *); > + > +#endif /* GCC_OMP_SIMD_CLONE_H */ > --- gcc/omp-simd-clone.c.jj 2018-01-25 16:31:35.464138243 +0100 > +++ gcc/omp-simd-clone.c 2018-02-12 18:10:48.214928742 +0100 > @@ -50,6 +50,7 @@ along with GCC; see the file COPYING3. > #include "varasm.h" > #include "stringpool.h" > #include "attribs.h" > +#include "omp-simd-clone.h" > > /* Return the number of elements in vector type VECTYPE, which is associated > with a SIMD clone. At present these always have a constant length. */ > @@ -1568,7 +1569,7 @@ simd_clone_adjust (struct cgraph_node *n > /* If the function in NODE is tagged as an elemental SIMD function, > create the appropriate SIMD clones. 
*/ > > -static void > +void > expand_simd_clones (struct cgraph_node *node) > { > tree attr = lookup_attribute ("omp declare simd", > --- gcc/tree-vect-patterns.c.jj 2018-01-23 14:48:52.783269685 +0100 > +++ gcc/tree-vect-patterns.c 2018-02-12 18:15:49.730996661 +0100 > @@ -41,6 +41,10 @@ along with GCC; see the file COPYING3. > #include "builtins.h" > #include "internal-fn.h" > #include "case-cfn-macros.h" > +#include "fold-const-call.h" > +#include "attribs.h" > +#include "cgraph.h" > +#include "omp-simd-clone.h" > > /* Pattern recognition functions */ > static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *, > @@ -1049,7 +1053,7 @@ vect_recog_pow_pattern (vec<gimple *> *s > tree *type_out) > { > gimple *last_stmt = (*stmts)[0]; > - tree base, exp = NULL; > + tree base, exp; > gimple *stmt; > tree var; > > @@ -1060,17 +1064,77 @@ vect_recog_pow_pattern (vec<gimple *> *s > { > CASE_CFN_POW: > CASE_CFN_POWI: > - base = gimple_call_arg (last_stmt, 0); > - exp = gimple_call_arg (last_stmt, 1); > - if (TREE_CODE (exp) != REAL_CST > - && TREE_CODE (exp) != INTEGER_CST) > - return NULL; > break; > > default: > return NULL; > } > > + base = gimple_call_arg (last_stmt, 0); > + exp = gimple_call_arg (last_stmt, 1); > + if (TREE_CODE (exp) != REAL_CST > + && TREE_CODE (exp) != INTEGER_CST) > + { > + if (flag_unsafe_math_optimizations > + && TREE_CODE (base) == REAL_CST > + && !gimple_call_internal_p (last_stmt)) > + { > + combined_fn log_cfn; > + built_in_function exp_bfn; > + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt))) > + { > + case BUILT_IN_POW: > + log_cfn = CFN_BUILT_IN_LOG; > + exp_bfn = BUILT_IN_EXP; > + break; > + case BUILT_IN_POWF: > + log_cfn = CFN_BUILT_IN_LOGF; > + exp_bfn = BUILT_IN_EXPF; > + break; > + case BUILT_IN_POWL: > + log_cfn = CFN_BUILT_IN_LOGL; > + exp_bfn = BUILT_IN_EXPL; > + break; > + default: > + return NULL; > + } > + tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base); > + tree exp_decl = 
builtin_decl_implicit (exp_bfn); > + /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd > + does that, but if C is a power of 2, we want to use > + exp2 (log2 (C) * x) in the non-vectorized version, but for > + vectorization we don't have vectorized exp2. */ > + if (logc > + && TREE_CODE (logc) == REAL_CST > + && exp_decl > + && lookup_attribute ("omp declare simd", > + DECL_ATTRIBUTES (exp_decl))) > + { > + cgraph_node *node = cgraph_node::get_create (exp_decl); > + if (node->simd_clones == NULL) > + { > + if (node->definition) > + return NULL; > + expand_simd_clones (node); > + if (node->simd_clones == NULL) > + return NULL; > + } > + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); > + tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); > + gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc); > + new_pattern_def_seq (stmt_vinfo, g); > + *type_in = TREE_TYPE (base); > + *type_out = NULL_TREE; > + tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); > + g = gimple_build_call (exp_decl, 1, def); > + gimple_call_set_lhs (g, res); > + return g; > + } > + } > + > + return NULL; > + } > + > /* We now have a pow or powi builtin function call with a constant > exponent. */ > > @@ -1744,8 +1808,8 @@ vect_recog_widen_shift_pattern (vec<gimp > > /* Pattern supported. Create a stmt to be used to replace the pattern. 
*/ > var = vect_recog_temp_ssa_var (type, NULL); > - pattern_stmt = > - gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1); > + pattern_stmt > + = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1); > if (wstmt) > { > stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); > @@ -4439,10 +4503,6 @@ vect_pattern_recog_1 (vect_recog_func *r > } > else > { > - machine_mode vec_mode; > - enum insn_code icode; > - optab optab; > - > /* Check target support */ > type_in = get_vectype_for_scalar_type (type_in); > if (!type_in) > @@ -4456,19 +4516,18 @@ vect_pattern_recog_1 (vect_recog_func *r > pattern_vectype = type_out; > > if (is_gimple_assign (pattern_stmt)) > - code = gimple_assign_rhs_code (pattern_stmt); > - else > - { > - gcc_assert (is_gimple_call (pattern_stmt)); > - code = CALL_EXPR; > + { > + enum insn_code icode; > + code = gimple_assign_rhs_code (pattern_stmt); > + optab optab = optab_for_tree_code (code, type_in, optab_default); > + machine_mode vec_mode = TYPE_MODE (type_in); > + if (!optab > + || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing > + || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out))) > + return false; > } > - > - optab = optab_for_tree_code (code, type_in, optab_default); > - vec_mode = TYPE_MODE (type_in); > - if (!optab > - || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing > - || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out))) > - return false; > + else > + gcc_assert (is_gimple_call (pattern_stmt)); > } > > /* Found a vectorizable pattern. 
*/ > --- gcc/testsuite/gcc.dg/pr84309.c.jj 2018-02-12 12:24:22.214522183 +0100 > +++ gcc/testsuite/gcc.dg/pr84309.c 2018-02-12 12:24:22.214522183 +0100 > @@ -0,0 +1,14 @@ > +/* PR middle-end/84309 */ > +/* { dg-do run { target c99_runtime } } */ > +/* { dg-options "-O2 -ffast-math" } */ > + > +int > +main () > +{ > + unsigned long a = 1024; > + unsigned long b = 16 * 1024; > + unsigned long c = __builtin_pow (2, (__builtin_log2 (a) + __builtin_log2 > (b)) / 2); > + if (c != 4096) > + __builtin_abort (); > + return 0; > +} > --- gcc/testsuite/gcc.target/i386/pr84309.c.jj 2018-02-12 > 18:20:15.819056596 +0100 > +++ gcc/testsuite/gcc.target/i386/pr84309.c 2018-02-12 18:21:00.462066648 > +0100 > @@ -0,0 +1,16 @@ > +/* PR middle-end/84309 */ > +/* { dg-do compile } */ > +/* { dg-options "-Ofast -mavx" } */ > + > +double pow (double, double) __attribute__((simd)); > +double exp (double) __attribute__((simd)); > +extern double a[1024], b[1024]; > + > +void > +foo (void) > +{ > + for (int i = 0; i < 1024; ++i) > + a[i] = pow (2.0, b[i]); > +} > + > +/* { dg-final { scan-assembler "_ZGVcN4v_exp" } } */ > > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)