On Sat, Feb 10, 2018 at 03:26:46PM +0100, Jakub Jelinek wrote: > If use_exp2 is true and (cfun->curr_properties & PROP_gimple_lvec) == 0, > don't fold it? Then I guess if we vectorize or slp vectorize the pow > as vector pow, we'd need to match.pd it into the exp (log (vec_cst) * x).
Here is an updated patch, that defers it for pow (0x2.0pN, x) until after vectorization and adds tree-vect-patterns.c matcher that will handle it during vectorization (that one using exp, because we don't have exp2 vectorized). Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2018-02-12 Jakub Jelinek <ja...@redhat.com> PR middle-end/84309 * match.pd (pow(C,x) -> exp(log(C)*x)): Optimize instead into exp2(log2(C)*x) if C is a power of 2 and c99 runtime is available. * generic-match-head.c (canonicalize_math_after_vectorization_p): New inline function. * gimple-match-head.c (canonicalize_math_after_vectorization_p): New inline function. * omp-simd-clone.h: New file. * omp-simd-clone.c: Include omp-simd-clone.h. (expand_simd_clones): No longer static. * tree-vect-patterns.c: Include fold-const-call.h, attribs.h, cgraph.h and omp-simd-clone.h. (vect_recog_pow_pattern): Optimize pow(C,x) to exp(log(C)*x). (vect_recog_widen_shift_pattern): Formatting fix. (vect_pattern_recog_1): Don't check optab for calls. * gcc.dg/pr84309.c: New test. * gcc.target/i386/pr84309.c: New test. --- gcc/match.pd.jj 2018-02-09 19:11:26.910070491 +0100 +++ gcc/match.pd 2018-02-12 14:15:05.653779352 +0100 @@ -3992,15 +3992,36 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (logs (pows @0 @1)) (mult @1 (logs @0)))) - /* pow(C,x) -> exp(log(C)*x) if C > 0. */ + /* pow(C,x) -> exp(log(C)*x) if C > 0, + or if C is a positive power of 2, + pow(C,x) -> exp2(log2(C)*x). 
*/ (for pows (POW) exps (EXP) logs (LOG) + exp2s (EXP2) + log2s (LOG2) (simplify (pows REAL_CST@0 @1) - (if (real_compare (GT_EXPR, TREE_REAL_CST_PTR (@0), &dconst0) - && real_isfinite (TREE_REAL_CST_PTR (@0))) - (exps (mult (logs @0) @1))))) + (if (real_compare (GT_EXPR, TREE_REAL_CST_PTR (@0), &dconst0) + && real_isfinite (TREE_REAL_CST_PTR (@0))) + (with { + const REAL_VALUE_TYPE *const value = TREE_REAL_CST_PTR (@0); + bool use_exp2 = false; + if (targetm.libc_has_function (function_c99_misc) + && value->cl == rvc_normal) + { + REAL_VALUE_TYPE frac_rvt = *value; + SET_REAL_EXP (&frac_rvt, 1); + if (real_equal (&frac_rvt, &dconst1)) + use_exp2 = true; + } + } + (if (!use_exp2) + (exps (mult (logs @0) @1)) + /* As libmvec doesn't have a vectorized exp2, defer optimizing + this until after vectorization. */ + (if (canonicalize_math_after_vectorization_p ()) + (exp2s (mult (log2s @0) @1)))))))) (for sqrts (SQRT) cbrts (CBRT) --- gcc/generic-match-head.c.jj 2018-01-03 10:19:55.454534005 +0100 +++ gcc/generic-match-head.c 2018-02-12 14:13:27.088784495 +0100 @@ -68,3 +68,12 @@ canonicalize_math_p () { return true; } + +/* Return true if math operations that are beneficial only after + vectorization should be canonicalized. */ + +static inline bool +canonicalize_math_after_vectorization_p () +{ + return false; +} --- gcc/gimple-match-head.c.jj 2018-01-03 10:19:55.931534081 +0100 +++ gcc/gimple-match-head.c 2018-02-12 14:14:17.352781873 +0100 @@ -831,3 +831,12 @@ canonicalize_math_p () { return !cfun || (cfun->curr_properties & PROP_gimple_opt_math) == 0; } + +/* Return true if math operations that are beneficial only after + vectorization should be canonicalized. 
*/ + +static inline bool +canonicalize_math_after_vectorization_p () +{ + return !cfun || (cfun->curr_properties & PROP_gimple_lvec) != 0; +} --- gcc/omp-simd-clone.h.jj 2018-02-12 18:11:01.843931808 +0100 +++ gcc/omp-simd-clone.h 2018-02-12 18:12:13.901948041 +0100 @@ -0,0 +1,26 @@ +/* OMP constructs' SIMD clone supporting code. + + Copyright (C) 2005-2018 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef GCC_OMP_SIMD_CLONE_H +#define GCC_OMP_SIMD_CLONE_H + +extern void expand_simd_clones (struct cgraph_node *); + +#endif /* GCC_OMP_SIMD_CLONE_H */ --- gcc/omp-simd-clone.c.jj 2018-01-25 16:31:35.464138243 +0100 +++ gcc/omp-simd-clone.c 2018-02-12 18:10:48.214928742 +0100 @@ -50,6 +50,7 @@ along with GCC; see the file COPYING3. #include "varasm.h" #include "stringpool.h" #include "attribs.h" +#include "omp-simd-clone.h" /* Return the number of elements in vector type VECTYPE, which is associated with a SIMD clone. At present these always have a constant length. */ @@ -1568,7 +1569,7 @@ simd_clone_adjust (struct cgraph_node *n /* If the function in NODE is tagged as an elemental SIMD function, create the appropriate SIMD clones. 
*/ -static void +void expand_simd_clones (struct cgraph_node *node) { tree attr = lookup_attribute ("omp declare simd", --- gcc/tree-vect-patterns.c.jj 2018-01-23 14:48:52.783269685 +0100 +++ gcc/tree-vect-patterns.c 2018-02-12 18:15:49.730996661 +0100 @@ -41,6 +41,10 @@ along with GCC; see the file COPYING3. #include "builtins.h" #include "internal-fn.h" #include "case-cfn-macros.h" +#include "fold-const-call.h" +#include "attribs.h" +#include "cgraph.h" +#include "omp-simd-clone.h" /* Pattern recognition functions */ static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *, @@ -1049,7 +1053,7 @@ vect_recog_pow_pattern (vec<gimple *> *s tree *type_out) { gimple *last_stmt = (*stmts)[0]; - tree base, exp = NULL; + tree base, exp; gimple *stmt; tree var; @@ -1060,17 +1064,77 @@ vect_recog_pow_pattern (vec<gimple *> *s { CASE_CFN_POW: CASE_CFN_POWI: - base = gimple_call_arg (last_stmt, 0); - exp = gimple_call_arg (last_stmt, 1); - if (TREE_CODE (exp) != REAL_CST - && TREE_CODE (exp) != INTEGER_CST) - return NULL; break; default: return NULL; } + base = gimple_call_arg (last_stmt, 0); + exp = gimple_call_arg (last_stmt, 1); + if (TREE_CODE (exp) != REAL_CST + && TREE_CODE (exp) != INTEGER_CST) + { + if (flag_unsafe_math_optimizations + && TREE_CODE (base) == REAL_CST + && !gimple_call_internal_p (last_stmt)) + { + combined_fn log_cfn; + built_in_function exp_bfn; + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt))) + { + case BUILT_IN_POW: + log_cfn = CFN_BUILT_IN_LOG; + exp_bfn = BUILT_IN_EXP; + break; + case BUILT_IN_POWF: + log_cfn = CFN_BUILT_IN_LOGF; + exp_bfn = BUILT_IN_EXPF; + break; + case BUILT_IN_POWL: + log_cfn = CFN_BUILT_IN_LOGL; + exp_bfn = BUILT_IN_EXPL; + break; + default: + return NULL; + } + tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base); + tree exp_decl = builtin_decl_implicit (exp_bfn); + /* Optimize pow (C, x) as exp (log (C) * x). 
Normally match.pd + does that, but if C is a power of 2, we want to use + exp2 (log2 (C) * x) in the non-vectorized version, but for + vectorization we don't have vectorized exp2. */ + if (logc + && TREE_CODE (logc) == REAL_CST + && exp_decl + && lookup_attribute ("omp declare simd", + DECL_ATTRIBUTES (exp_decl))) + { + cgraph_node *node = cgraph_node::get_create (exp_decl); + if (node->simd_clones == NULL) + { + if (node->definition) + return NULL; + expand_simd_clones (node); + if (node->simd_clones == NULL) + return NULL; + } + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); + gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc); + new_pattern_def_seq (stmt_vinfo, g); + *type_in = TREE_TYPE (base); + *type_out = NULL_TREE; + tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); + g = gimple_build_call (exp_decl, 1, def); + gimple_call_set_lhs (g, res); + return g; + } + } + + return NULL; + } + /* We now have a pow or powi builtin function call with a constant exponent. */ @@ -1744,8 +1808,8 @@ vect_recog_widen_shift_pattern (vec<gimp /* Pattern supported. Create a stmt to be used to replace the pattern. 
*/ var = vect_recog_temp_ssa_var (type, NULL); - pattern_stmt = - gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1); + pattern_stmt + = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1); if (wstmt) { stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); @@ -4439,10 +4503,6 @@ vect_pattern_recog_1 (vect_recog_func *r } else { - machine_mode vec_mode; - enum insn_code icode; - optab optab; - /* Check target support */ type_in = get_vectype_for_scalar_type (type_in); if (!type_in) @@ -4456,19 +4516,18 @@ vect_pattern_recog_1 (vect_recog_func *r pattern_vectype = type_out; if (is_gimple_assign (pattern_stmt)) - code = gimple_assign_rhs_code (pattern_stmt); - else - { - gcc_assert (is_gimple_call (pattern_stmt)); - code = CALL_EXPR; + { + enum insn_code icode; + code = gimple_assign_rhs_code (pattern_stmt); + optab optab = optab_for_tree_code (code, type_in, optab_default); + machine_mode vec_mode = TYPE_MODE (type_in); + if (!optab + || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing + || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out))) + return false; } - - optab = optab_for_tree_code (code, type_in, optab_default); - vec_mode = TYPE_MODE (type_in); - if (!optab - || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing - || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out))) - return false; + else + gcc_assert (is_gimple_call (pattern_stmt)); } /* Found a vectorizable pattern. 
*/ --- gcc/testsuite/gcc.dg/pr84309.c.jj 2018-02-12 12:24:22.214522183 +0100 +++ gcc/testsuite/gcc.dg/pr84309.c 2018-02-12 12:24:22.214522183 +0100 @@ -0,0 +1,14 @@ +/* PR middle-end/84309 */ +/* { dg-do run { target c99_runtime } } */ +/* { dg-options "-O2 -ffast-math" } */ + +int +main () +{ + unsigned long a = 1024; + unsigned long b = 16 * 1024; + unsigned long c = __builtin_pow (2, (__builtin_log2 (a) + __builtin_log2 (b)) / 2); + if (c != 4096) + __builtin_abort (); + return 0; +} --- gcc/testsuite/gcc.target/i386/pr84309.c.jj 2018-02-12 18:20:15.819056596 +0100 +++ gcc/testsuite/gcc.target/i386/pr84309.c 2018-02-12 18:21:00.462066648 +0100 @@ -0,0 +1,16 @@ +/* PR middle-end/84309 */ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mavx" } */ + +double pow (double, double) __attribute__((simd)); +double exp (double) __attribute__((simd)); +extern double a[1024], b[1024]; + +void +foo (void) +{ + for (int i = 0; i < 1024; ++i) + a[i] = pow (2.0, b[i]); +} + +/* { dg-final { scan-assembler "_ZGVcN4v_exp" } } */ Jakub