Hi All, this patch implements an optimization rewriting
x * copysign (1.0, y) to: x ^ (y & (1 << sign_bit_position)). This is only done when not honoring signaling NaNs. This transformation is done at SSA mult widening time and is gated on a check for the optab "xorsign". If the optab is not available then copysign is expanded as normal. If the optab exists then the expression is replaced with a call to the internal function XORSIGN. This patch is a revival of previous patches: https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00069.html https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00749.html Bootstrapped on both aarch64-none-linux-gnu and x86_64 with no issues. Regression testing done on aarch64-none-linux-gnu with no regressions. Ok for trunk? gcc/ 2017-08-03 Tamar Christina <tamar.christ...@arm.com> Andrew Pinski <pins...@gmail.com> PR middle-end/19706 * internal-fn.def (XORSIGN): New. * optabs.def (xorsign_optab): New. * tree-ssa-math-opts.c (is_copysign_call_with_1): New. (convert_expand_mult_copysign): New. (pass_optimize_widening_mul::execute): Call convert_expand_mult_copysign. --
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index a9a3f7606eb2a79f64dab1b7fdeef0d308e3061d..58e5f4a322a92ccb842ab05cc4213933ffa59679 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -129,6 +129,8 @@ DEF_INTERNAL_FLT_FN (REMAINDER, ECF_CONST, remainder, binary) DEF_INTERNAL_FLT_FN (SCALB, ECF_CONST, scalb, binary) DEF_INTERNAL_FLT_FN (FMIN, ECF_CONST, fmin, binary) DEF_INTERNAL_FLT_FN (FMAX, ECF_CONST, fmax, binary) +DEF_INTERNAL_OPTAB_FN (XORSIGN, ECF_CONST, xorsign, binary) + /* FP scales. */ DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary) diff --git a/gcc/optabs.def b/gcc/optabs.def index f21f2267ec2118d5cd0e74b18721525a564d16f2..54afe2d796ee9af3bd7b25d93eb0789a70e47c7b 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -255,6 +255,7 @@ OPTAB_D (asin_optab, "asin$a2") OPTAB_D (atan2_optab, "atan2$a3") OPTAB_D (atan_optab, "atan$a2") OPTAB_D (copysign_optab, "copysign$F$a3") +OPTAB_D (xorsign_optab, "xorsign$F$a3") OPTAB_D (cos_optab, "cos$a2") OPTAB_D (exp10_optab, "exp10$a2") OPTAB_D (exp2_optab, "exp2$a2") diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 7ac1659fa0670b7080685f3f9513939807073a63..780a7f76ce756cfe025e80845208b00568eda56c 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -3145,6 +3145,96 @@ is_widening_mult_p (gimple *stmt, return true; } +/* Check to see if the CALL statement is an invocation of copysign + with 1. being the first argument. 
*/ +static bool +is_copysign_call_with_1 (gimple *call) +{ + if (!is_gimple_call (call)) + return false; + + enum combined_fn code = gimple_call_combined_fn (call); + + if (code == CFN_LAST) + return false; + + gcall *c = as_a<gcall*> (call); + + if (builtin_fn_p (code)) + { + switch (as_builtin_fn (code)) + { + CASE_FLT_FN (BUILT_IN_COPYSIGN): + CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN): + return real_onep (gimple_call_arg (c, 0)); + default: + return false; + } + } + + if (internal_fn_p (code)) + { + switch (as_internal_fn (code)) + { + case IFN_COPYSIGN: + return real_onep (gimple_call_arg (c, 0)); + default: + return false; + } + } + + return false; +} + +/* Try to expand the pattern x * copysign (1, y) into xorsign (x, y). + This only happens when the the xorsign optab is defined, if the + pattern is not a xorsign pattern or if expansion fails FALSE is + returned, otherwise TRUE is returned. */ +static bool +convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi) +{ + tree treeop0, treeop1, lhs, type; + location_t loc = gimple_location (stmt); + lhs = gimple_assign_lhs (stmt); + treeop0 = gimple_assign_rhs1 (stmt); + treeop1 = gimple_assign_rhs2 (stmt); + type = TREE_TYPE (lhs); + machine_mode mode = TYPE_MODE (type); + + if (HONOR_SNANS (type)) + return false; + + if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME) + { + gimple *call0 = SSA_NAME_DEF_STMT (treeop0); + if (!is_copysign_call_with_1 (call0)) + { + /* IPA sometimes inlines and then extracts the function again, + resulting in an incorrect order, so check both ways. 
*/ + call0 = SSA_NAME_DEF_STMT (treeop1); + if (!is_copysign_call_with_1 (call0)) + return false; + + treeop1 = treeop0; + } + + if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing) + return false; + + gcall *c = as_a<gcall*> (call0); + treeop0 = gimple_call_arg (c, 1); + + gcall *call_stmt + = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1,treeop0); + gimple_set_lhs (call_stmt, lhs); + gimple_set_location (call_stmt, loc); + gsi_insert_after (gsi, call_stmt, GSI_SAME_STMT); + return true; + } + + return false; +} + /* Process a single gimple statement STMT, which has a MULT_EXPR as its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return value is true iff we converted the statement. */ @@ -4122,6 +4212,11 @@ pass_optimize_widening_mul::execute (function *fun) release_defs (stmt); continue; } + if (convert_expand_mult_copysign (stmt, &gsi)) + { + gsi_remove (&gsi, true); + continue; + } break; case PLUS_EXPR: