Hi All,
this patch implements a optimization rewriting
x * copysign (1.0, y)
to:
x ^ (y & (1 << sign_bit_position))
This is only done when not honoring signaling NaNs.
This transormation is done at ssa mult widening time and
is gated on the a check for the optab "xorsign".
If the optab is not available then copysign is expanded as normal.
If the optab exists then the expression is replaced with
a call to the internal function XORSIGN.
This patch is a revival of a previous patches
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00069.html
https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00749.html
Bootstrapped on both aarch64-none-linux-gnu and x86_64 with no issues.
Regression done on aarch64-none-linux-gnu and no regressions.
Ok for trunk?
gcc/
2017-08-03 Tamar Christina <[email protected]>
Andrew Pinski <[email protected]>
PR middle-end/19706
* internal-fn.def (XORSIGN): New.
* optabs.def (xorsign_optab): New.
* tree-ssa-math-opts.c (is_copysign_call_with_1): New.
(convert_expand_mult_copysign): New.
(pass_optimize_widening_mul::execute): Call
convert_expand_mult_copysign.
--
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index a9a3f7606eb2a79f64dab1b7fdeef0d308e3061d..58e5f4a322a92ccb842ab05cc4213933ffa59679 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -129,6 +129,8 @@ DEF_INTERNAL_FLT_FN (REMAINDER, ECF_CONST, remainder, binary)
DEF_INTERNAL_FLT_FN (SCALB, ECF_CONST, scalb, binary)
DEF_INTERNAL_FLT_FN (FMIN, ECF_CONST, fmin, binary)
DEF_INTERNAL_FLT_FN (FMAX, ECF_CONST, fmax, binary)
+DEF_INTERNAL_OPTAB_FN (XORSIGN, ECF_CONST, xorsign, binary)
+
/* FP scales. */
DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index f21f2267ec2118d5cd0e74b18721525a564d16f2..54afe2d796ee9af3bd7b25d93eb0789a70e47c7b 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -255,6 +255,7 @@ OPTAB_D (asin_optab, "asin$a2")
OPTAB_D (atan2_optab, "atan2$a3")
OPTAB_D (atan_optab, "atan$a2")
OPTAB_D (copysign_optab, "copysign$F$a3")
+OPTAB_D (xorsign_optab, "xorsign$F$a3")
OPTAB_D (cos_optab, "cos$a2")
OPTAB_D (exp10_optab, "exp10$a2")
OPTAB_D (exp2_optab, "exp2$a2")
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 7ac1659fa0670b7080685f3f9513939807073a63..780a7f76ce756cfe025e80845208b00568eda56c 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -3145,6 +3145,96 @@ is_widening_mult_p (gimple *stmt,
return true;
}
+/* Check to see if the CALL statement is an invocation of copysign
+ with 1. being the first argument. */
+static bool
+is_copysign_call_with_1 (gimple *call)
+{
+ if (!is_gimple_call (call))
+ return false;
+
+ enum combined_fn code = gimple_call_combined_fn (call);
+
+ if (code == CFN_LAST)
+ return false;
+
+ gcall *c = as_a<gcall*> (call);
+
+ if (builtin_fn_p (code))
+ {
+ switch (as_builtin_fn (code))
+ {
+ CASE_FLT_FN (BUILT_IN_COPYSIGN):
+ CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
+ return real_onep (gimple_call_arg (c, 0));
+ default:
+ return false;
+ }
+ }
+
+ if (internal_fn_p (code))
+ {
+ switch (as_internal_fn (code))
+ {
+ case IFN_COPYSIGN:
+ return real_onep (gimple_call_arg (c, 0));
+ default:
+ return false;
+ }
+ }
+
+ return false;
+}
+
+/* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
+ This only happens when the the xorsign optab is defined, if the
+ pattern is not a xorsign pattern or if expansion fails FALSE is
+ returned, otherwise TRUE is returned. */
+static bool
+convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
+{
+ tree treeop0, treeop1, lhs, type;
+ location_t loc = gimple_location (stmt);
+ lhs = gimple_assign_lhs (stmt);
+ treeop0 = gimple_assign_rhs1 (stmt);
+ treeop1 = gimple_assign_rhs2 (stmt);
+ type = TREE_TYPE (lhs);
+ machine_mode mode = TYPE_MODE (type);
+
+ if (HONOR_SNANS (type))
+ return false;
+
+ if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
+ {
+ gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
+ if (!is_copysign_call_with_1 (call0))
+ {
+ /* IPA sometimes inlines and then extracts the function again,
+ resulting in an incorrect order, so check both ways. */
+ call0 = SSA_NAME_DEF_STMT (treeop1);
+ if (!is_copysign_call_with_1 (call0))
+ return false;
+
+ treeop1 = treeop0;
+ }
+
+ if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
+ return false;
+
+ gcall *c = as_a<gcall*> (call0);
+ treeop0 = gimple_call_arg (c, 1);
+
+ gcall *call_stmt
+ = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1,treeop0);
+ gimple_set_lhs (call_stmt, lhs);
+ gimple_set_location (call_stmt, loc);
+ gsi_insert_after (gsi, call_stmt, GSI_SAME_STMT);
+ return true;
+ }
+
+ return false;
+}
+
/* Process a single gimple statement STMT, which has a MULT_EXPR as
its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
value is true iff we converted the statement. */
@@ -4122,6 +4212,11 @@ pass_optimize_widening_mul::execute (function *fun)
release_defs (stmt);
continue;
}
+ if (convert_expand_mult_copysign (stmt, &gsi))
+ {
+ gsi_remove (&gsi, true);
+ continue;
+ }
break;
case PLUS_EXPR: