Re: Optimize n?rotate(x,n):x

2014-05-14 Thread Jeff Law

On 05/01/14 15:52, Marc Glisse wrote:

Hello,

here is the latest version. Reviewers seemed happy with different
versions, so I went for the simplest one. We only give up on the
transformation if we are optimizing for speed, the short-cut has
probability 50% and the operation the branch is avoiding is expensive
(i.e. only divisions, with the current estimate_num_insns). The optab
checks are gone, they should eventually be added to estimate_num_insns
instead.

I believe this is covered by the previous ok, but I won't commit
anything before Tuesday.

It would have been more general (and shorter) to call fold after
substituting the tested value and see if the result matches the other
phi argument (it might handle (a!=b)?a|b:a for instance), instead of the
*_element_p tests. I may try that later, but I guess the current patch
is good enough for now.


2014-05-06  Marc Glisse  marc.gli...@inria.fr

 PR tree-optimization/59100
gcc/
 * tree-ssa-phiopt.c: Include tree-inline.h.
 (neutral_element_p, absorbing_element_p): New functions.
 (value_replacement): Handle conditional binary operations with a
 neutral or absorbing element.
gcc/testsuite/
 * gcc.dg/tree-ssa/phi-opt-12.c: New file.
 * gcc.dg/tree-ssa/phi-opt-13.c: Likewise.
Going with the simpler one is a good default IMHO.  As you note, you can 
always follow-up with refinements if the additional cases turn out to be 
important.


OK for the trunk.

jeff



Re: Optimize n?rotate(x,n):x

2014-05-01 Thread Marc Glisse

Hello,

here is the latest version. Reviewers seemed happy with different 
versions, so I went for the simplest one. We only give up on the 
transformation if we are optimizing for speed, the short-cut has 
probability 50% and the operation the branch is avoiding is expensive 
(i.e. only divisions, with the current estimate_num_insns). The optab 
checks are gone, they should eventually be added to estimate_num_insns 
instead.


I believe this is covered by the previous ok, but I won't commit 
anything before Tuesday.


It would have been more general (and shorter) to call fold after 
substituting the tested value and see if the result matches the other phi 
argument (it might handle (a!=b)?a|b:a for instance), instead of the 
*_element_p tests. I may try that later, but I guess the current patch is 
good enough for now.



2014-05-06  Marc Glisse  marc.gli...@inria.fr

PR tree-optimization/59100
gcc/
* tree-ssa-phiopt.c: Include tree-inline.h.
(neutral_element_p, absorbing_element_p): New functions.
(value_replacement): Handle conditional binary operations with a
neutral or absorbing element.
gcc/testsuite/
* gcc.dg/tree-ssa/phi-opt-12.c: New file.
* gcc.dg/tree-ssa/phi-opt-13.c: Likewise.

--
Marc GlisseIndex: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (working copy)
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options -O -fdump-tree-phiopt1 } */
+
+int f(int a, int b, int c) {
+  if (c  5) return c;
+  if (a == 0) return b;
+  return a + b;
+}
+
+unsigned rot(unsigned x, int n) {
+  const int bits = __CHAR_BIT__ * __SIZEOF_INT__;
+  return (n == 0) ? x : ((x  n) | (x  (bits - n)));
+}
+
+unsigned m(unsigned a, unsigned b) {
+  if (a == 0)
+return 0;
+  else
+return a  b;
+}
+
+/* { dg-final { scan-tree-dump-times goto 2 phiopt1 } } */
+/* { dg-final { cleanup-tree-dump phiopt1 } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (working copy)
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -fdump-tree-optimized } */
+
+// Division is expensive
+long f(long a, long b) {
+  if (__builtin_expect(b == 1, 1)) return a;
+  return a / b;
+}
+
+/* { dg-final { scan-tree-dump-times goto  2 optimized } } */
+/* { dg-final { cleanup-tree-dump optimized } } */
Index: gcc/tree-ssa-phiopt.c
===
--- gcc/tree-ssa-phiopt.c   (revision 209979)
+++ gcc/tree-ssa-phiopt.c   (working copy)
@@ -47,20 +47,21 @@ along with GCC; see the file COPYING3.
 #include tree-pass.h
 #include langhooks.h
 #include domwalk.h
 #include cfgloop.h
 #include tree-data-ref.h
 #include gimple-pretty-print.h
 #include insn-config.h
 #include expr.h
 #include optabs.h
 #include tree-scalar-evolution.h
+#include tree-inline.h
 
 #ifndef HAVE_conditional_move
 #define HAVE_conditional_move (0)
 #endif
 
 static unsigned int tree_ssa_phiopt_worker (bool, bool);
 static bool conditional_replacement (basic_block, basic_block,
 edge, edge, gimple, tree, tree);
 static int value_replacement (basic_block, basic_block,
  edge, edge, gimple, tree, tree);
@@ -652,20 +653,78 @@ operand_equal_for_value_replacement (con
   if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
 return true;
 
   tmp = gimple_assign_rhs2 (def);
   if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
 return true;
 
   return false;
 }
 
+/* Returns true if ARG is a neutral element for operation CODE
+   on the RIGHT side.  */
+
+static bool
+neutral_element_p (tree_code code, tree arg, bool right)
+{
+  switch (code)
+{
+case PLUS_EXPR:
+case BIT_IOR_EXPR:
+case BIT_XOR_EXPR:
+  return integer_zerop (arg);
+
+case LROTATE_EXPR:
+case RROTATE_EXPR:
+case LSHIFT_EXPR:
+case RSHIFT_EXPR:
+case MINUS_EXPR:
+case POINTER_PLUS_EXPR:
+  return right  integer_zerop (arg);
+
+case MULT_EXPR:
+  return integer_onep (arg);
+
+case TRUNC_DIV_EXPR:
+case CEIL_DIV_EXPR:
+case FLOOR_DIV_EXPR:
+case ROUND_DIV_EXPR:
+case EXACT_DIV_EXPR:
+  return right  integer_onep (arg);
+
+case BIT_AND_EXPR:
+  return integer_all_onesp (arg);
+
+default:
+  return false;
+}
+}
+
+/* Returns true if ARG is an absorbing element for operation CODE.  */
+
+static bool
+absorbing_element_p (tree_code code, tree arg)
+{
+  switch (code)
+{
+case BIT_IOR_EXPR:
+  return integer_all_onesp (arg);
+
+case MULT_EXPR:
+case BIT_AND_EXPR:
+  return integer_zerop (arg);
+
+default:
+  return false;

Re: Optimize n?rotate(x,n):x

2014-04-23 Thread Marc Glisse

Honza, any comment on Richard's question?

On Tue, 15 Apr 2014, Richard Biener wrote:


On Mon, Apr 14, 2014 at 6:40 PM, Marc Glisse marc.gli...@inria.fr wrote:

On Mon, 14 Apr 2014, Richard Biener wrote:


+  /* If the special case has a high probability, keep it.  */
+  if (EDGE_PRED (middle_bb, 0)-probability  PROB_EVEN)



I suppose Honza has a comment on how to test this properly
(not sure if -probability or -frequency is always initialized properly).
for example single_likely_edge tests profile_status_for_fn !=
PROFILE_ABSENT (and uses a fixed probability value ...).
Anyway, the comparison looks backwards to me, but maybe I'm
missing sth - I'd use = PROB_LIKELY ;)



Maybe the comment is confusing? middle_bb contains the expensive operation
(say a/b) that the special case skips entirely. If the division happens in
less than 50% of cases (that's the proba of the edge going from cond to
middle_bb), then doing the comparison+jump may be cheaper and I abort the
optimization. At least the testcase with __builtin_expect should prove that
I didn't do it backwards.


Ah, indeed.  My mistake.


value-prof seems to use 50% as the cut-off where it may become interesting
to special case division, hence my choice of PROB_EVEN. I am not sure which
way you want to use PROB_LIKELY (80%). If we have more than 80% chances of
executing the division, always perform it? Or if we have more than 80%
chances of skipping the division, keep the branch?


Ok, if it's from value-prof then that's fine.

The patch is ok if Honza doesn't have any comments on whether it's ok
to look at -probability unconditionally.

Thanks,
Richard.


Attached is the latest version (passed the testsuite).
Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (working copy)
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options -O -fdump-tree-phiopt1 } */
+
+int f(int a, int b, int c) {
+  if (c  5) return c;
+  if (a == 0) return b;
+  return a + b;
+}
+
+unsigned rot(unsigned x, int n) {
+  const int bits = __CHAR_BIT__ * __SIZEOF_INT__;
+  return (n == 0) ? x : ((x  n) | (x  (bits - n)));
+}
+
+unsigned m(unsigned a, unsigned b) {
+  if (a == 0)
+return 0;
+  else
+return a  b;
+}
+
+/* { dg-final { scan-tree-dump-times goto 2 phiopt1 } } */
+/* { dg-final { cleanup-tree-dump phiopt1 } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (working copy)
@@ -0,0 +1,19 @@
+/* { dg-do compile { target x86_64-*-* } } */
+/* { dg-options -O2 -fdump-tree-optimized } */
+
+int f(int a, int b) {
+  if (__builtin_expect(a == 0, 1)) return b;
+  return a + b;
+}
+
+// optab_handler can handle if(b==1) but not a/b
+// so we consider a/b too expensive.
+unsigned __int128 g(unsigned __int128 a, unsigned __int128 b) {
+  if (b == 1)
+return a;
+  else
+return a / b;
+}
+
+/* { dg-final { scan-tree-dump-times goto  4 optimized } } */
+/* { dg-final { cleanup-tree-dump optimized } } */
Index: gcc/tree-ssa-phiopt.c
===
--- gcc/tree-ssa-phiopt.c   (revision 209353)
+++ gcc/tree-ssa-phiopt.c   (working copy)
@@ -140,20 +140,37 @@ static bool gate_hoist_loads (void);
x = PHI (CONST, a)

Gets replaced with:
  bb0:
  bb2:
t1 = a == CONST;
t2 = b  c;
t3 = t1  t2;
x = a;

+
+   It also replaces
+
+ bb0:
+   if (a != 0) goto bb1; else goto bb2;
+ bb1:
+   c = a + b;
+ bb2:
+   x = PHI c (bb1), b (bb0), ...;
+
+   with
+
+ bb0:
+   c = a + b;
+ bb2:
+   x = PHI c (bb0), ...;
+
ABS Replacement
---

This transformation, implemented in abs_replacement, replaces

  bb0:
if (a = 0) goto bb2; else goto bb1;
  bb1:
x = -a;
  bb2:
@@ -809,20 +826,103 @@ operand_equal_for_value_replacement (con
   if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
 return true;

   tmp = gimple_assign_rhs2 (def);
   if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
 return true;

   return false;
 }

+/* Returns true if ARG is a neutral element for operation CODE
+   on the RIGHT side.  */
+
+static bool
+neutral_element_p (tree_code code, tree arg, bool right)
+{
+  switch (code)
+{
+case PLUS_EXPR:
+case BIT_IOR_EXPR:
+case BIT_XOR_EXPR:
+  return integer_zerop (arg);
+
+case LROTATE_EXPR:
+case RROTATE_EXPR:
+case LSHIFT_EXPR:
+case RSHIFT_EXPR:
+case MINUS_EXPR:
+case POINTER_PLUS_EXPR:
+  return right  integer_zerop (arg);
+
+case MULT_EXPR:
+  return integer_onep (arg);
+
+case TRUNC_DIV_EXPR:
+case CEIL_DIV_EXPR:
+

Re: Optimize n?rotate(x,n):x

2014-04-23 Thread Jan Hubicka
 Honza, any comment on Richard's question?
 
 On Tue, 15 Apr 2014, Richard Biener wrote:
 
 On Mon, Apr 14, 2014 at 6:40 PM, Marc Glisse marc.gli...@inria.fr wrote:
 On Mon, 14 Apr 2014, Richard Biener wrote:
 
 +  /* If the special case has a high probability, keep it.  */
 +  if (EDGE_PRED (middle_bb, 0)-probability  PROB_EVEN)
 
 
 I suppose Honza has a comment on how to test this properly
 (not sure if -probability or -frequency is always initialized properly).
 for example single_likely_edge tests profile_status_for_fn !=
 PROFILE_ABSENT (and uses a fixed probability value ...).
 Anyway, the comparison looks backwards to me, but maybe I'm
 missing sth - I'd use = PROB_LIKELY ;)
 
 
 Maybe the comment is confusing? middle_bb contains the expensive operation
 (say a/b) that the special case skips entirely. If the division happens in
 less than 50% of cases (that's the proba of the edge going from cond to
 middle_bb), then doing the comparison+jump may be cheaper and I abort the
 optimization. At least the testcase with __builtin_expect should prove that
 I didn't do it backwards.
 
 Ah, indeed.  My mistake.
 
 value-prof seems to use 50% as the cut-off where it may become interesting
 to special case division, hence my choice of PROB_EVEN. I am not sure which
 way you want to use PROB_LIKELY (80%). If we have more than 80% chances of
 executing the division, always perform it? Or if we have more than 80%
 chances of skipping the division, keep the branch?
 
 Ok, if it's from value-prof then that's fine.
 
 The patch is ok if Honza doesn't have any comments on whether it's ok
 to look at -probability unconditionally.
 
 Thanks,
 Richard.
 
 +
 +  /* Now optimize (x != 0) ? x + y : y to just y.
 + The following condition is too restrictive, there can easily be
 another
 + stmt in middle_bb, for instance a CONVERT_EXPR for the second
 argument.  */
 +  gimple assign = last_and_only_stmt (middle_bb);
 +  if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
 +  || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
 +  || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
 +  !POINTER_TYPE_P (TREE_TYPE (arg0
 +return 0;
 +
 +  /* assign may call a libgcc routine, which is slow.  */
 +  if (!is_cheap_stmt (assign)  is_cheap_stmt (cond))
 +return 0;
 +
 +  /* If the special case has a high probability, keep it.  */
 +  if (EDGE_PRED (middle_bb, 0)-probability  PROB_EVEN)
 +return 0;

Well, I would expect this transformation to be win always, + operation
is virtually for free.

Concerning profile - you must consider two cases.  If the profile is guessed
then the probability is most probably 71% to the non-zero case unless user used
an expect (since I would expect only PRED_OPCODE_NONEQUAL to match).  This is
data dependent branch unless this sits in a loop and those are badly predicted
statically.

If the probability is read, then probabilities over (say) 10% and less than 90%
means that the branch is likely not very well predictable (it still may be on
advanced architectures if it is predictable from context) and getting rid of it
is a good idea.

So if you want to disable the optimization for x being likely zero, I guess
testing that probability is over PROV_LIKELY is a resonable threshold - it will
handle both builtin_expect and the (very) badly predictable branches.
Maybe for FDO it should be higher, but we would need to do some research on it
that is probably not worth the effort.

The division transformation is bit different story, since cost of division is
more than cost of branch and the 50% threshold is a limit for one value
counter to be reliable.

Honza


Re: Optimize n?rotate(x,n):x

2014-04-23 Thread Marc Glisse

On Wed, 23 Apr 2014, Jan Hubicka wrote:


+  /* Now optimize (x != 0) ? x + y : y to just y.
+ The following condition is too restrictive, there can easily be
another
+ stmt in middle_bb, for instance a CONVERT_EXPR for the second
argument.  */
+  gimple assign = last_and_only_stmt (middle_bb);
+  if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
+  || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
+  || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
+  !POINTER_TYPE_P (TREE_TYPE (arg0
+return 0;
+
+  /* assign may call a libgcc routine, which is slow.  */
+  if (!is_cheap_stmt (assign)  is_cheap_stmt (cond))
+return 0;
+
+  /* If the special case has a high probability, keep it.  */
+  if (EDGE_PRED (middle_bb, 0)-probability  PROB_EVEN)
+return 0;


Well, I would expect this transformation to be win always, + operation
is virtually for free.

Concerning profile - you must consider two cases.  If the profile is guessed
then the probability is most probably 71% to the non-zero case unless user
used an expect (since I would expect only PRED_OPCODE_NONEQUAL to match).
This is data dependent branch unless this sits in a loop and those are badly
predicted statically.

If the probability is read, then probabilities over (say) 10% and less than
90% means that the branch is likely not very well predictable (it still may
be on advanced architectures if it is predictable from context) and getting
rid of it is a good idea.

So if you want to disable the optimization for x being likely zero, I guess
testing that probability is over PROV_LIKELY is a resonable threshold - it
will handle both builtin_expect and the (very) badly predictable branches.
Maybe for FDO it should be higher, but we would need to do some research on
it that is probably not worth the effort.

The division transformation is bit different story, since cost of division
is more than cost of branch and the 50% threshold is a limit for one value
counter to be reliable.


Thank you for the comments. If I understand correctly:

- it is always ok to look at edge-probability (no need to check that the
  probabilities are available as Richard feared)
- PROB_EVEN is an ok threshold for division (not sure I understood your last
  sentence)
- for cheaper operations like addition, I should be less conservative and do
  the transformation always, or use a threshold of PROB_UNLIKELY.

Are there other operations than division (among those listed in
neutral_element_p or absorbing_element_p) that fall in the expensive
category? I guess there are some platforms where multiplication is
expensive, but few, and querying the target for the cost of operations
seems exagerated. So I would go with:

[move definition of code_def]
if ((code_def == TRUNC_DIV_EXPR || code_def == CEIL_DIV_EXPR
 || code_def == FLOOR_DIV_EXPR || code_def == ROUND_DIV_EXPR
 || code_def == EXACT_DIV_EXPR)
 EDGE_PRED (middle_bb, 0)-probability  PROB_EVEN)
  return 0;

(and change the testsuite example with __builtin_expect to use division)

or (my favorite):

[move definition of code_def]
int threshold = PROB_UNLIKELY;
if (code_def == TRUNC_DIV_EXPR || code_def == CEIL_DIV_EXPR
|| code_def == FLOOR_DIV_EXPR || code_def == ROUND_DIV_EXPR
|| code_def == EXACT_DIV_EXPR)
  threshold = PROB_EVEN;
if (EDGE_PRED (middle_bb, 0)-probability  threshold)
  return 0;


Is that ok, after re-testing?

--
Marc Glisse


Re: Optimize n?rotate(x,n):x

2014-04-23 Thread Jan Hubicka
 
 Thank you for the comments. If I understand correctly:
 
 - it is always ok to look at edge-probability (no need to check that the
   probabilities are available as Richard feared)

Actually with -fno-guess-branch-probabilities (default only at -O0) the 
probability
is not computed.  You can check profile_status = PROFILE_GUESSED

 - PROB_EVEN is an ok threshold for division (not sure I understood your last
   sentence)
 - for cheaper operations like addition, I should be less conservative and do
   the transformation always, or use a threshold of PROB_UNLIKELY.

PROB_EVEN is really coming from the way one value counter is implemented.
If its probability is less than 50%, the value predicted may be wrong and may
not be the most common value.

It does not apply for your case. If you always eliminate branch, I would always
disable the transformation only if the faster path you are going to slow down
is more than PROB_LIKELY.
For probabilities PROB_LIKELY...PROB_EVEN you are going to slow down the common
path, but you will eliminate branch that is most likely data dependent and not
well predictable.

For really expensive operations (division by non-constant) you probably want
to use PROB_EVEN. But perhaps for start you don't need to make a difference and
lets see.
 
 Are there other operations than division (among those listed in
 neutral_element_p or absorbing_element_p) that fall in the expensive
 category? I guess there are some platforms where multiplication is
 expensive, but few, and querying the target for the cost of operations
 seems exagerated. So I would go with:
 
 [move definition of code_def]
 if ((code_def == TRUNC_DIV_EXPR || code_def == CEIL_DIV_EXPR
  || code_def == FLOOR_DIV_EXPR || code_def == ROUND_DIV_EXPR
  || code_def == EXACT_DIV_EXPR)
  EDGE_PRED (middle_bb, 0)-probability  PROB_EVEN)
   return 0;
 
 (and change the testsuite example with __builtin_expect to use division)
 
 or (my favorite):
 
 [move definition of code_def]
 int threshold = PROB_UNLIKELY;
 if (code_def == TRUNC_DIV_EXPR || code_def == CEIL_DIV_EXPR
 || code_def == FLOOR_DIV_EXPR || code_def == ROUND_DIV_EXPR
 || code_def == EXACT_DIV_EXPR)
   threshold = PROB_EVEN;
 if (EDGE_PRED (middle_bb, 0)-probability  threshold)
   return 0;

You may probably ask time estimate of estimate_num_insns for expensive
operations. It is not terribly smarter than what you propose (modulo
special casing the constant divisor), but this way we will have all
the magic at one place that is good for maintanibility.

With these changes it is OK.
Honza
 
 
 Is that ok, after re-testing?
 
 -- 
 Marc Glisse


Re: Optimize n?rotate(x,n):x

2014-04-15 Thread Richard Biener
On Mon, Apr 14, 2014 at 6:40 PM, Marc Glisse marc.gli...@inria.fr wrote:
 On Mon, 14 Apr 2014, Richard Biener wrote:

 +  /* If the special case has a high probability, keep it.  */
 +  if (EDGE_PRED (middle_bb, 0)-probability  PROB_EVEN)


 I suppose Honza has a comment on how to test this properly
 (not sure if -probability or -frequency is always initialized properly).
 for example single_likely_edge tests profile_status_for_fn !=
 PROFILE_ABSENT (and uses a fixed probability value ...).
 Anyway, the comparison looks backwards to me, but maybe I'm
 missing sth - I'd use = PROB_LIKELY ;)


 Maybe the comment is confusing? middle_bb contains the expensive operation
 (say a/b) that the special case skips entirely. If the division happens in
 less than 50% of cases (that's the proba of the edge going from cond to
 middle_bb), then doing the comparison+jump may be cheaper and I abort the
 optimization. At least the testcase with __builtin_expect should prove that
 I didn't do it backwards.

Ah, indeed.  My mistake.

 value-prof seems to use 50% as the cut-off where it may become interesting
 to special case division, hence my choice of PROB_EVEN. I am not sure which
 way you want to use PROB_LIKELY (80%). If we have more than 80% chances of
 executing the division, always perform it? Or if we have more than 80%
 chances of skipping the division, keep the branch?

Ok, if it's from value-prof then that's fine.

The patch is ok if Honza doesn't have any comments on whether it's ok
to look at -probability unconditionally.

Thanks,
Richard.

 Attached is the latest version (passed the testsuite).
 Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
 ===
 --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (revision 0)
 +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (working copy)
 @@ -0,0 +1,23 @@
 +/* { dg-do compile } */
 +/* { dg-options -O -fdump-tree-phiopt1 } */
 +
 +int f(int a, int b, int c) {
 +  if (c  5) return c;
 +  if (a == 0) return b;
 +  return a + b;
 +}
 +
 +unsigned rot(unsigned x, int n) {
 +  const int bits = __CHAR_BIT__ * __SIZEOF_INT__;
 +  return (n == 0) ? x : ((x  n) | (x  (bits - n)));
 +}
 +
 +unsigned m(unsigned a, unsigned b) {
 +  if (a == 0)
 +return 0;
 +  else
 +return a  b;
 +}
 +
 +/* { dg-final { scan-tree-dump-times goto 2 phiopt1 } } */
 +/* { dg-final { cleanup-tree-dump phiopt1 } } */
 Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c
 ===
 --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (revision 0)
 +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (working copy)
 @@ -0,0 +1,19 @@
 +/* { dg-do compile { target x86_64-*-* } } */
 +/* { dg-options -O2 -fdump-tree-optimized } */
 +
 +int f(int a, int b) {
 +  if (__builtin_expect(a == 0, 1)) return b;
 +  return a + b;
 +}
 +
 +// optab_handler can handle if(b==1) but not a/b
 +// so we consider a/b too expensive.
 +unsigned __int128 g(unsigned __int128 a, unsigned __int128 b) {
 +  if (b == 1)
 +return a;
 +  else
 +return a / b;
 +}
 +
 +/* { dg-final { scan-tree-dump-times goto  4 optimized } } */
 +/* { dg-final { cleanup-tree-dump optimized } } */
 Index: gcc/tree-ssa-phiopt.c
 ===
 --- gcc/tree-ssa-phiopt.c   (revision 209353)
 +++ gcc/tree-ssa-phiopt.c   (working copy)
 @@ -140,20 +140,37 @@ static bool gate_hoist_loads (void);
 x = PHI (CONST, a)

 Gets replaced with:
   bb0:
   bb2:
 t1 = a == CONST;
 t2 = b  c;
 t3 = t1  t2;
 x = a;

 +
 +   It also replaces
 +
 + bb0:
 +   if (a != 0) goto bb1; else goto bb2;
 + bb1:
 +   c = a + b;
 + bb2:
 +   x = PHI c (bb1), b (bb0), ...;
 +
 +   with
 +
 + bb0:
 +   c = a + b;
 + bb2:
 +   x = PHI c (bb0), ...;
 +
 ABS Replacement
 ---

 This transformation, implemented in abs_replacement, replaces

   bb0:
 if (a = 0) goto bb2; else goto bb1;
   bb1:
 x = -a;
   bb2:
 @@ -809,20 +826,103 @@ operand_equal_for_value_replacement (con
if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
  return true;

tmp = gimple_assign_rhs2 (def);
if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
  return true;

return false;
  }

 +/* Returns true if ARG is a neutral element for operation CODE
 +   on the RIGHT side.  */
 +
 +static bool
 +neutral_element_p (tree_code code, tree arg, bool right)
 +{
 +  switch (code)
 +{
 +case PLUS_EXPR:
 +case BIT_IOR_EXPR:
 +case BIT_XOR_EXPR:
 +  return integer_zerop (arg);
 +
 +case LROTATE_EXPR:
 +case RROTATE_EXPR:
 +case LSHIFT_EXPR:
 +case RSHIFT_EXPR:
 +case MINUS_EXPR:
 +case POINTER_PLUS_EXPR:
 +  return right  integer_zerop (arg);
 +
 +case MULT_EXPR:
 +  return integer_onep (arg);
 +
 +case 

Re: Optimize n?rotate(x,n):x

2014-04-14 Thread Richard Biener
On Sun, Apr 13, 2014 at 10:58 PM, Marc Glisse marc.gli...@inria.fr wrote:
 On Mon, 3 Mar 2014, Richard Biener wrote:

 On Sat, Mar 1, 2014 at 3:33 PM, Marc Glisse marc.gli...@inria.fr wrote:

 Hello,

 again, a stage 1 patch that I will ping then, but early comments are
 welcome.

 PR 59100 was asking to transform n?rotate(x,n):x to rotate(x,n) (because
 it
 can be hard to write a strictly valid rotate in plain C). The operation
 is
 really:
 (x != neutral) ? x op y : y
 where neutral is such that (neutral op y) is always y, so that's what I
 implemented (and absorbing elements while I was at it).

 For some operations on some platforms, the transformation may not be such
 a
 good idea, in particular if division is very slow and b is 1 most of the
 time, then computing a/b may be slower than (b!=1)?a/b:a. The easiest
 might
 be to comment out those operations in the switch for now. I think
 divisions
 are the only ones slow enough to deserve this, though there certainly are
 CPUs where multiplication is not so fast, and even for rotate it may not
 always be a win if the processor doesn't have a rotate instruction and
 the
 shift amount is almost always 0.


 You only handle integer operations, so checking for INTEGER_TYPE_P
 early on would make sense.


 Ah, I wasn't planning on restricting this to integers but yes, indeed, since
 it ended up that way...


 Note that some archs may dispatch to libgcc for integer operations
 so it may make sense to check whether that is the case (you can
 query optabs to check that) - if the comparison also dispatches to
 libgcc then of course the transform would be a win again.


 Ok. I am not sure about using cbranch_optab, other ideas for the comparison?


 Even on x86 TImode division uses libgcc.


 This reminds me of PR 58897 (unrelated).


 Note also that value-profiling may have created this special case in the
 first place! (gimple_divmod_fixed_value_transform)


 Test coverage must be limited if I didn't break the testsuite :-(

 value-profiling seems to set edge probabilities when it does the
 transformation, so I am checking only that.


 Otherwise I think this is a good transform.  Did you check if it triggers
 during GCC bootstrap?


 It does, a few times. Java has the obvious:

 int padLength = blockSize;
 if (length % blockSize != 0)
   padLength = blockSize - length % blockSize;

 sched-deps.c has (I didn't check if it was actually this piece of code that
 triggered or another one in the same file):

   if ((ds1  t)  !(ds2  t))
 ds |= ds1  t;
   else if (!(ds1  t)  (ds2  t))
 ds |= ds2  t;

 and we end up seeing (phiopt3) if(a!=0)ds|=a.

 And mostly bitmap.h where we see quite a bit of:

   if (_867 != 0)
 goto bb 55;
   else
 goto bb 56;

   bb 55:
   start_bit_869 = _867 * 128;

   bb 56:
   # start_bit_870 = PHI 0(54), start_bit_869(55)


 Bootstrap+testsuite on x86_64-linux-gnu (modulo a minor reformatting, I'm
 retesting anyway).

Few comments below

 2014-04-14  Marc Glisse  marc.gli...@inria.fr

 PR tree-optimization/59100
 gcc/
 * tree-ssa-phiopt.c (neutral_element_p, absorbing_element_p,
 is_cheap_stmt): New functions.

 (value_replacement): Handle conditional binary operations with a
 neutral or absorbing element.
 gcc/testsuite/
 * gcc.dg/tree-ssa/phi-opt-12.c: New file.
 * gcc.dg/tree-ssa/phi-opt-13.c: Likewise.

 --
 Marc Glisse
 Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
 ===
 --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (revision 0)
 +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (working copy)
 @@ -0,0 +1,23 @@
 +/* { dg-do compile } */
 +/* { dg-options -O -fdump-tree-phiopt1 } */
 +
 +int f(int a, int b, int c) {
 +  if (c  5) return c;
 +  if (a == 0) return b;
 +  return a + b;
 +}
 +
 +unsigned rot(unsigned x, int n) {
 +  const int bits = __CHAR_BIT__ * __SIZEOF_INT__;
 +  return (n == 0) ? x : ((x  n) | (x  (bits - n)));
 +}
 +
 +unsigned m(unsigned a, unsigned b) {
 +  if (a == 0)
 +return 0;
 +  else
 +return a  b;
 +}
 +
 +/* { dg-final { scan-tree-dump-times goto 2 phiopt1 } } */
 +/* { dg-final { cleanup-tree-dump phiopt1 } } */

 Property changes on: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
 ___
 Added: svn:keywords
 ## -0,0 +1 ##
 +Author Date Id Revision URL
 \ No newline at end of property
 Added: svn:eol-style
 ## -0,0 +1 ##
 +native
 \ No newline at end of property

Try to avoid this

 Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c
 ===
 --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (revision 0)
 +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (working copy)
 @@ -0,0 +1,19 @@
 +/* { dg-do compile { target x86_64-*-* } } */
 +/* { dg-options -O2 -fdump-tree-optimized } */
 +
 +int f(int a, int b) {
 +  if 

Re: Optimize n?rotate(x,n):x

2014-04-14 Thread Marc Glisse

On Mon, 14 Apr 2014, Richard Biener wrote:


+  /* If the special case has a high probability, keep it.  */
+  if (EDGE_PRED (middle_bb, 0)-probability  PROB_EVEN)


I suppose Honza has a comment on how to test this properly
(not sure if -probability or -frequency is always initialized properly).
for example single_likely_edge tests profile_status_for_fn !=
PROFILE_ABSENT (and uses a fixed probability value ...).
Anyway, the comparison looks backwards to me, but maybe I'm
missing sth - I'd use = PROB_LIKELY ;)


Maybe the comment is confusing? middle_bb contains the expensive operation 
(say a/b) that the special case skips entirely. If the division happens in 
less than 50% of cases (that's the proba of the edge going from cond to 
middle_bb), then doing the comparison+jump may be cheaper and I abort the 
optimization. At least the testcase with __builtin_expect should prove 
that I didn't do it backwards.


value-prof seems to use 50% as the cut-off where it may become interesting 
to special case division, hence my choice of PROB_EVEN. I am not sure 
which way you want to use PROB_LIKELY (80%). If we have more than 80% 
chances of executing the division, always perform it? Or if we have more 
than 80% chances of skipping the division, keep the branch?


Attached is the latest version (passed the testsuite).Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (working copy)
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options -O -fdump-tree-phiopt1 } */
+
+int f(int a, int b, int c) {
+  if (c  5) return c;
+  if (a == 0) return b;
+  return a + b;
+}
+
+unsigned rot(unsigned x, int n) {
+  const int bits = __CHAR_BIT__ * __SIZEOF_INT__;
+  return (n == 0) ? x : ((x  n) | (x  (bits - n)));
+}
+
+unsigned m(unsigned a, unsigned b) {
+  if (a == 0)
+return 0;
+  else
+return a  b;
+}
+
+/* { dg-final { scan-tree-dump-times goto 2 phiopt1 } } */
+/* { dg-final { cleanup-tree-dump phiopt1 } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (working copy)
@@ -0,0 +1,19 @@
+/* { dg-do compile { target x86_64-*-* } } */
+/* { dg-options -O2 -fdump-tree-optimized } */
+
+int f(int a, int b) {
+  if (__builtin_expect(a == 0, 1)) return b;
+  return a + b;
+}
+
+// optab_handler can handle if(b==1) but not a/b
+// so we consider a/b too expensive.
+unsigned __int128 g(unsigned __int128 a, unsigned __int128 b) {
+  if (b == 1)
+return a;
+  else
+return a / b;
+}
+
+/* { dg-final { scan-tree-dump-times goto  4 optimized } } */
+/* { dg-final { cleanup-tree-dump optimized } } */
Index: gcc/tree-ssa-phiopt.c
===
--- gcc/tree-ssa-phiopt.c   (revision 209353)
+++ gcc/tree-ssa-phiopt.c   (working copy)
@@ -140,20 +140,37 @@ static bool gate_hoist_loads (void);
x = PHI (CONST, a)
 
Gets replaced with:
  bb0:
  bb2:
t1 = a == CONST;
t2 = b  c;
t3 = t1  t2;
x = a;
 
+
+   It also replaces
+
+ bb0:
+   if (a != 0) goto bb1; else goto bb2;
+ bb1:
+   c = a + b;
+ bb2:
+   x = PHI c (bb1), b (bb0), ...;
+
+   with
+
+ bb0:
+   c = a + b;
+ bb2:
+   x = PHI c (bb0), ...;
+
ABS Replacement
---
 
This transformation, implemented in abs_replacement, replaces
 
  bb0:
if (a = 0) goto bb2; else goto bb1;
  bb1:
x = -a;
  bb2:
@@ -809,20 +826,103 @@ operand_equal_for_value_replacement (con
   if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
 return true;
 
   tmp = gimple_assign_rhs2 (def);
   if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
 return true;
 
   return false;
 }
 
+/* Returns true if ARG is a neutral element for operation CODE
+   on the RIGHT side.  */
+
+static bool
+neutral_element_p (tree_code code, tree arg, bool right)
+{
+  switch (code)
+{
+case PLUS_EXPR:
+case BIT_IOR_EXPR:
+case BIT_XOR_EXPR:
+  return integer_zerop (arg);
+
+case LROTATE_EXPR:
+case RROTATE_EXPR:
+case LSHIFT_EXPR:
+case RSHIFT_EXPR:
+case MINUS_EXPR:
+case POINTER_PLUS_EXPR:
+  return right  integer_zerop (arg);
+
+case MULT_EXPR:
+  return integer_onep (arg);
+
+case TRUNC_DIV_EXPR:
+case CEIL_DIV_EXPR:
+case FLOOR_DIV_EXPR:
+case ROUND_DIV_EXPR:
+case EXACT_DIV_EXPR:
+  return right  integer_onep (arg);
+
+case BIT_AND_EXPR:
+  return integer_all_onesp (arg);
+
+default:
+  return false;
+}
+}
+
+/* Returns true if ARG is an absorbing element for operation CODE.  */
+
+static bool
+absorbing_element_p (tree_code code, 

Re: Optimize n?rotate(x,n):x

2014-04-13 Thread Marc Glisse

On Mon, 3 Mar 2014, Richard Biener wrote:


On Sat, Mar 1, 2014 at 3:33 PM, Marc Glisse marc.gli...@inria.fr wrote:

Hello,

again, a stage 1 patch that I will ping then, but early comments are
welcome.

PR 59100 was asking to transform n?rotate(x,n):x to rotate(x,n) (because it
can be hard to write a strictly valid rotate in plain C). The operation is
really:
(x != neutral) ? x op y : y
where neutral is such that (neutral op y) is always y, so that's what I
implemented (and absorbing elements while I was at it).

For some operations on some platforms, the transformation may not be such a
good idea, in particular if division is very slow and b is 1 most of the
time, then computing a/b may be slower than (b!=1)?a/b:a. The easiest might
be to comment out those operations in the switch for now. I think divisions
are the only ones slow enough to deserve this, though there certainly are
CPUs where multiplication is not so fast, and even for rotate it may not
always be a win if the processor doesn't have a rotate instruction and the
shift amount is almost always 0.


You only handle integer operations, so checking for INTEGER_TYPE_P
early on would make sense.


Ah, I wasn't planning on restricting this to integers but yes, indeed, 
since it ended up that way...



Note that some archs may dispatch to libgcc for integer operations
so it may make sense to check whether that is the case (you can
query optabs to check that) - if the comparison also dispatches to
libgcc then of course the transform would be a win again.


Ok. I am not sure about using cbranch_optab, other ideas for the 
comparison?



Even on x86 TImode division uses libgcc.


This reminds me of PR 58897 (unrelated).

Note also that value-profiling may have created this special case in the 
first place! (gimple_divmod_fixed_value_transform)


Test coverage must be limited if I didn't break the testsuite :-(

value-profiling seems to set edge probabilities when it does the 
transformation, so I am checking only that.



Otherwise I think this is a good transform.  Did you check if it triggers
during GCC bootstrap?


It does, a few times. Java has the obvious:

int padLength = blockSize;
if (length % blockSize != 0)
  padLength = blockSize - length % blockSize;

sched-deps.c has (I didn't check if it was actually this piece of code 
that triggered or another one in the same file):


  if ((ds1  t)  !(ds2  t))
ds |= ds1  t;
  else if (!(ds1  t)  (ds2  t))
ds |= ds2  t;

and we end up seeing (phiopt3) if(a!=0)ds|=a.

And mostly bitmap.h where we see quite a bit of:

  if (_867 != 0)
goto bb 55;
  else
goto bb 56;

  bb 55:
  start_bit_869 = _867 * 128;

  bb 56:
  # start_bit_870 = PHI 0(54), start_bit_869(55)


Bootstrap+testsuite on x86_64-linux-gnu (modulo a minor reformatting, I'm 
retesting anyway).


2014-04-14  Marc Glisse  marc.gli...@inria.fr

PR tree-optimization/59100
gcc/
* tree-ssa-phiopt.c (neutral_element_p, absorbing_element_p,
is_cheap_stmt): New functions.
(value_replacement): Handle conditional binary operations with a
neutral or absorbing element.
gcc/testsuite/
* gcc.dg/tree-ssa/phi-opt-12.c: New file.
* gcc.dg/tree-ssa/phi-opt-13.c: Likewise.

--
Marc GlisseIndex: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (working copy)
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options -O -fdump-tree-phiopt1 } */
+
+int f(int a, int b, int c) {
+  if (c  5) return c;
+  if (a == 0) return b;
+  return a + b;
+}
+
+unsigned rot(unsigned x, int n) {
+  const int bits = __CHAR_BIT__ * __SIZEOF_INT__;
+  return (n == 0) ? x : ((x  n) | (x  (bits - n)));
+}
+
+unsigned m(unsigned a, unsigned b) {
+  if (a == 0)
+return 0;
+  else
+return a  b;
+}
+
+/* { dg-final { scan-tree-dump-times goto 2 phiopt1 } } */
+/* { dg-final { cleanup-tree-dump phiopt1 } } */

Property changes on: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
___
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision URL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c
===
--- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-13.c  (working copy)
@@ -0,0 +1,19 @@
+/* { dg-do compile { target x86_64-*-* } } */
+/* { dg-options -O2 -fdump-tree-optimized } */
+
+int f(int a, int b) {
+  if (__builtin_expect(a == 0, 1)) return b;
+  return a + b;
+}
+
+// optab_handler can handle if(b==1) but not a/b
+// so we consider a/b too expensive.
+unsigned __int128 g(unsigned __int128 a, unsigned __int128 b) {
+  if (b == 1)
+

Re: Optimize n?rotate(x,n):x

2014-03-03 Thread Richard Biener
On Sat, Mar 1, 2014 at 3:33 PM, Marc Glisse marc.gli...@inria.fr wrote:
 Hello,

 again, a stage 1 patch that I will ping then, but early comments are
 welcome.

 PR 59100 was asking to transform n?rotate(x,n):x to rotate(x,n) (because it
 can be hard to write a strictly valid rotate in plain C). The operation is
 really:
 (x != neutral) ? x op y : y
 where neutral is such that (neutral op y) is always y, so that's what I
 implemented (and absorbing elements while I was at it).

 For some operations on some platforms, the transformation may not be such a
 good idea, in particular if division is very slow and b is 1 most of the
 time, then computing a/b may be slower than (b!=1)?a/b:a. The easiest might
 be to comment out those operations in the switch for now. I think divisions
 are the only ones slow enough to deserve this, though there certainly are
 CPUs where multiplication is not so fast, and even for rotate it may not
 always be a win if the processor doesn't have a rotate instruction and the
 shift amount is almost always 0.

You only handle integer operations, so checking for INTEGER_TYPE_P
early on would make sense.

Note that some archs may dispatch to libgcc for integer operations
so it may make sense to check whether that is the case (you can
query optabs to check that) - if the comparison also dispatches to
libgcc then of course the transform would be a win again.  Even on
x86 TImode division uses libgcc.  Note also that value-profiling
may have created this special case in the first place!
(gimple_divmod_fixed_value_transform)

Otherwise I think this is a good transform.  Did you check if it triggers
during GCC bootstrap?

Thanks,
Richard.

 Passes bootstrap+testsuite on x86_64-linux-gnu.

 2014-03-01  Marc Glisse  marc.gli...@inria.fr

 PR tree-optimization/59100
 gcc/
 * tree-ssa-phiopt.c (neutral_element_p, absorbing_element_p): New
 functions.
 (value_replacement): Handle conditional binary operations with a
 neutral or absorbing element.
 gcc/testsuite/
 * gcc.dg/tree-ssa/phi-opt-12.c: New file.

 --
 Marc Glisse
 Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
 ===
 --- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (revision 0)
 +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c  (working copy)
 @@ -0,0 +1,23 @@
 +/* { dg-do compile } */
 +/* { dg-options -O -fdump-tree-phiopt1 } */
 +
 +int f(int a, int b, int c) {
 +  if (c  5) return c;
 +  if (a == 0) return b;
 +  return a + b;
 +}
 +
 +unsigned rot(unsigned x, int n) {
 +  const int bits = __CHAR_BIT__ * __SIZEOF_INT__;
 +  return (n == 0) ? x : ((x  n) | (x  (bits - n)));
 +}
 +
 +unsigned m(unsigned a, unsigned b) {
 +  if (a == 0)
 +return 0;
 +  else
 +return a  b;
 +}
 +
 +/* { dg-final { scan-tree-dump-times goto 2 phiopt1 } } */
 +/* { dg-final { cleanup-tree-dump phiopt1 } } */

 Property changes on: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-12.c
 ___
 Added: svn:eol-style
 ## -0,0 +1 ##
 +native
 \ No newline at end of property
 Added: svn:keywords
 ## -0,0 +1 ##
 +Author Date Id Revision URL
 \ No newline at end of property
 Index: gcc/tree-ssa-phiopt.c
 ===
 --- gcc/tree-ssa-phiopt.c   (revision 208241)
 +++ gcc/tree-ssa-phiopt.c   (working copy)
 @@ -140,20 +140,37 @@ static bool gate_hoist_loads (void);
 x = PHI (CONST, a)

 Gets replaced with:
   bb0:
   bb2:
 t1 = a == CONST;
 t2 = b  c;
 t3 = t1  t2;
 x = a;

 +
 +   It also replaces
 +
 + bb0:
 +   if (a != 0) goto bb1; else goto bb2;
 + bb1:
 +   c = a + b;
 + bb2:
 +   x = PHI c (bb1), b (bb0), ...;
 +
 +   with
 +
 + bb0:
 +   c = a + b;
 + bb2:
 +   x = PHI c (bb0), ...;
 +
 ABS Replacement
 ---

 This transformation, implemented in abs_replacement, replaces

   bb0:
 if (a = 0) goto bb2; else goto bb1;
   bb1:
 x = -a;
   bb2:
 @@ -809,20 +826,79 @@ operand_equal_for_value_replacement (con
if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
  return true;

tmp = gimple_assign_rhs2 (def);
if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
  return true;

return false;
  }

 +/* Returns true if ARG is a neutral element for operation CODE
 +   on the RIGHT side.  */
 +
 +static bool
 +neutral_element_p (tree_code code, tree arg, bool right)
 +{
 +  switch (code)
 +{
 +case PLUS_EXPR:
 +case BIT_IOR_EXPR:
 +case BIT_XOR_EXPR:
 +  return integer_zerop (arg);
 +
 +case LROTATE_EXPR:
 +case RROTATE_EXPR:
 +case LSHIFT_EXPR:
 +case RSHIFT_EXPR:
 +case MINUS_EXPR:
 +case POINTER_PLUS_EXPR:
 +  return right  integer_zerop (arg);
 +
 +case MULT_EXPR:
 +  return integer_onep (arg);
 +