Re: [PATCH] MATCH: Move `a <= CST1 ? MAX<a, CST2> : a` optimization to match
On Mon, May 8, 2023 at 12:21 AM Andrew Pinski via Gcc-patches wrote: > > This moves the `a <= CST1 ? MAX<a, CST2> : a` optimization > from phiopt to match. It just adds a new pattern to match.pd. > > There is one more change needed before being able to remove > minmax_replacement from phiopt. > > A few notes on the testsuite changes: > * phi-opt-5.c is now able to optimize at phiopt1 so remove > the xfail. > * pr66726-4.c can be optimized during fold before phiopt1 > so need to change the scanning. > * pr66726-5.c needs two phiopt passes currently to optimize > to the right thing, it needed 2 phiopt passes before, the cast > from int to unsigned char is the reason. > * pr66726-6.c is what the original pr66726-4.c was testing > before the fold was able to optimize it. > > OK? Bootstrapped and tested on x86_64-linux-gnu. OK. > gcc/ChangeLog: > > * match.pd (`(a CMP CST1) ? max<a,CST2> : a`): New > pattern. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/phi-opt-5.c: Remove last xfail. > * gcc.dg/tree-ssa/pr66726-4.c: Change how scanning > works. > * gcc.dg/tree-ssa/pr66726-5.c: New test. > * gcc.dg/tree-ssa/pr66726-6.c: New test. > --- > gcc/match.pd | 18 +++ > gcc/testsuite/gcc.dg/tree-ssa/phi-opt-5.c | 2 +- > gcc/testsuite/gcc.dg/tree-ssa/pr66726-4.c | 5 +++- > gcc/testsuite/gcc.dg/tree-ssa/pr66726-5.c | 28 +++ > gcc/testsuite/gcc.dg/tree-ssa/pr66726-6.c | 17 ++ > 5 files changed, 68 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr66726-5.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr66726-6.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index ceae1c34abc..a55ede838cd 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -4954,6 +4954,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (if (code == MAX_EXPR) >(minmax (max @1 @2) @4))) > > +/* Optimize (a CMP CST1) ? 
max : a */ > +(for cmp(gt ge lt le) > + minmax (min min max max) > + (simplify > + (cond (cmp @0 @1) (minmax:c@2 @0 @3) @4) > + (with > +{ > + tree_code code = minmax_from_comparison (cmp, @0, @1, @0, @4); > +} > +(if ((cmp == LT_EXPR || cmp == LE_EXPR) > +&& code == MIN_EXPR > + && integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node, @3, > @1))) > + (min @2 @4) > + (if ((cmp == GT_EXPR || cmp == GE_EXPR) > + && code == MAX_EXPR > + && integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node, @3, > @1))) > + (max @2 @4)) > + > /* X != C1 ? -X : C2 simplifies to -X when -C1 == C2. */ > (simplify > (cond (ne @0 INTEGER_CST@1) (negate@3 @0) INTEGER_CST@2) > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-5.c > b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-5.c > index 5f78a1ba6dc..e78d9d8b83d 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-5.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-5.c > @@ -39,7 +39,7 @@ float repl2 (float vary) > > /* phiopt1 confused by predictors. */ > /* { dg-final { scan-tree-dump "vary.*MAX_EXPR.*0\\.0" "phiopt1" } } */ > -/* { dg-final { scan-tree-dump "vary.*MIN_EXPR.*1\\.0" "phiopt1" { xfail > *-*-* } } } */ > +/* { dg-final { scan-tree-dump "vary.*MIN_EXPR.*1\\.0" "phiopt1" } } */ > /* { dg-final { scan-tree-dump "vary.*MAX_EXPR.*0\\.0" "phiopt2"} } */ > /* { dg-final { scan-tree-dump "vary.*MIN_EXPR.*1\\.0" "phiopt2"} } */ > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr66726-4.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr66726-4.c > index 4e43522f3a3..930ad5fb79f 100644 > --- a/gcc/testsuite/gcc.dg/tree-ssa/pr66726-4.c > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr66726-4.c > @@ -9,4 +9,7 @@ foo (unsigned char *p, int i) >*p = SAT (i); > } > > -/* { dg-final { scan-tree-dump-times "COND_EXPR .*and PHI .*converted to > straightline code" 1 "phiopt1" } } */ > +/* fold could optimize SAT before phiopt1 so only match on the > + MIN/MAX here. 
*/ > +/* { dg-final { scan-tree-dump-times "= MIN_EXPR" 1 "phiopt1" } } */ > +/* { dg-final { scan-tree-dump-times "= MAX_EXPR" 1 "phiopt1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr66726-5.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr66726-5.c > new file mode 100644 > index 000..4b5066cdb6b > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr66726-5.c > @@ -0,0 +1,28 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-phiopt1-details -fdump-tree-phiopt2-details > -fdump-tree-optimized" } */ > + > +#define SAT(x) (x < 0 ? 0 : (x > 255 ? 255 : x)) > + > +unsigned char > +foo (unsigned char *p, int i) > +{ > + if (i < 0) > +return 0; > + { > +int t; > +if (i > 255) > + t = 255; > +else > + t = i; > +return t; > + } > +} > + > +/* Because of the way PHIOPT works, it only does the merging of BBs after it > is done so we get the case were we can't > + optimize the above until phiopt2 right now. */ > +/* { dg-final {
[PATCH] MATCH: Move `a <= CST1 ? MAX<a, CST2> : a` optimization to match
This moves the `a <= CST1 ? MAX<a, CST2> : a` optimization from phiopt to match. It just adds a new pattern to match.pd. There is one more change needed before being able to remove minmax_replacement from phiopt. A few notes on the testsuite changes: * phi-opt-5.c is now able to optimize at phiopt1 so remove the xfail. * pr66726-4.c can be optimized during fold before phiopt1 so need to change the scanning. * pr66726-5.c needs two phiopt passes currently to optimize to the right thing, it needed 2 phiopt passes before, the cast from int to unsigned char is the reason. * pr66726-6.c is what the original pr66726-4.c was testing before the fold was able to optimize it. OK? Bootstrapped and tested on x86_64-linux-gnu. gcc/ChangeLog: * match.pd (`(a CMP CST1) ? max<a,CST2> : a`): New pattern. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/phi-opt-5.c: Remove last xfail. * gcc.dg/tree-ssa/pr66726-4.c: Change how scanning works. * gcc.dg/tree-ssa/pr66726-5.c: New test. * gcc.dg/tree-ssa/pr66726-6.c: New test. --- gcc/match.pd | 18 +++ gcc/testsuite/gcc.dg/tree-ssa/phi-opt-5.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/pr66726-4.c | 5 +++- gcc/testsuite/gcc.dg/tree-ssa/pr66726-5.c | 28 +++ gcc/testsuite/gcc.dg/tree-ssa/pr66726-6.c | 17 ++ 5 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr66726-5.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr66726-6.c diff --git a/gcc/match.pd b/gcc/match.pd index ceae1c34abc..a55ede838cd 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4954,6 +4954,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (code == MAX_EXPR) (minmax (max @1 @2) @4))) +/* Optimize (a CMP CST1) ? 
max : a */ +(for cmp(gt ge lt le) + minmax (min min max max) + (simplify + (cond (cmp @0 @1) (minmax:c@2 @0 @3) @4) + (with +{ + tree_code code = minmax_from_comparison (cmp, @0, @1, @0, @4); +} +(if ((cmp == LT_EXPR || cmp == LE_EXPR) +&& code == MIN_EXPR + && integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node, @3, @1))) + (min @2 @4) + (if ((cmp == GT_EXPR || cmp == GE_EXPR) + && code == MAX_EXPR + && integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node, @3, @1))) + (max @2 @4)) + /* X != C1 ? -X : C2 simplifies to -X when -C1 == C2. */ (simplify (cond (ne @0 INTEGER_CST@1) (negate@3 @0) INTEGER_CST@2) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-5.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-5.c index 5f78a1ba6dc..e78d9d8b83d 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-5.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-5.c @@ -39,7 +39,7 @@ float repl2 (float vary) /* phiopt1 confused by predictors. */ /* { dg-final { scan-tree-dump "vary.*MAX_EXPR.*0\\.0" "phiopt1" } } */ -/* { dg-final { scan-tree-dump "vary.*MIN_EXPR.*1\\.0" "phiopt1" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "vary.*MIN_EXPR.*1\\.0" "phiopt1" } } */ /* { dg-final { scan-tree-dump "vary.*MAX_EXPR.*0\\.0" "phiopt2"} } */ /* { dg-final { scan-tree-dump "vary.*MIN_EXPR.*1\\.0" "phiopt2"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr66726-4.c b/gcc/testsuite/gcc.dg/tree-ssa/pr66726-4.c index 4e43522f3a3..930ad5fb79f 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr66726-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr66726-4.c @@ -9,4 +9,7 @@ foo (unsigned char *p, int i) *p = SAT (i); } -/* { dg-final { scan-tree-dump-times "COND_EXPR .*and PHI .*converted to straightline code" 1 "phiopt1" } } */ +/* fold could optimize SAT before phiopt1 so only match on the + MIN/MAX here. 
*/ +/* { dg-final { scan-tree-dump-times "= MIN_EXPR" 1 "phiopt1" } } */ +/* { dg-final { scan-tree-dump-times "= MAX_EXPR" 1 "phiopt1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr66726-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr66726-5.c new file mode 100644 index 000..4b5066cdb6b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr66726-5.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-phiopt1-details -fdump-tree-phiopt2-details -fdump-tree-optimized" } */ + +#define SAT(x) (x < 0 ? 0 : (x > 255 ? 255 : x)) + +unsigned char +foo (unsigned char *p, int i) +{ + if (i < 0) +return 0; + { +int t; +if (i > 255) + t = 255; +else + t = i; +return t; + } +} + +/* Because of the way PHIOPT works, it only does the merging of BBs after it is done so we get the case were we can't + optimize the above until phiopt2 right now. */ +/* { dg-final { scan-tree-dump-times "COND_EXPR .*and PHI .*converted to straightline code" 2 "phiopt1" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "COND_EXPR .*and PHI .*converted to straightline code" 0 "phiopt2" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "= MIN_EXPR" 1 "phiopt1" } } */ +/* { dg-final {