Richard Biener <rguent...@suse.de> writes: > On Mon, 18 Oct 2021, Jiufu Guo wrote: > >> With reference to the discussions in: >> https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574334.html >> https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572006.html >> https://gcc.gnu.org/pipermail/gcc-patches/2021-September/578672.html >> >> Based on the patches in the above discussions, we may draft a patch to fix the >> issue. >> >> In this patch, to make sure it is ok to change '{b0,s0} op {b1,s1}' to >> '{b0,s0-s1} op {b1,0}', we also compute the condition which could assume >> both ivs do not overflow/wrap: the niter "of '{b0,s0-s1} op {b1,0}'" >> < the niter "of until wrap for iv0 or iv1". >> >> Does this patch make sense? > > Hum, the patch is mighty complex :/ I'm not sure we can throw > artificial IVs at number_of_iterations_cond and expect a meaningful > result. > > ISTR the problem is with number_of_iterations_ne[_max], but I would > have to go and dig in myself again for a full recap of the problem. > I did plan to do that, but not before stage3 starts. > > Thanks, > Richard.
Hi Richard, Thanks for your comment! It is really complex, using artificial IVs and recursively calling number_of_iterations_cond. We may use a simpler way. Not sure if you had started to dig into the problem. I refined a patch. Hope this patch is helpful. This patch enhances the conditions in some aspects. Attached are two test cases that could be handled. --- gcc/tree-ssa-loop-niter.c | 92 +++++++++++++++---- .../gcc.c-torture/execute/pr100740.c | 11 +++ gcc/testsuite/gcc.dg/vect/pr102131.c | 47 ++++++++++ 3 files changed, 134 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr100740.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr102131.c diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index 06954e437f5..ee1d7293c5c 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -1788,6 +1788,70 @@ dump_affine_iv (FILE *file, affine_iv *iv) } } +/* Generate expr: (HIGH - LOW) / STEP, under UTYPE. */ + +static tree +get_step_count (tree high, tree low, tree step, tree utype, + bool end_inclusive = false) +{ + tree delta = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low); + delta = fold_convert (utype,delta); + if (end_inclusive) + delta = fold_build2 (PLUS_EXPR, utype, delta, build_one_cst (utype)); + + if (tree_int_cst_sign_bit (step)) + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step); + step = fold_convert (utype, step); + + return fold_build2 (FLOOR_DIV_EXPR, utype, delta, step); +} + +/* Get the additional assumption if both two steps are not zero. + Assumptions satisfy that there is no overflow or wrap during + v0 and v1 chasing. */ + +static tree +extra_iv_chase_assumption (affine_iv *iv0, affine_iv *iv1, tree step, + enum tree_code code) +{ + /* No need additional assumptions. */ + if (code == NE_EXPR) + return boolean_true_node; + + /* it not safe to transform {b0, 1} < {b1, 2}. 
*/ + if (tree_int_cst_sign_bit (step)) + return boolean_false_node; + + /* No need addition assumption for pointer. */ + tree type = TREE_TYPE (iv0->base); + if (POINTER_TYPE_P (type)) + return boolean_true_node; + + bool positive0 = !tree_int_cst_sign_bit (iv0->step); + bool positive1 = !tree_int_cst_sign_bit (iv1->step); + bool positive = !tree_int_cst_sign_bit (step); + tree utype = unsigned_type_for (type); + bool add1 = code == LE_EXPR; + tree niter = positive + ? get_step_count (iv1->base, iv0->base, step, utype, add1) + : get_step_count (iv0->base, iv1->base, step, utype, add1); + + int prec = TYPE_PRECISION (type); + signop sgn = TYPE_SIGN (type); + tree max = wide_int_to_tree (type, wi::max_value (prec, sgn)); + tree min = wide_int_to_tree (type, wi::min_value (prec, sgn)); + tree valid_niter0, valid_niter1; + + valid_niter0 = positive0 ? get_step_count (max, iv0->base, iv0->step, utype) + : get_step_count (iv0->base, min, iv0->step, utype); + valid_niter1 = positive1 ? get_step_count (max, iv1->base, iv1->step, utype) + : get_step_count (iv1->base, min, iv1->step, utype); + + tree e0 = fold_build2 (LT_EXPR, boolean_type_node, niter, valid_niter0); + tree e1 = fold_build2 (LT_EXPR, boolean_type_node, niter, valid_niter1); + return fold_build2 (TRUTH_AND_EXPR, boolean_type_node, e0, e1); +} + /* Determine the number of iterations according to condition (for staying inside loop) which compares two induction variables using comparison operator CODE. The induction variable on left side of the comparison @@ -1879,30 +1943,26 @@ number_of_iterations_cond (class loop *loop, {iv0.base, iv0.step - iv1.step} cmp_code {iv1.base, 0} provided that either below condition is satisfied: + a. iv0.step and iv1.step are integer. + b. Additional condition: before iv0 chase up v1, iv0 and iv1 should not + step over min or max of the type. */ - a) the test is NE_EXPR; - b) iv0.step - iv1.step is integer and iv0/iv1 don't overflow. 
- - This rarely occurs in practice, but it is simple enough to manage. */ if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step)) { + if (TREE_CODE (iv0->step) != INTEGER_CST + || TREE_CODE (iv1->step) != INTEGER_CST) + return false; + tree step_type = POINTER_TYPE_P (type) ? sizetype : type; - tree step = fold_binary_to_constant (MINUS_EXPR, step_type, - iv0->step, iv1->step); - - /* No need to check sign of the new step since below code takes care - of this well. */ - if (code != NE_EXPR - && (TREE_CODE (step) != INTEGER_CST - || !iv0->no_overflow || !iv1->no_overflow)) + tree step + = fold_binary_to_constant (MINUS_EXPR, step_type, iv0->step, iv1->step); + + niter->assumptions = extra_iv_chase_assumption (iv0, iv1, step, code); + if (integer_zerop (niter->assumptions)) return false; iv0->step = step; - if (!POINTER_TYPE_P (type)) - iv0->no_overflow = false; - iv1->step = build_int_cst (step_type, 0); - iv1->no_overflow = true; } /* If the result of the comparison is a constant, the loop is weird. 
More diff --git a/gcc/testsuite/gcc.c-torture/execute/pr100740.c b/gcc/testsuite/gcc.c-torture/execute/pr100740.c new file mode 100644 index 00000000000..8fcdaffef3b --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr100740.c @@ -0,0 +1,11 @@ +/* PR tree-optimization/100740 */ + +unsigned a, b; +int main() { + unsigned c = 0; + for (a = 0; a < 2; a++) + for (b = 0; b < 2; b++) + if (++c < a) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr102131.c b/gcc/testsuite/gcc.dg/vect/pr102131.c new file mode 100644 index 00000000000..23975cfeadb --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr102131.c @@ -0,0 +1,47 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-O3" } */ +#define MAX ((unsigned int) 0xffffffff) +#define MIN ((unsigned int) (0)) + +int arr[512]; + +#define FUNC(NAME, CODE, S0, S1) \ + unsigned __attribute__ ((noinline)) NAME (unsigned int b0, unsigned int b1) \ + { \ + unsigned int n = 0; \ + unsigned int i0, i1; \ + int *p = arr; \ + for (i0 = b0, i1 = b1; i0 CODE i1; i0 += S0, i1 += S1) \ + { \ + n++; \ + *p++ = i0 + i1; \ + } \ + return n; \ + } + +FUNC (lt_5_1, <, 5, 1); +FUNC (le_1_m5, <=, 1, -5); +FUNC (lt_1_10, <, 1, 10); + +int +main () +{ + int fail = 0; + if (lt_5_1 (MAX - 124, MAX - 27) != 28) + fail++; + + /* to save time, do not run this. */ + /* + if (le_1_m5 (MIN + 1, MIN + 9) != 715827885) + fail++; */ + + if (lt_1_10 (MAX - 1000, MAX - 500) != 51) + fail++; + + if (fail) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ -- 2.17.1 > > >> BR, >> Jiufu Guo >> >> gcc/ChangeLog: >> >> PR tree-optimization/100740 >> * tree-ssa-loop-niter.c (number_of_iterations_cond): Add >> assume condition for combining of two IVs >> >> gcc/testsuite/ChangeLog: >> >> * gcc.c-torture/execute/pr100740.c: New test. 
>> --- >> gcc/tree-ssa-loop-niter.c | 103 +++++++++++++++--- >> .../gcc.c-torture/execute/pr100740.c | 11 ++ >> 2 files changed, 99 insertions(+), 15 deletions(-) >> create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr100740.c >> >> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c >> index 75109407124..f2987a4448d 100644 >> --- a/gcc/tree-ssa-loop-niter.c >> +++ b/gcc/tree-ssa-loop-niter.c >> @@ -1863,29 +1863,102 @@ number_of_iterations_cond (class loop *loop, >> >> provided that either below condition is satisfied: >> >> - a) the test is NE_EXPR; >> - b) iv0.step - iv1.step is integer and iv0/iv1 don't overflow. >> + a) iv0.step - iv1.step is integer and iv0/iv1 don't overflow. >> + b) assumptions in below table also need to be satisfied. >> + >> + | iv0 | iv1 | assum (iv0<iv1) | assum (iv0!=iv1) | >> + |---------+---------+---------------------+---------------------| >> + | (b0,2) | (b1,1) | before iv1 overflow | before iv1 overflow | >> + | (b0,2) | (b1,-1) | true | true | >> + | (b0,-1) | (b1,-2) | before iv0 overflow | before iv0 overflow | >> + | | | | | >> + | (b0,1) | (b1,2) | false | before iv0 overflow | >> + | (b0,-1) | (b1,2) | false | true | >> + | (b0,-2) | (b1,-1) | false | before iv1 overflow | >> + 'true' in above table means no need additional condition. >> + 'false' means this case can not satify the transform. >> + The first three rows: iv0->step > iv1->step; >> + The second three rows: iv0->step < iv1->step. >> >> This rarely occurs in practice, but it is simple enough to manage. */ >> if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step)) >> { >> + if (TREE_CODE (iv0->step) != INTEGER_CST >> + || TREE_CODE (iv1->step) != INTEGER_CST) >> + return false; >> + if (!iv0->no_overflow || !iv1->no_overflow) >> + return false; >> + >> tree step_type = POINTER_TYPE_P (type) ? 
sizetype : type; >> - tree step = fold_binary_to_constant (MINUS_EXPR, step_type, >> - iv0->step, iv1->step); >> - >> - /* No need to check sign of the new step since below code takes care >> - of this well. */ >> - if (code != NE_EXPR >> - && (TREE_CODE (step) != INTEGER_CST >> - || !iv0->no_overflow || !iv1->no_overflow)) >> + tree step >> + = fold_binary_to_constant (MINUS_EXPR, step_type, iv0->step, iv1->step); >> + >> + if (code != NE_EXPR && tree_int_cst_sign_bit (step)) >> return false; >> >> - iv0->step = step; >> - if (!POINTER_TYPE_P (type)) >> - iv0->no_overflow = false; >> + bool positive0 = !tree_int_cst_sign_bit (iv0->step); >> + bool positive1 = !tree_int_cst_sign_bit (iv1->step); >> >> - iv1->step = build_int_cst (step_type, 0); >> - iv1->no_overflow = true; >> + /* Cases in rows 2 and 4 of above table. */ >> + if ((positive0 && !positive1) || (!positive0 && positive1)) >> + { >> + iv0->step = step; >> + iv1->step = build_int_cst (step_type, 0); >> + return number_of_iterations_cond (loop, type, iv0, code, iv1, >> + niter, only_exit, every_iteration); >> + } >> + >> + affine_iv i_0, i_1; >> + class tree_niter_desc num; >> + i_0 = *iv0; >> + i_1 = *iv1; >> + i_0.step = step; >> + i_1.step = build_int_cst (step_type, 0); >> + if (!number_of_iterations_cond (loop, type, &i_0, code, &i_1, &num, >> + only_exit, every_iteration)) >> + return false; >> + >> + affine_iv i0, i1; >> + class tree_niter_desc num_wrap; >> + i0 = *iv0; >> + i1 = *iv1; >> + >> + /* Reset iv0 and iv1 to calculate the niter which cause overflow. 
*/ >> + if (tree_int_cst_lt (i1.step, i0.step)) >> + { >> + if (positive0 && positive1) >> + i0.step = build_int_cst (step_type, 0); >> + else if (!positive0 && !positive1) >> + i1.step = build_int_cst (step_type, 0); >> + if (code == NE_EXPR) >> + code = LT_EXPR; >> + } >> + else >> + { >> + if (positive0 && positive1) >> + i1.step = build_int_cst (step_type, 0); >> + else if (!positive0 && !positive1) >> + i0.step = build_int_cst (step_type, 0); >> + gcc_assert (code == NE_EXPR); >> + code = GT_EXPR; >> + } >> + >> + /* Calculate the niter which cause overflow. */ >> + if (!number_of_iterations_cond (loop, type, &i0, code, &i1, &num_wrap, >> + only_exit, every_iteration)) >> + return false; >> + >> + /* Make assumption there is no overflow. */ >> + tree assum >> + = fold_build2 (LE_EXPR, boolean_type_node, num.niter, >> + fold_convert (TREE_TYPE (num.niter), num_wrap.niter)); >> + num.assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, >> + num.assumptions, assum); >> + >> + *iv0 = i_0; >> + *iv1 = i_1; >> + *niter = num; >> + return true; >> } >> >> /* If the result of the comparison is a constant, the loop is weird. >> More >> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr100740.c >> b/gcc/testsuite/gcc.c-torture/execute/pr100740.c >> new file mode 100644 >> index 00000000000..8fcdaffef3b >> --- /dev/null >> +++ b/gcc/testsuite/gcc.c-torture/execute/pr100740.c >> @@ -0,0 +1,11 @@ >> +/* PR tree-optimization/100740 */ >> + >> +unsigned a, b; >> +int main() { >> + unsigned c = 0; >> + for (a = 0; a < 2; a++) >> + for (b = 0; b < 2; b++) >> + if (++c < a) >> + __builtin_abort (); >> + return 0; >> +} >>