With reference to the discussions in:
https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574334.html
https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572006.html
https://gcc.gnu.org/pipermail/gcc-patches/2021-September/578672.html
Based on the patches in the above discussions, we may draft a patch to fix
the issue.
In this patch, to make sure it is OK to change '{b0,s0} op {b1,s1}' to
'{b0,s0-s1} op {b1,0}', we also compute a condition under which we can
assume that neither of the two IVs overflows/wraps: the niter of
'{b0,s0-s1} op {b1,0}' < the niter until iv0 or iv1 wraps.
Does this patch make sense?
BR,
Jiufu Guo
gcc/ChangeLog:
PR tree-optimization/100740
* tree-ssa-loop-niter.c (number_of_iterations_cond): Add
assumption condition for combining two IVs.
gcc/testsuite/ChangeLog:
* gcc.c-torture/execute/pr100740.c: New test.
---
gcc/tree-ssa-loop-niter.c | 103 +++++++++++++++---
.../gcc.c-torture/execute/pr100740.c | 11 ++
2 files changed, 99 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr100740.c
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 75109407124..f2987a4448d 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -1863,29 +1863,102 @@ number_of_iterations_cond (class loop *loop,
provided that either below condition is satisfied:
- a) the test is NE_EXPR;
- b) iv0.step - iv1.step is integer and iv0/iv1 don't overflow.
+ a) iv0.step - iv1.step is integer and iv0/iv1 don't overflow.
+ b) assumptions in below table also need to be satisfied.
+
+ | iv0 | iv1 | assum (iv0<iv1) | assum (iv0!=iv1) |
+ |---------+---------+---------------------+---------------------|
+ | (b0,2) | (b1,1) | before iv1 overflow | before iv1 overflow |
+ | (b0,2) | (b1,-1) | true | true |
+ | (b0,-1) | (b1,-2) | before iv0 overflow | before iv0 overflow |
+ | | | | |
+ | (b0,1) | (b1,2) | false | before iv0 overflow |
+ | (b0,-1) | (b1,2) | false | true |
+ | (b0,-2) | (b1,-1) | false | before iv1 overflow |
+ 'true' in the above table means no additional condition is needed.
+ 'false' means this case cannot satisfy the transform.
+ The first three rows: iv0->step > iv1->step;
+ The second three rows: iv0->step < iv1->step.
This rarely occurs in practice, but it is simple enough to
manage. */
if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
{
+ if (TREE_CODE (iv0->step) != INTEGER_CST
+ || TREE_CODE (iv1->step) != INTEGER_CST)
+ return false;
+ if (!iv0->no_overflow || !iv1->no_overflow)
+ return false;
+
tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
- tree step = fold_binary_to_constant (MINUS_EXPR, step_type,
- iv0->step, iv1->step);
-
- /* No need to check sign of the new step since below code takes
care
- of this well. */
- if (code != NE_EXPR
- && (TREE_CODE (step) != INTEGER_CST
- || !iv0->no_overflow || !iv1->no_overflow))
+ tree step
+ = fold_binary_to_constant (MINUS_EXPR, step_type, iv0->step,
iv1->step);
+
+ if (code != NE_EXPR && tree_int_cst_sign_bit (step))
return false;
- iv0->step = step;
- if (!POINTER_TYPE_P (type))
- iv0->no_overflow = false;
+ bool positive0 = !tree_int_cst_sign_bit (iv0->step);
+ bool positive1 = !tree_int_cst_sign_bit (iv1->step);
- iv1->step = build_int_cst (step_type, 0);
- iv1->no_overflow = true;
+ /* Cases in rows 2 and 4 of above table. */
+ if ((positive0 && !positive1) || (!positive0 && positive1))
+ {
+ iv0->step = step;
+ iv1->step = build_int_cst (step_type, 0);
+ return number_of_iterations_cond (loop, type, iv0, code, iv1,
+ niter, only_exit, every_iteration);
+ }
+
+ affine_iv i_0, i_1;
+ class tree_niter_desc num;
+ i_0 = *iv0;
+ i_1 = *iv1;
+ i_0.step = step;
+ i_1.step = build_int_cst (step_type, 0);
+ if (!number_of_iterations_cond (loop, type, &i_0, code, &i_1,
&num,
+ only_exit, every_iteration))
+ return false;
+
+ affine_iv i0, i1;
+ class tree_niter_desc num_wrap;
+ i0 = *iv0;
+ i1 = *iv1;
+
+ /* Reset iv0 and iv1 to calculate the niter which cause
overflow. */
+ if (tree_int_cst_lt (i1.step, i0.step))
+ {
+ if (positive0 && positive1)
+ i0.step = build_int_cst (step_type, 0);
+ else if (!positive0 && !positive1)
+ i1.step = build_int_cst (step_type, 0);
+ if (code == NE_EXPR)
+ code = LT_EXPR;
+ }
+ else
+ {
+ if (positive0 && positive1)
+ i1.step = build_int_cst (step_type, 0);
+ else if (!positive0 && !positive1)
+ i0.step = build_int_cst (step_type, 0);
+ gcc_assert (code == NE_EXPR);
+ code = GT_EXPR;
+ }
+
+ /* Calculate the niter which causes overflow. */
+ if (!number_of_iterations_cond (loop, type, &i0, code, &i1,
&num_wrap,
+ only_exit, every_iteration))
+ return false;
+
+ /* Make assumption there is no overflow. */
+ tree assum
+ = fold_build2 (LE_EXPR, boolean_type_node, num.niter,
+ fold_convert (TREE_TYPE (num.niter), num_wrap.niter));
+ num.assumptions = fold_build2 (TRUTH_AND_EXPR,
boolean_type_node,
+ num.assumptions, assum);
+
+ *iv0 = i_0;
+ *iv1 = i_1;
+ *niter = num;
+ return true;
}
/* If the result of the comparison is a constant, the loop is
weird. More
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr100740.c
b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
new file mode 100644
index 00000000000..8fcdaffef3b
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr100740.c
@@ -0,0 +1,11 @@
+/* PR tree-optimization/100740 */
+
+unsigned a, b;
+int main() {
+ unsigned c = 0;
+ for (a = 0; a < 2; a++)
+ for (b = 0; b < 2; b++)
+ if (++c < a)
+ __builtin_abort ();
+ return 0;
+}