Re: [PATCH V3] Split loop for NE condition.
On 2021-06-21 16:51, Richard Biener wrote: On Wed, 9 Jun 2021, guojiufu wrote: On 2021-06-09 17:42, guojiufu via Gcc-patches wrote: > On 2021-06-08 18:13, Richard Biener wrote: >> On Fri, 4 Jun 2021, Jiufu Guo wrote: >> > cut... >>> + gcond *cond = as_a (last); >>> + enum tree_code code = gimple_cond_code (cond); >>> + if (!(code == NE_EXPR >>> + || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE >> >> The NE_EXPR check misses a corresponding && (e->flags & EDGE_FALSE_VALUE) >> check. >> > Thanks, check (e->flags & EDGE_FALSE_VALUE) would be safer. > >>> + continue; >>> + >>> + /* Check if bound is invarant. */ >>> + tree idx = gimple_cond_lhs (cond); >>> + tree bnd = gimple_cond_rhs (cond); >>> + if (expr_invariant_in_loop_p (loop, idx)) >>> + std::swap (idx, bnd); >>> + else if (!expr_invariant_in_loop_p (loop, bnd)) >>> + continue; >>> + >>> + /* Only unsigned type conversion could cause wrap. */ >>> + tree type = TREE_TYPE (idx); >>> + if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME >>> +|| !TYPE_UNSIGNED (type)) >>> + continue; >>> + >>> + /* Avoid to split if bound is MAX/MIN val. */ >>> + tree bound_type = TREE_TYPE (bnd); >>> + if (TREE_CODE (bnd) == INTEGER_CST && INTEGRAL_TYPE_P (bound_type) >>> +&& (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) >>> +|| tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type >>> + continue; >> >> Note you do not require 'bnd' to be constant and thus at runtime those >> cases still need to be handled correctly. > Yes, bnd is not required to be constant. The above code is filtering the > case > where bnd is const max/min value of the type. So, the code could be updated > as: > if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) > || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type))) Yes, and the comment adjusted to "if bound is known to be MAX/MIN val." >> >>> + /* Check if there is possible wrap. */ >>> + class tree_niter_desc niter; >>> + if (!number_of_iterations_exit (loop, e, , false, false)) > cut... >>> + >>> + /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */ >> >> It now occurs to me that we nowhere check the evolution of IDX >> (split_at_bb_p uses simple_iv for this for example). The transform >> assumes that we will actually hit i == n and that i increments, but >> while you check the control IV from number_of_iterations_exit >> for NE_EXPR that does not guarantee a positive evolution. >> > If I do not correctly reply your question, please point out: > number_of_iterations_exit is similar with simple_iv to invoke > simple_iv_with_niters > which check the evolution, and number_of_iterations_exit check > number_of_iterations_cond > which check no_overflow more accurate, this is one reason I use this > function. > > This transform assumes that the last run hits i==n. > Otherwise, the loop may run infinitely wrap after wrap. > For safe, if the step is 1 or -1, this assumption would be true. I > would add this check. OK. > Thanks so much for pointing out I missed the negative step! > >> Your testcases do not include any negative step examples, but I guess >> the conditions need to be swapped in this case? > > I would add cases and code to support step 1/-1. > >> >> I think you also have to consider the order we split, say with >> >> for (i = start; i != end; ++i) >> { >> push (i); >> if (a[i] != b[i]) >> break; >> } >> >> push (i) calls need to be in the same order for all cases of >> start < end, start == end and start > end (and also cover >> runtime testcases with end == 0 or end == UINT_MAX, likewise >> for start). > I add tests for the above cases. If missing sth, please point out, thanks! > >> >>> + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); >>> + enum tree_code up_code = inv ? LT_EXPR : GT_EXPR; >>> + enum tree_code down_code = inv ? GT_EXPR : LT_EXPR; > cut > > Thanks again for the very helpful review! > > BR, > Jiufu Guo. Here is the updated patch, thanks for your time! diff --git a/gcc/testsuite/gcc.dg/loop-split1.c b/gcc/testsuite/gcc.dg/loop-split1.c new file mode 100644 index 000..dd2d03a7b96 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-split1.c @@ -0,0 +1,101 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ + +void +foo (int *a, int *b, unsigned l, unsigned n) +{ + while (++l != n) +a[l] = b[l] + 1; +} +void +foo_1 (int *a, int *b, unsigned n) +{ + unsigned l = 0; + while (++l != n) +a[l] = b[l] + 1; +} + +void +foo1 (int *a, int *b, unsigned l, unsigned n) +{ + while (l++ != n) +a[l] = b[l] + 1; +} + +/* No wrap. */ +void +foo1_1 (int *a, int *b, unsigned n) +{ + unsigned l = 0; + while (l++ != n) +a[l] = b[l] + 1; +} + +unsigned +foo2 (char *a, char *b, unsigned l, unsigned n) +{ + while (++l != n) +if
Re: [PATCH V3] Split loop for NE condition.
On Wed, 9 Jun 2021, guojiufu wrote: > On 2021-06-09 17:42, guojiufu via Gcc-patches wrote: > > On 2021-06-08 18:13, Richard Biener wrote: > >> On Fri, 4 Jun 2021, Jiufu Guo wrote: > >> > > cut... > >>> + gcond *cond = as_a (last); > >>> + enum tree_code code = gimple_cond_code (cond); > >>> + if (!(code == NE_EXPR > >>> + || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE > >> > >> The NE_EXPR check misses a corresponding && (e->flags & EDGE_FALSE_VALUE) > >> check. > >> > > Thanks, check (e->flags & EDGE_FALSE_VALUE) would be safer. > > > >>> + continue; > >>> + > >>> + /* Check if bound is invarant. */ > >>> + tree idx = gimple_cond_lhs (cond); > >>> + tree bnd = gimple_cond_rhs (cond); > >>> + if (expr_invariant_in_loop_p (loop, idx)) > >>> + std::swap (idx, bnd); > >>> + else if (!expr_invariant_in_loop_p (loop, bnd)) > >>> + continue; > >>> + > >>> + /* Only unsigned type conversion could cause wrap. */ > >>> + tree type = TREE_TYPE (idx); > >>> + if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME > >>> + || !TYPE_UNSIGNED (type)) > >>> + continue; > >>> + > >>> + /* Avoid to split if bound is MAX/MIN val. */ > >>> + tree bound_type = TREE_TYPE (bnd); > >>> + if (TREE_CODE (bnd) == INTEGER_CST && INTEGRAL_TYPE_P (bound_type) > >>> + && (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) > >>> + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type > >>> + continue; > >> > >> Note you do not require 'bnd' to be constant and thus at runtime those > >> cases still need to be handled correctly. > > Yes, bnd is not required to be constant. The above code is filtering the > > case > > where bnd is const max/min value of the type. So, the code could be updated > > as: > > if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) > > || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type))) Yes, and the comment adjusted to "if bound is known to be MAX/MIN val." > >> > >>> + /* Check if there is possible wrap. */ > >>> + class tree_niter_desc niter; > >>> + if (!number_of_iterations_exit (loop, e, , false, false)) > > cut... > >>> + > >>> + /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */ > >> > >> It now occurs to me that we nowhere check the evolution of IDX > >> (split_at_bb_p uses simple_iv for this for example). The transform > >> assumes that we will actually hit i == n and that i increments, but > >> while you check the control IV from number_of_iterations_exit > >> for NE_EXPR that does not guarantee a positive evolution. > >> > > If I do not correctly reply your question, please point out: > > number_of_iterations_exit is similar with simple_iv to invoke > > simple_iv_with_niters > > which check the evolution, and number_of_iterations_exit check > > number_of_iterations_cond > > which check no_overflow more accurate, this is one reason I use this > > function. > > > > This transform assumes that the last run hits i==n. > > Otherwise, the loop may run infinitely wrap after wrap. > > For safe, if the step is 1 or -1, this assumption would be true. I > > would add this check. OK. > > Thanks so much for pointing out I missed the negative step! > > > >> Your testcases do not include any negative step examples, but I guess > >> the conditions need to be swapped in this case? > > > > I would add cases and code to support step 1/-1. > > > >> > >> I think you also have to consider the order we split, say with > >> > >> for (i = start; i != end; ++i) > >> { > >> push (i); > >> if (a[i] != b[i]) > >> break; > >> } > >> > >> push (i) calls need to be in the same order for all cases of > >> start < end, start == end and start > end (and also cover > >> runtime testcases with end == 0 or end == UINT_MAX, likewise > >> for start). > > I add tests for the above cases. If missing sth, please point out, thanks! > > > >> > >>> + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); > >>> + enum tree_code up_code = inv ? LT_EXPR : GT_EXPR; > >>> + enum tree_code down_code = inv ? GT_EXPR : LT_EXPR; > > cut > > > > Thanks again for the very helpful review! > > > > BR, > > Jiufu Guo. > > Here is the updated patch, thanks for your time! > > diff --git a/gcc/testsuite/gcc.dg/loop-split1.c > b/gcc/testsuite/gcc.dg/loop-split1.c > new file mode 100644 > index 000..dd2d03a7b96 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/loop-split1.c > @@ -0,0 +1,101 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ > + > +void > +foo (int *a, int *b, unsigned l, unsigned n) > +{ > + while (++l != n) > +a[l] = b[l] + 1; > +} > +void > +foo_1 (int *a, int *b, unsigned n) > +{ > + unsigned l = 0; > + while (++l != n) > +a[l] = b[l] + 1; > +} > + > +void > +foo1 (int *a, int *b, unsigned l, unsigned n) > +{ > + while (l++ != n) > +a[l] =
Re: [PATCH V3] Split loop for NE condition.
On 2021-06-09 19:18, guojiufu wrote: On 2021-06-09 17:42, guojiufu via Gcc-patches wrote: On 2021-06-08 18:13, Richard Biener wrote: On Fri, 4 Jun 2021, Jiufu Guo wrote: cut... cut... Here is the updated patch, thanks for your time! Updates: . Enhance code to support negative step. . Check step +-1 to make sure it hits loop condition != . Enhance runtime cases to check more boundary cases and run order cases. . Refine for compiling time: check loop num of insns and can_copy_bbs_p later diff --git a/gcc/testsuite/gcc.dg/loop-split1.c b/gcc/testsuite/gcc.dg/loop-split1.c new file mode 100644 index 000..dd2d03a7b96 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-split1.c @@ -0,0 +1,101 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ + +void +foo (int *a, int *b, unsigned l, unsigned n) +{ + while (++l != n) +a[l] = b[l] + 1; +} +void +foo_1 (int *a, int *b, unsigned n) +{ + unsigned l = 0; + while (++l != n) +a[l] = b[l] + 1; +} + +void +foo1 (int *a, int *b, unsigned l, unsigned n) +{ + while (l++ != n) +a[l] = b[l] + 1; +} + +/* No wrap. */ +void +foo1_1 (int *a, int *b, unsigned n) +{ + unsigned l = 0; + while (l++ != n) +a[l] = b[l] + 1; +} + +unsigned +foo2 (char *a, char *b, unsigned l, unsigned n) +{ + while (++l != n) +if (a[l] != b[l]) + break; + + return l; +} + +unsigned +foo2_1 (char *a, char *b, unsigned l, unsigned n) +{ + l = 0; + while (++l != n) +if (a[l] != b[l]) + break; + + return l; +} + +unsigned +foo3 (char *a, char *b, unsigned l, unsigned n) +{ + while (l++ != n) +if (a[l] != b[l]) + break; + + return l; +} + +/* No wrap. */ +unsigned +foo3_1 (char *a, char *b, unsigned l, unsigned n) +{ + l = 0; + while (l++ != n) +if (a[l] != b[l]) + break; + + return l; +} + +void +bar (); +void +foo4 (unsigned n, unsigned i) +{ + do +{ + if (i == n) + return; + bar (); + ++i; +} + while (1); +} + +unsigned +find_skip_diff (char *p, char *q, unsigned n, unsigned i) +{ + while (p[i] == q[i] && ++i != n) +p++, q++; + + return i; +} + +/* { dg-final { scan-tree-dump-times "Loop split" 8 "lsplit" } } */ diff --git a/gcc/testsuite/gcc.dg/loop-split2.c b/gcc/testsuite/gcc.dg/loop-split2.c new file mode 100644 index 000..56377e2f2f5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-split2.c @@ -0,0 +1,155 @@ +/* { dg-do run } */ +/* { dg-options "-O3" } */ + +extern void +abort (void); +extern void +exit (int); +void +push (int); + +#define NI __attribute__ ((noinline)) + +void NI +foo (int *a, int *b, unsigned char l, unsigned char n) +{ + while (++l != n) +a[l] = b[l] + 1; +} + +unsigned NI +bar (int *a, int *b, unsigned char l, unsigned char n) +{ + while (l++ != n) +{ + push (l); + if (a[l] != b[l]) + break; + push (l + 1); +} + return l; +} + +void NI +foo_1 (int *a, int *b, unsigned char l, unsigned char n) +{ + while (--l != n) +a[l] = b[l] + 1; +} + +unsigned NI +bar_1 (int *a, int *b, unsigned char l, unsigned char n) +{ + while (l-- != n) +{ + push (l); + if (a[l] != b[l]) + break; + push (l + 1); +} + + return l; +} + +int a[258]; +int b[258]; +int c[1024]; +static int top = 0; +void +push (int e) +{ + c[top++] = e; +} + +void +reset () +{ + top = 0; + __builtin_memset (c, 0, sizeof (c)); +} + +#define check(a, b) (a == b) + +int +check_c (int *c, int a0, int a1, int a2, int a3, int a4, int a5) +{ + return check (c[0], a0) && check (c[1], a1) && check (c[2], a2) +&& check (c[3], a3) && check (c[4], a4) && check (c[5], a5); +} + +int +main () +{ + __builtin_memcpy (b, a, sizeof (a)); + reset (); + if (bar (a, b, 6, 8) != 9 || !check_c (c, 7, 8, 8, 9, 0, 0)) +abort (); + + reset (); + if (bar (a, b, 5, 3) != 4 || !check_c (c, 6, 7, 7, 8, 8, 9) + || !check_c (c + 496, 254, 255, 255, 256, 0, 1)) +abort (); + + reset (); + if (bar (a, b, 6, 6) != 7 || !check_c (c, 0, 0, 0, 0, 0, 0)) +abort (); + + reset (); + if (bar (a, b, 253, 255) != 0 || !check_c (c, 254, 255, 255, 256, 0, 0)) +abort (); + + reset (); + if (bar (a, b, 253, 0) != 1 || !check_c (c, 254, 255, 255, 256, 0, 1)) +abort (); + + reset (); + if (bar_1 (a, b, 6, 8) != 7 || !check_c (c, 5, 6, 4, 5, 3, 4)) +abort (); + + reset (); + if (bar_1 (a, b, 5, 3) != 2 || !check_c (c, 4, 5, 3, 4, 0, 0)) +abort (); + + reset (); + if (bar_1 (a, b, 6, 6) != 5) +abort (); + + reset (); + if (bar_1 (a, b, 2, 255) != 254 || !check_c (c, 1, 2, 0, 1, 255, 256)) +abort (); + + reset (); + if (bar_1 (a, b, 2, 0) != 255 || !check_c (c, 1, 2, 0, 1, 0, 0)) +abort (); + + b[100] += 1; + reset (); + if (bar (a, b, 90, 110) != 100) +abort (); + + reset (); + if (bar (a, b, 110, 105) != 100) +abort (); + + reset (); + if (bar_1 (a, b, 90, 110) != 109) +abort (); + + reset (); + if (bar_1 (a, b, 2, 90) != 100) +
Re: [PATCH V3] Split loop for NE condition.
On 2021-06-09 17:42, guojiufu via Gcc-patches wrote: On 2021-06-08 18:13, Richard Biener wrote: On Fri, 4 Jun 2021, Jiufu Guo wrote: cut... + gcond *cond = as_a (last); + enum tree_code code = gimple_cond_code (cond); + if (!(code == NE_EXPR + || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE The NE_EXPR check misses a corresponding && (e->flags & EDGE_FALSE_VALUE) check. Thanks, check (e->flags & EDGE_FALSE_VALUE) would be safer. + continue; + + /* Check if bound is invarant. */ + tree idx = gimple_cond_lhs (cond); + tree bnd = gimple_cond_rhs (cond); + if (expr_invariant_in_loop_p (loop, idx)) + std::swap (idx, bnd); + else if (!expr_invariant_in_loop_p (loop, bnd)) + continue; + + /* Only unsigned type conversion could cause wrap. */ + tree type = TREE_TYPE (idx); + if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME + || !TYPE_UNSIGNED (type)) + continue; + + /* Avoid to split if bound is MAX/MIN val. */ + tree bound_type = TREE_TYPE (bnd); + if (TREE_CODE (bnd) == INTEGER_CST && INTEGRAL_TYPE_P (bound_type) + && (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type + continue; Note you do not require 'bnd' to be constant and thus at runtime those cases still need to be handled correctly. Yes, bnd is not required to be constant. The above code is filtering the case where bnd is const max/min value of the type. So, the code could be updated as: if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type))) + /* Check if there is possible wrap. */ + class tree_niter_desc niter; + if (!number_of_iterations_exit (loop, e, , false, false)) cut... + + /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */ It now occurs to me that we nowhere check the evolution of IDX (split_at_bb_p uses simple_iv for this for example). The transform assumes that we will actually hit i == n and that i increments, but while you check the control IV from number_of_iterations_exit for NE_EXPR that does not guarantee a positive evolution. If I do not correctly reply your question, please point out: number_of_iterations_exit is similar with simple_iv to invoke simple_iv_with_niters which check the evolution, and number_of_iterations_exit check number_of_iterations_cond which check no_overflow more accurate, this is one reason I use this function. This transform assumes that the last run hits i==n. Otherwise, the loop may run infinitely wrap after wrap. For safe, if the step is 1 or -1, this assumption would be true. I would add this check. Thanks so much for pointing out I missed the negative step! Your testcases do not include any negative step examples, but I guess the conditions need to be swapped in this case? I would add cases and code to support step 1/-1. I think you also have to consider the order we split, say with for (i = start; i != end; ++i) { push (i); if (a[i] != b[i]) break; } push (i) calls need to be in the same order for all cases of start < end, start == end and start > end (and also cover runtime testcases with end == 0 or end == UINT_MAX, likewise for start). I add tests for the above cases. If missing sth, please point out, thanks! + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); + enum tree_code up_code = inv ? LT_EXPR : GT_EXPR; + enum tree_code down_code = inv ? GT_EXPR : LT_EXPR; cut Thanks again for the very helpful review! BR, Jiufu Guo. Here is the updated patch, thanks for your time! diff --git a/gcc/testsuite/gcc.dg/loop-split1.c b/gcc/testsuite/gcc.dg/loop-split1.c new file mode 100644 index 000..dd2d03a7b96 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-split1.c @@ -0,0 +1,101 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ + +void +foo (int *a, int *b, unsigned l, unsigned n) +{ + while (++l != n) +a[l] = b[l] + 1; +} +void +foo_1 (int *a, int *b, unsigned n) +{ + unsigned l = 0; + while (++l != n) +a[l] = b[l] + 1; +} + +void +foo1 (int *a, int *b, unsigned l, unsigned n) +{ + while (l++ != n) +a[l] = b[l] + 1; +} + +/* No wrap. */ +void +foo1_1 (int *a, int *b, unsigned n) +{ + unsigned l = 0; + while (l++ != n) +a[l] = b[l] + 1; +} + +unsigned +foo2 (char *a, char *b, unsigned l, unsigned n) +{ + while (++l != n) +if (a[l] != b[l]) + break; + + return l; +} + +unsigned +foo2_1 (char *a, char *b, unsigned l, unsigned n) +{ + l = 0; + while (++l != n) +if (a[l] != b[l]) + break; + + return l; +} + +unsigned +foo3 (char *a, char *b, unsigned l, unsigned n) +{ + while (l++ != n) +if (a[l] != b[l]) + break; + + return l; +} + +/* No wrap. */ +unsigned +foo3_1
Re: [PATCH V3] Split loop for NE condition.
On 2021-06-08 18:13, Richard Biener wrote: On Fri, 4 Jun 2021, Jiufu Guo wrote: cut... + gcond *cond = as_a (last); + enum tree_code code = gimple_cond_code (cond); + if (!(code == NE_EXPR + || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE The NE_EXPR check misses a corresponding && (e->flags & EDGE_FALSE_VALUE) check. Thanks, check (e->flags & EDGE_FALSE_VALUE) would be safer. + continue; + + /* Check if bound is invarant. */ + tree idx = gimple_cond_lhs (cond); + tree bnd = gimple_cond_rhs (cond); + if (expr_invariant_in_loop_p (loop, idx)) + std::swap (idx, bnd); + else if (!expr_invariant_in_loop_p (loop, bnd)) + continue; + + /* Only unsigned type conversion could cause wrap. */ + tree type = TREE_TYPE (idx); + if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME + || !TYPE_UNSIGNED (type)) + continue; + + /* Avoid to split if bound is MAX/MIN val. */ + tree bound_type = TREE_TYPE (bnd); + if (TREE_CODE (bnd) == INTEGER_CST && INTEGRAL_TYPE_P (bound_type) + && (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) + || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type + continue; Note you do not require 'bnd' to be constant and thus at runtime those cases still need to be handled correctly. Yes, bnd is not required to be constant. The above code is filtering the case where bnd is const max/min value of the type. So, the code could be updated as: if (tree_int_cst_equal (bnd, TYPE_MAX_VALUE (bound_type)) || tree_int_cst_equal (bnd, TYPE_MIN_VALUE (bound_type))) + /* Check if there is possible wrap. */ + class tree_niter_desc niter; + if (!number_of_iterations_exit (loop, e, , false, false)) cut... + + /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */ It now occurs to me that we nowhere check the evolution of IDX (split_at_bb_p uses simple_iv for this for example). The transform assumes that we will actually hit i == n and that i increments, but while you check the control IV from number_of_iterations_exit for NE_EXPR that does not guarantee a positive evolution. If I do not correctly reply your question, please point out: number_of_iterations_exit is similar with simple_iv to invoke simple_iv_with_niters which check the evolution, and number_of_iterations_exit check number_of_iterations_cond which check no_overflow more accurate, this is one reason I use this function. This transform assumes that the last run hits i==n. Otherwise, the loop may run infinitely wrap after wrap. For safe, if the step is 1 or -1, this assumption would be true. I would add this check. Thanks so much for pointing out I missed the negative step! Your testcases do not include any negative step examples, but I guess the conditions need to be swapped in this case? I would add cases and code to support step 1/-1. I think you also have to consider the order we split, say with for (i = start; i != end; ++i) { push (i); if (a[i] != b[i]) break; } push (i) calls need to be in the same order for all cases of start < end, start == end and start > end (and also cover runtime testcases with end == 0 or end == UINT_MAX, likewise for start). I add tests for the above cases. If missing sth, please point out, thanks! + bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc)); + enum tree_code up_code = inv ? LT_EXPR : GT_EXPR; + enum tree_code down_code = inv ? GT_EXPR : LT_EXPR; cut Thanks again for the very helpful review! BR, Jiufu Guo.
Re: [PATCH V3] Split loop for NE condition.
On Fri, 4 Jun 2021, Jiufu Guo wrote: > Update the patch since v2: > . Check index and bound from gcond before checking if wrap. > . Update test case, and add an executable case. > . Refine code comments. > . Enhance the checking for i++/++i in the loop header. > . Enhance code to handle equal condition on exit > > Bootstrap and regtest pass on powerpc64le, and also pass regtest > on bootstrap-O3. Is this ok for trunk? > > BR. > Jiufu Guo. > > > When there is the possibility that wrap may happen on the loop index, > a few optimizations would not happen. For example code: > > foo (int *a, int *b, unsigned k, unsigned n) > { > while (++k != n) > a[k] = b[k] + 1; > } > > For this code, if "k > n", k would wrap. if "k < n" at begining, > it could be optimized (e.g. vectorization). > > We can split the loop into two loops: > > while (++k > n) > a[k] = b[k] + 1; > while (k++ < n) > a[k] = b[k] + 1; > > This patch splits this kind of loop to achieve better performance. > > gcc/ChangeLog: > > 2021-06-04 Jiufu Guo > > * tree-ssa-loop-split.c (connect_loop_phis): Add new param. > (get_ne_cond_branch): New function. > (split_ne_loop): New function. > (split_loop_on_ne_cond): New function. > (tree_ssa_split_loops): Use split_loop_on_ne_cond. > > gcc/testsuite/ChangeLog: > > 2021-06-04 Jiufu Guo > > * gcc.dg/loop-split1.c: New test. > * gcc.dg/loop-split2.c: New test. > * g++.dg/vect/pr98064.cc: Suppress warning. > > --- > gcc/testsuite/g++.dg/vect/pr98064.cc | 4 +- > gcc/testsuite/gcc.dg/loop-split1.c | 101 +++ > gcc/testsuite/gcc.dg/loop-split2.c | 54 ++ > gcc/tree-ssa-loop-split.c| 251 ++- > 4 files changed, 404 insertions(+), 6 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/loop-split1.c > create mode 100644 gcc/testsuite/gcc.dg/loop-split2.c > > diff --git a/gcc/testsuite/g++.dg/vect/pr98064.cc > b/gcc/testsuite/g++.dg/vect/pr98064.cc > index 74043ce7725..dcb2985d05a 100644 > --- a/gcc/testsuite/g++.dg/vect/pr98064.cc > +++ b/gcc/testsuite/g++.dg/vect/pr98064.cc > @@ -1,5 +1,7 @@ > // { dg-do compile } > -// { dg-additional-options "-O3" } > +// { dg-additional-options "-O3 -Wno-stringop-overflow" } > +/* There is warning message when "short g = var_8; g; g++" > + is optimized/analyzed as string operation,e.g. memset. */ > > const long long (const long long &__a, long long &__b) { >if (__b < __a) > diff --git a/gcc/testsuite/gcc.dg/loop-split1.c > b/gcc/testsuite/gcc.dg/loop-split1.c > new file mode 100644 > index 000..dd2d03a7b96 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/loop-split1.c > @@ -0,0 +1,101 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ > + > +void > +foo (int *a, int *b, unsigned l, unsigned n) > +{ > + while (++l != n) > +a[l] = b[l] + 1; > +} > +void > +foo_1 (int *a, int *b, unsigned n) > +{ > + unsigned l = 0; > + while (++l != n) > +a[l] = b[l] + 1; > +} > + > +void > +foo1 (int *a, int *b, unsigned l, unsigned n) > +{ > + while (l++ != n) > +a[l] = b[l] + 1; > +} > + > +/* No wrap. */ > +void > +foo1_1 (int *a, int *b, unsigned n) > +{ > + unsigned l = 0; > + while (l++ != n) > +a[l] = b[l] + 1; > +} > + > +unsigned > +foo2 (char *a, char *b, unsigned l, unsigned n) > +{ > + while (++l != n) > +if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +unsigned > +foo2_1 (char *a, char *b, unsigned l, unsigned n) > +{ > + l = 0; > + while (++l != n) > +if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +unsigned > +foo3 (char *a, char *b, unsigned l, unsigned n) > +{ > + while (l++ != n) > +if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +/* No wrap. */ > +unsigned > +foo3_1 (char *a, char *b, unsigned l, unsigned n) > +{ > + l = 0; > + while (l++ != n) > +if (a[l] != b[l]) > + break; > + > + return l; > +} > + > +void > +bar (); > +void > +foo4 (unsigned n, unsigned i) > +{ > + do > +{ > + if (i == n) > + return; > + bar (); > + ++i; > +} > + while (1); > +} > + > +unsigned > +find_skip_diff (char *p, char *q, unsigned n, unsigned i) > +{ > + while (p[i] == q[i] && ++i != n) > +p++, q++; > + > + return i; > +} > + > +/* { dg-final { scan-tree-dump-times "Loop split" 8 "lsplit" } } */ > diff --git a/gcc/testsuite/gcc.dg/loop-split2.c > b/gcc/testsuite/gcc.dg/loop-split2.c > new file mode 100644 > index 000..0d3fded3f61 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/loop-split2.c > @@ -0,0 +1,54 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3" } */ > + > +extern void abort (void); > +extern void exit (int); > + > +#define NI __attribute__ ((noinline)) > + > +void NI > +foo (int *a, int *b, unsigned char l, unsigned char n) > +{ > + while (++l != n) > +a[l] = b[l] + 1; > +} > + > +unsigned NI > +bar (int *a, int *b, unsigned
[PATCH V3] Split loop for NE condition.
Update the patch since v2: . Check index and bound from gcond before checking if wrap. . Update test case, and add an executable case. . Refine code comments. . Enhance the checking for i++/++i in the loop header. . Enhance code to handle equal condition on exit Bootstrap and regtest pass on powerpc64le, and also pass regtest on bootstrap-O3. Is this ok for trunk? BR. Jiufu Guo. When there is the possibility that wrap may happen on the loop index, a few optimizations would not happen. For example code: foo (int *a, int *b, unsigned k, unsigned n) { while (++k != n) a[k] = b[k] + 1; } For this code, if "k > n", k would wrap. if "k < n" at begining, it could be optimized (e.g. vectorization). We can split the loop into two loops: while (++k > n) a[k] = b[k] + 1; while (k++ < n) a[k] = b[k] + 1; This patch splits this kind of loop to achieve better performance. gcc/ChangeLog: 2021-06-04 Jiufu Guo * tree-ssa-loop-split.c (connect_loop_phis): Add new param. (get_ne_cond_branch): New function. (split_ne_loop): New function. (split_loop_on_ne_cond): New function. (tree_ssa_split_loops): Use split_loop_on_ne_cond. gcc/testsuite/ChangeLog: 2021-06-04 Jiufu Guo * gcc.dg/loop-split1.c: New test. * gcc.dg/loop-split2.c: New test. * g++.dg/vect/pr98064.cc: Suppress warning. --- gcc/testsuite/g++.dg/vect/pr98064.cc | 4 +- gcc/testsuite/gcc.dg/loop-split1.c | 101 +++ gcc/testsuite/gcc.dg/loop-split2.c | 54 ++ gcc/tree-ssa-loop-split.c| 251 ++- 4 files changed, 404 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/loop-split1.c create mode 100644 gcc/testsuite/gcc.dg/loop-split2.c diff --git a/gcc/testsuite/g++.dg/vect/pr98064.cc b/gcc/testsuite/g++.dg/vect/pr98064.cc index 74043ce7725..dcb2985d05a 100644 --- a/gcc/testsuite/g++.dg/vect/pr98064.cc +++ b/gcc/testsuite/g++.dg/vect/pr98064.cc @@ -1,5 +1,7 @@ // { dg-do compile } -// { dg-additional-options "-O3" } +// { dg-additional-options "-O3 -Wno-stringop-overflow" } +/* There is warning message when "short g = var_8; g; g++" + is optimized/analyzed as string operation,e.g. memset. */ const long long (const long long &__a, long long &__b) { if (__b < __a) diff --git a/gcc/testsuite/gcc.dg/loop-split1.c b/gcc/testsuite/gcc.dg/loop-split1.c new file mode 100644 index 000..dd2d03a7b96 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-split1.c @@ -0,0 +1,101 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */ + +void +foo (int *a, int *b, unsigned l, unsigned n) +{ + while (++l != n) +a[l] = b[l] + 1; +} +void +foo_1 (int *a, int *b, unsigned n) +{ + unsigned l = 0; + while (++l != n) +a[l] = b[l] + 1; +} + +void +foo1 (int *a, int *b, unsigned l, unsigned n) +{ + while (l++ != n) +a[l] = b[l] + 1; +} + +/* No wrap. */ +void +foo1_1 (int *a, int *b, unsigned n) +{ + unsigned l = 0; + while (l++ != n) +a[l] = b[l] + 1; +} + +unsigned +foo2 (char *a, char *b, unsigned l, unsigned n) +{ + while (++l != n) +if (a[l] != b[l]) + break; + + return l; +} + +unsigned +foo2_1 (char *a, char *b, unsigned l, unsigned n) +{ + l = 0; + while (++l != n) +if (a[l] != b[l]) + break; + + return l; +} + +unsigned +foo3 (char *a, char *b, unsigned l, unsigned n) +{ + while (l++ != n) +if (a[l] != b[l]) + break; + + return l; +} + +/* No wrap. */ +unsigned +foo3_1 (char *a, char *b, unsigned l, unsigned n) +{ + l = 0; + while (l++ != n) +if (a[l] != b[l]) + break; + + return l; +} + +void +bar (); +void +foo4 (unsigned n, unsigned i) +{ + do +{ + if (i == n) + return; + bar (); + ++i; +} + while (1); +} + +unsigned +find_skip_diff (char *p, char *q, unsigned n, unsigned i) +{ + while (p[i] == q[i] && ++i != n) +p++, q++; + + return i; +} + +/* { dg-final { scan-tree-dump-times "Loop split" 8 "lsplit" } } */ diff --git a/gcc/testsuite/gcc.dg/loop-split2.c b/gcc/testsuite/gcc.dg/loop-split2.c new file mode 100644 index 000..0d3fded3f61 --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-split2.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-O3" } */ + +extern void abort (void); +extern void exit (int); + +#define NI __attribute__ ((noinline)) + +void NI +foo (int *a, int *b, unsigned char l, unsigned char n) +{ + while (++l != n) +a[l] = b[l] + 1; +} + +unsigned NI +bar (int *a, int *b, unsigned char l, unsigned char n) +{ + while (l++ != n) +if (a[l] != b[l]) + break; + + return l; +} + +int a[258]; +int b[258]; + +int main() +{ + __builtin_memcpy (b, a, sizeof (a)); + + if (bar (a, b, 3, 8) != 9) +abort (); + + if (bar (a, b, 8, 3) != 4) +abort (); + + b[100] += 1; + if (bar (a, b, 90, 110) != 100) +abort (); + + if (bar (a, b, 110, 105) != 100) +abort (); + + foo (a, b, 99, 99); + a[99]