------- Additional Comments From sebastian dot pop at cri dot ensmp dot fr 2005-07-26 10:06 ------- Subject: Re: [4.1 Regression] wrong code for casts and scev
After inlining, we end up with a loop containing the following code: b.0_3 = (signed char) b_8; D.1621_4 = (int) b.0_3; a_5 = (signed char) D.1621_4; D.1640_6 = (int) a_5; b_7 = D.1640_6 - 127; if (b_7 > 1) goto <L3>; else goto <L9>; that is equivalent to: b_7 = ((int) (signed char) (int) (signed char) b_8) - 127; if (b_7 > 1) goto <L3>; else goto <L9>; with b_8 = (unsigned char) {1, +, 1} b_7 = ((int) (signed char) (int) (signed char) {(uchar)1, +, (uchar)1}) - 127; if (b_7 > 1) goto <L3>; else goto <L9>; A sequence of unsigned char 1, 2, ..., 255 has to be converted to signed char that would wrap with -fwrapv: 1, 2, ..., 127, -128, ... So here is a patch that keeps the cast if the sequence wraps. The remaining problem is that with this patch, chrec_convert gets more picky about the things that it transforms: in some of the testcases of the vectorizer, we get some fails because the number of iterations is not known, making scev_probably_wraps_p to return true, and finally the conversion is kept. I have bootstrapped this patch on x86_64, but there are some regressions: FAIL: gcc.dg/vect/vect-46.c scan-tree-dump-times vectorized 1 loops 1 FAIL: gcc.dg/vect/vect-50.c scan-tree-dump-times vectorized 1 loops 1 FAIL: gcc.dg/vect/vect-50.c scan-tree-dump-times Vectorizing an unaligned access 2 FAIL: gcc.dg/vect/vect-50.c scan-tree-dump-times Alignment of access forced using peeling 1 FAIL: gcc.dg/vect/vect-52.c scan-tree-dump-times vectorized 1 loops 1 FAIL: gcc.dg/vect/vect-52.c scan-tree-dump-times Vectorizing an unaligned access 2 FAIL: gcc.dg/vect/vect-58.c scan-tree-dump-times vectorized 1 loops 1 FAIL: gcc.dg/vect/vect-58.c scan-tree-dump-times Alignment of access forced using peeling 1 FAIL: gcc.dg/vect/vect-60.c scan-tree-dump-times vectorized 1 loops 1 FAIL: gcc.dg/vect/vect-60.c scan-tree-dump-times Vectorizing an unaligned access 2 FAIL: gcc.dg/vect/vect-92.c scan-tree-dump-times vectorized 1 loops 3 FAIL: gcc.dg/vect/vect-92.c scan-tree-dump-times Alignment of access forced using peeling 3 In all these cases, the loop bound is a parameter. If IP-constant propagation is not used, chrec_convert has not enough information for removing the cast. I propose to modify all these testcases to make the loop bound explicit. FAIL: gcc.dg/vect/vect-77.c scan-tree-dump-times vectorized 1 loops 1 FAIL: gcc.dg/vect/vect-77.c scan-tree-dump-times Vectorizing an unaligned access 1 FAIL: gcc.dg/vect/vect-78.c scan-tree-dump-times vectorized 1 loops 1 FAIL: gcc.dg/vect/vect-78.c scan-tree-dump-times Vectorizing an unaligned access 1 For these two regressions, the problem is the same: we end with an evolution: ib_16 + (aint *) ((long unsigned int) {off_11, +, 1}_1 * 4) in which the casts cannot be removed because the offset is not known, and even if the number of iterations is known, chrec_convert cannot prove that it does not overflow. I propose to propagate the offset by hand, or to wait for ipcp ;-) FAIL: gcc.dg/vect/vect-87.c scan-tree-dump-times vectorized 1 loops 1 FAIL: gcc.dg/vect/vect-87.c scan-tree-dump-times Alignment of access forced using peeling 1 FAIL: gcc.dg/vect/vect-88.c scan-tree-dump-times vectorized 1 loops 1 FAIL: gcc.dg/vect/vect-88.c scan-tree-dump-times Alignment of access forced using peeling 1 For these two testcases we'll need IP-value range propagation. I think that we'll have to modify these testcases by inlining the code. Dorit, could you look at vect-{77,78,87,88}.c testcases? Thanks. * tree-cfg.c (print_succ_bbs): Correctly print successors. * tree-chrec.c (chrec_convert): Call scev_probably_wraps_p for checking that the iv does not wrap before converting the iv. * tree-ssa-loop-ivcanon.c: Correct typo in comment. * tree-ssa-loop-ivopts.c (idx_find_step): Add a fixme note. * tree-ssa-loop-niter.c (scev_probably_wraps_p): Check that AT_STMT is not null. (convert_step): Add a comment. Index: tree-cfg.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-cfg.c,v retrieving revision 2.211 diff -d -u -p -r2.211 tree-cfg.c --- tree-cfg.c 12 Jul 2005 13:20:28 -0000 2.211 +++ tree-cfg.c 26 Jul 2005 09:59:38 -0000 @@ -4454,7 +4454,7 @@ static void print_pred_bbs (FILE *, basi static void print_succ_bbs (FILE *, basic_block bb); -/* Print the predecessors indexes of edge E on FILE. */ +/* Print on FILE the indexes for the predecessors of basic_block BB. */ static void print_pred_bbs (FILE *file, basic_block bb) @@ -4463,11 +4463,11 @@ print_pred_bbs (FILE *file, basic_block edge_iterator ei; FOR_EACH_EDGE (e, ei, bb->preds) - fprintf (file, "bb_%d", e->src->index); + fprintf (file, "bb_%d ", e->src->index); } -/* Print the successors indexes of edge E on FILE. */ +/* Print on FILE the indexes for the successors of basic_block BB. */ static void print_succ_bbs (FILE *file, basic_block bb) @@ -4476,7 +4476,7 @@ print_succ_bbs (FILE *file, basic_block edge_iterator ei; FOR_EACH_EDGE (e, ei, bb->succs) - fprintf (file, "bb_%d", e->src->index); + fprintf (file, "bb_%d ", e->dest->index); } Index: tree-chrec.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-chrec.c,v retrieving revision 2.22 diff -d -u -p -r2.22 tree-chrec.c --- tree-chrec.c 13 Jul 2005 10:08:36 -0000 2.22 +++ tree-chrec.c 26 Jul 2005 09:59:38 -0000 @@ -1110,9 +1110,24 @@ chrec_convert (tree type, tree chrec, tr if (evolution_function_is_affine_p (chrec)) { - tree step = convert_step (current_loops->parray[CHREC_VARIABLE (chrec)], - type, CHREC_LEFT (chrec), CHREC_RIGHT (chrec), - at_stmt); + tree step; + bool dummy; + + /* Avoid conversion of (signed char) {(uchar)1, +, (uchar)1}_x + when it is not possible to prove that the scev does not wrap. + See PR22236, where a sequence 1, 2, ..., 255 has to be + converted to signed char, but this would wrap: 1, 2, ..., + 127, -128, ... The result should not be {(schar)1, +, + (schar)1}_x, but instead, we should keep the conversion: + (schar) {(uchar)1, +, (uchar)1}_x. */ + if (scev_probably_wraps_p (type, CHREC_LEFT (chrec), CHREC_RIGHT (chrec), + at_stmt, + current_loops->parray[CHREC_VARIABLE (chrec)], + &dummy)) + return fold_convert (type, chrec); + + step = convert_step (current_loops->parray[CHREC_VARIABLE (chrec)], type, + CHREC_LEFT (chrec), CHREC_RIGHT (chrec), at_stmt); if (!step) return fold_convert (type, chrec); Index: tree-ssa-loop-ivcanon.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-ssa-loop-ivcanon.c,v retrieving revision 2.18 diff -d -u -p -r2.18 tree-ssa-loop-ivcanon.c --- tree-ssa-loop-ivcanon.c 21 Jul 2005 07:24:10 -0000 2.18 +++ tree-ssa-loop-ivcanon.c 26 Jul 2005 09:59:38 -0000 @@ -252,7 +252,7 @@ try_unroll_loop_completely (struct loops } /* Adds a canonical induction variable to LOOP if suitable. LOOPS is the loops - tree. CREATE_IV is true if we may create a new iv. UL determines what + tree. CREATE_IV is true if we may create a new iv. UL determines which loops we are allowed to completely unroll. If TRY_EVAL is true, we try to determine the number of iterations of a loop by direct evaluation. Returns true if cfg is changed. */ Index: tree-ssa-loop-ivopts.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-ssa-loop-ivopts.c,v retrieving revision 2.85 diff -d -u -p -r2.85 tree-ssa-loop-ivopts.c --- tree-ssa-loop-ivopts.c 21 Jul 2005 07:24:10 -0000 2.85 +++ tree-ssa-loop-ivopts.c 26 Jul 2005 09:59:38 -0000 @@ -1443,6 +1443,8 @@ idx_find_step (tree base, tree *idx, voi /* The step for pointer arithmetics already is 1 byte. */ step = build_int_cst (sizetype, 1); + /* FIXME: convert_step should not be used outside chrec_convert: fix + this by calling chrec_convert. */ iv_step = convert_step (dta->ivopts_data->current_loop, sizetype, iv->base, iv->step, dta->stmt); Index: tree-ssa-loop-niter.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-ssa-loop-niter.c,v retrieving revision 2.33 diff -d -u -p -r2.33 tree-ssa-loop-niter.c --- tree-ssa-loop-niter.c 21 Jul 2005 07:24:12 -0000 2.33 +++ tree-ssa-loop-niter.c 26 Jul 2005 09:59:38 -0000 @@ -1707,7 +1707,7 @@ scev_probably_wraps_p (tree type, tree b i_2 to wrap around, but not i.0_6, because it is of a signed type. This causes VRP to erroneously fold the predicate above because it thinks that i.0_6 cannot be negative. */ - if (TREE_CODE (at_stmt) == MODIFY_EXPR) + if (at_stmt && TREE_CODE (at_stmt) == MODIFY_EXPR) { tree rhs = TREE_OPERAND (at_stmt, 1); tree outer_t = TREE_TYPE (rhs); @@ -1748,7 +1748,8 @@ scev_probably_wraps_p (tree type, tree b } /* Return the conversion to NEW_TYPE of the STEP of an induction - variable BASE + STEP * I at AT_STMT. */ + variable BASE + STEP * I at AT_STMT. When it fails, return + NULL_TREE. */ tree convert_step (struct loop *loop, tree new_type, tree base, tree step, Index: testsuite/gcc.dg/vect/vect-46.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/testsuite/gcc.dg/vect/vect-46.c,v retrieving revision 1.7 diff -d -u -p -r1.7 vect-46.c --- testsuite/gcc.dg/vect/vect-46.c 31 Mar 2005 18:34:18 -0000 1.7 +++ testsuite/gcc.dg/vect/vect-46.c 26 Jul 2005 09:59:52 -0000 @@ -23,11 +23,11 @@ void bar (afloat *pa, afloat *pb, afloat int -main1 (int n , afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc) +main1 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc) { int i; - for (i = 0; i < n; i++) + for (i = 0; i < N; i++) { pa[i] = pb[i] * pc[i]; } @@ -38,14 +38,13 @@ main1 (int n , afloat * __restrict__ pa, int main (void) { int i; - int n=N; afloat a[N]; afloat b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57}; afloat c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19}; check_vect (); - main1 (n,a,b,c); + main1 (a,b,c); bar (a,b,c); return 0; } Index: testsuite/gcc.dg/vect/vect-50.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/testsuite/gcc.dg/vect/vect-50.c,v retrieving revision 1.9 diff -d -u -p -r1.9 vect-50.c --- testsuite/gcc.dg/vect/vect-50.c 31 Mar 2005 18:34:18 -0000 1.9 +++ testsuite/gcc.dg/vect/vect-50.c 26 Jul 2005 09:59:52 -0000 @@ -22,11 +22,11 @@ void bar (float *pa, float *pb, float *p int -main1 (int n, float * __restrict__ pa, float * __restrict__ pb, float * __restrict__ pc) +main1 (float * __restrict__ pa, float * __restrict__ pb, float * __restrict__ pc) { int i; - for (i = 0; i < n; i++) + for (i = 0; i < N; i++) { pa[i] = pb[i] * pc[i]; } @@ -45,7 +45,7 @@ int main (void) check_vect (); - main1 (N,a,b,c); + main1 (a,b,c); return 0; } Index: testsuite/gcc.dg/vect/vect-52.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/testsuite/gcc.dg/vect/vect-52.c,v retrieving revision 1.9 diff -d -u -p -r1.9 vect-52.c --- testsuite/gcc.dg/vect/vect-52.c 31 Mar 2005 18:34:18 -0000 1.9 +++ testsuite/gcc.dg/vect/vect-52.c 26 Jul 2005 09:59:52 -0000 @@ -23,11 +23,11 @@ void bar (float *pa, float *pb, float *p int -main1 (int n, afloat * __restrict__ pa, float * __restrict__ pb, float * __restrict__ pc) +main1 (afloat * __restrict__ pa, float * __restrict__ pb, float * __restrict__ pc) { int i; - for (i = 0; i < n; i++) + for (i = 0; i < N; i++) { pa[i] = pb[i] * pc[i]; } @@ -46,8 +46,8 @@ int main (void) check_vect (); - main1 (N,a,&b[1],c); - main1 (N,a,&b[1],&c[1]); + main1 (a,&b[1],c); + main1 (a,&b[1],&c[1]); return 0; } Index: testsuite/gcc.dg/vect/vect-58.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/testsuite/gcc.dg/vect/vect-58.c,v retrieving revision 1.10 diff -d -u -p -r1.10 vect-58.c --- testsuite/gcc.dg/vect/vect-58.c 31 Mar 2005 18:34:18 -0000 1.10 +++ testsuite/gcc.dg/vect/vect-58.c 26 Jul 2005 09:59:52 -0000 @@ -23,11 +23,11 @@ void bar (afloat *pa, afloat *pb, afloat int -main1 (int n , afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc) +main1 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc) { int i; - for (i = 0; i < n/2; i++) + for (i = 0; i < N/2; i++) { pa[i+1] = pb[i+1] * pc[i+1]; } @@ -40,14 +40,13 @@ main1 (int n , afloat * __restrict__ pa, int main (void) { int i; - int n=N; afloat a[N]; afloat b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57}; afloat c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19}; check_vect (); - main1 (n,a,b,c); + main1 (a,b,c); return 0; } Index: testsuite/gcc.dg/vect/vect-60.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/testsuite/gcc.dg/vect/vect-60.c,v retrieving revision 1.9 diff -d -u -p -r1.9 vect-60.c --- testsuite/gcc.dg/vect/vect-60.c 31 Mar 2005 18:34:18 -0000 1.9 +++ testsuite/gcc.dg/vect/vect-60.c 26 Jul 2005 09:59:52 -0000 @@ -23,11 +23,11 @@ void bar (afloat *pa, afloat *pb, afloat int -main1 (int n , afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc) +main1 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc) { int i; - for (i = 0; i < n/2; i++) + for (i = 0; i < N/2; i++) { pa[i] = pb[i+1] * pc[i+1]; } @@ -40,14 +40,13 @@ main1 (int n , afloat * __restrict__ pa, int main (void) { int i; - int n=N; afloat a[N]; afloat b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57}; afloat c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19}; check_vect (); - main1 (n,a,b,c); + main1 (a,b,c); return 0; } Index: testsuite/gcc.dg/vect/vect-77.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/testsuite/gcc.dg/vect/vect-77.c,v retrieving revision 1.11 diff -d -u -p -r1.11 vect-77.c --- testsuite/gcc.dg/vect/vect-77.c 7 Jun 2005 19:51:25 -0000 1.11 +++ testsuite/gcc.dg/vect/vect-77.c 26 Jul 2005 09:59:52 -0000 @@ -20,7 +20,6 @@ int main1 (aint *ib, int off) ia[i] = ib[i+off]; } - /* check results: */ for (i = 0; i < N; i++) { Index: testsuite/gcc.dg/vect/vect-92.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/testsuite/gcc.dg/vect/vect-92.c,v retrieving revision 1.4 diff -d -u -p -r1.4 vect-92.c --- testsuite/gcc.dg/vect/vect-92.c 16 Jul 2005 18:56:53 -0000 1.4 +++ testsuite/gcc.dg/vect/vect-92.c 26 Jul 2005 09:59:52 -0000 @@ -50,17 +50,17 @@ main2 (afloat * __restrict__ pa, afloat } int -main3 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc, int n) +main3 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc) { int i; - for (i = 0; i < n; i++) + for (i = 0; i < N-1; i++) { pa[i+1] = pb[i+1] * pc[i+1]; } /* check results: */ - for (i = 0; i < n; i++) + for (i = 0; i < N-1; i++) { if (pa[i+1] != (pb[i+1] * pc[i+1])) abort (); @@ -80,7 +80,7 @@ int main (void) main1 (a,b,c); main2 (a,b,c); - main3 (a,b,c,N-1); + main3 (a,b,c); return 0; } -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=22236