This patch fixes PR60505, in which the vectorizer may produce unnecessary epilogues.
Bootstrapped and tested on an x86_64 machine. OK for trunk? Thanks, Cong diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e1d8666..f98e628 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2014-03-11 Cong Hou <co...@google.com> + + PR tree-optimization/60505 + * tree-vect-loop.c (vect_analyze_loop_2): Check the maximum number + of iterations of the loop and see if we should build the epilogue. + 2014-03-10 Jakub Jelinek <ja...@redhat.com> PR ipa/60457 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 41b6875..09ec1c0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2014-03-11 Cong Hou <co...@google.com> + + PR tree-optimization/60505 + * gcc.dg/vect/pr60505.c: New test. + 2014-03-10 Jakub Jelinek <ja...@redhat.com> PR ipa/60457 diff --git a/gcc/testsuite/gcc.dg/vect/pr60505.c b/gcc/testsuite/gcc.dg/vect/pr60505.c new file mode 100644 index 0000000..6940513 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr60505.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-Wall -Werror" } */ + +void foo(char *in, char *out, int num) +{ + int i; + char ovec[16] = {0}; + + for(i = 0; i < num ; ++i) + out[i] = (ovec[i] = in[i]); + out[num] = ovec[num/2]; +} diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index df6ab6f..2156d5f 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1625,6 +1625,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo) bool ok, slp = false; int max_vf = MAX_VECTORIZATION_FACTOR; int min_vf = 2; + int th; /* Find all data references in the loop (which correspond to vdefs/vuses) and analyze their evolution in the loop. Also adjust the minimal @@ -1769,6 +1770,12 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo) /* Decide whether we need to create an epilogue loop to handle remaining scalar iterations. 
*/ + th = MAX (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND), 1) + * LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1; + th = MAX (th, LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo)) + 1; + th = (th / LOOP_VINFO_VECT_FACTOR (loop_vinfo)) + * LOOP_VINFO_VECT_FACTOR (loop_vinfo); + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) { @@ -1779,7 +1786,9 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo) } else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) - < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))) + < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)) + && (unsigned HOST_WIDE_INT)max_stmt_executions_int + (LOOP_VINFO_LOOP (loop_vinfo)) > (unsigned)th)) LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; /* If an epilogue loop is required make sure we can create one. */