This patch fixes PR60505, in which the vectorizer may produce
unnecessary epilogues.

Bootstrapped and tested on an x86_64 machine.

OK for trunk?


thanks,
Cong


diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e1d8666..f98e628 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2014-03-11  Cong Hou  <co...@google.com>
+
+ PR tree-optimization/60505
+ * tree-vect-loop.c (vect_analyze_loop_2): Check the maximum number
+ of iterations of the loop and see if we should build the epilogue.
+
 2014-03-10  Jakub Jelinek  <ja...@redhat.com>

  PR ipa/60457
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 41b6875..09ec1c0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2014-03-11  Cong Hou  <co...@google.com>
+
+ PR tree-optimization/60505
+ * gcc.dg/vect/pr60505.c: New test.
+
 2014-03-10  Jakub Jelinek  <ja...@redhat.com>

  PR ipa/60457
diff --git a/gcc/testsuite/gcc.dg/vect/pr60505.c b/gcc/testsuite/gcc.dg/vect/pr60505.c
new file mode 100644
index 0000000..6940513
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr60505.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Wall -Werror" } */
+
+void foo(char *in, char *out, int num)
+{
+  int i;
+  char ovec[16] = {0};
+
+  for(i = 0; i < num ; ++i)
+    out[i] = (ovec[i] = in[i]);
+  out[num] = ovec[num/2];
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index df6ab6f..2156d5f 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1625,6 +1625,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
   bool ok, slp = false;
   int max_vf = MAX_VECTORIZATION_FACTOR;
   int min_vf = 2;
+  int th;

   /* Find all data references in the loop (which correspond to vdefs/vuses)
      and analyze their evolution in the loop.  Also adjust the minimal
@@ -1769,6 +1770,12 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)

   /* Decide whether we need to create an epilogue loop to handle
      remaining scalar iterations.  */
+  th = MAX (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND), 1)
+       * LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
+  th = MAX (th, LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo)) + 1;
+  th = (th / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
+       * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
     {
@@ -1779,7 +1786,9 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
     }
   else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
    || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
-       < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))))
+       < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
+       && (unsigned HOST_WIDE_INT)max_stmt_executions_int
+    (LOOP_VINFO_LOOP (loop_vinfo)) > (unsigned)th))
     LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;

   /* If an epilogue loop is required make sure we can create one.  */

Reply via email to