The current GCC vectorizer recognizes a simple reduction only when it
matches the following pattern:

   loop_header:
     a1 = phi < a0, a2 >
     a3 = ...
     a2 = operation (a3, a1)

However, a3 may also be defined outside of the loop, in which case the
pattern above does not match. For example, the following loop would
benefit from vectorization, but the GCC vectorizer currently fails to
vectorize it:


/* Example of a reduction whose non-accumulator operand is defined
   outside of the loop.  */
int foo(int v)
{
  int s = 1;   /* reduction accumulator (a1/a2 in the pattern) */
  ++v;         /* v is defined by this statement, before the loop (a3) */
  for (int i = 0; i < 100000; ++i)
    s *= v;    /* a2 = operation (a3, a1), with operation being multiply */
  return s;
}


This patch relaxes the original requirement by also accepting the
following pattern, in which a3 is defined before the loop:


   a3 = ...
   loop_header:
     a1 = phi < a0, a2 >
     a2 = operation (a3, a1)


A test case is also added. The patch has been tested on x86-64.


thanks,
Cong

================================

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 39c786e..45c1667 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2013-09-27  Cong Hou  <co...@google.com>
+
+ * tree-vect-loop.c: Relax the requirement of the reduction
+ pattern so that one operand of the reduction operation can
+ come from outside of the loop.
+
 2013-09-25  Tom Tromey  <tro...@redhat.com>

  * Makefile.in (PARTITION_H, LTO_SYMTAB_H, COMMON_TARGET_DEF_H)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 09644d2..90496a2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2013-09-27  Cong Hou  <co...@google.com>
+
+ * gcc.dg/vect/vect-reduc-pattern-3.c: New test.
+
 2013-09-25  Marek Polacek  <pola...@redhat.com>

  PR sanitizer/58413
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 2871ba1..3c51c3b 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2091,6 +2091,13 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
      a3 = ...
      a2 = operation (a3, a1)

+   or
+
+   a3 = ...
+   loop_header:
+     a1 = phi < a0, a2 >
+     a2 = operation (a3, a1)
+
    such that:
    1. operation is commutative and associative and it is safe to
       change the order of the computation (if CHECK_REDUCTION is true)
@@ -2451,6 +2458,7 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
   if (def2 && def2 == phi
       && (code == COND_EXPR
   || !def1 || gimple_nop_p (def1)
+  || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
               && (is_gimple_assign (def1)
   || is_gimple_call (def1)
@@ -2469,6 +2477,7 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
   if (def1 && def1 == phi
       && (code == COND_EXPR
   || !def2 || gimple_nop_p (def2)
+  || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
           || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
       && (is_gimple_assign (def2)
   || is_gimple_call (def2)
diff --git gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-3.c gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-3.c
new file mode 100644
index 0000000..06a9416
--- /dev/null
+++ gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-3.c
@@ -0,0 +1,41 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 10
+#define RES 1024
+
+/* A reduction pattern in which there is no data ref in
+   the loop and one operand is defined outside of the loop.  */
+
+__attribute__ ((noinline)) int
+foo (int v)
+{
+  int i;
+  int result = 1;
+
+  ++v;
+  for (i = 0; i < N; i++)
+    result *= v;
+
+  return result;
+}
+
+int
+main (void)
+{
+  int res;
+
+  check_vect ();
+
+  res = foo (1);
+  if (res != RES)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+

Reply via email to