On Wed, May 15, 2013 at 03:24:37PM +0200, Richard Biener wrote:
> We have the same issue in some other places where we insert invariant
> code into the loop body - one reason there is another LIM pass
> after vectorization.

Well, in this case it causes the shift amount to be loaded into a vector
instead of a scalar, so even when LIM moves it before the loop, it
will only work with vector/vector shifts and be more expensive that way
(the value needs to be broadcast into a vector).  The following patch
improves it slightly, at least for loops, by just emitting the shift amount
stmts on the loop preheader edge.  rotate-4.c used to be vectorizable only
with -mavx2 (which has vector/vector shifts); now it is also vectorized with
-mavx (which doesn't).  Unfortunately this trick doesn't work for SLP
vectorization: emitting the stmts at the start of the current bb doesn't
help, because every stmt emits its own copy of them and thus it is
vectorized with vector/vector shifts only anyway.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2013-05-17  Jakub Jelinek  <ja...@redhat.com>

        * tree-vect-patterns.c (vect_recog_rotate_pattern): For
        vect_external_def oprnd1 with loop_vinfo, try to emit
        optional cast, negation and BIT_AND_EXPR stmts on the loop preheader
        edge instead of into the pattern def seq.

        * gcc.target/i386/rotate-4.c: Compile only with -mavx
        instead of -mavx2, require only avx instead of avx2.
        * gcc.target/i386/rotate-4a.c: Include avx-check.h instead
        of avx2-check.h and turn into an avx runtime test instead of
        avx2 runtime test.

--- gcc/tree-vect-patterns.c.jj 2013-05-16 13:56:08.000000000 +0200
+++ gcc/tree-vect-patterns.c    2013-05-16 15:27:00.565143478 +0200
@@ -1494,6 +1494,7 @@ vect_recog_rotate_pattern (vec<gimple> *
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
   enum vect_def_type dt;
   optab optab1, optab2;
+  edge ext_def = NULL;
 
   if (!is_gimple_assign (last_stmt))
     return NULL;
@@ -1574,6 +1575,21 @@ vect_recog_rotate_pattern (vec<gimple> *
   if (*type_in == NULL_TREE)
     return NULL;
 
+  if (dt == vect_external_def
+      && TREE_CODE (oprnd1) == SSA_NAME
+      && loop_vinfo)
+    {
+      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+      ext_def = loop_preheader_edge (loop);
+      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
+       {
+         basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
+         if (bb == NULL
+             || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
+           ext_def = NULL;
+       }
+    }
+
   def = NULL_TREE;
   if (TREE_CODE (oprnd1) == INTEGER_CST
       || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
@@ -1593,7 +1609,14 @@ vect_recog_rotate_pattern (vec<gimple> *
       def = vect_recog_temp_ssa_var (type, NULL);
       def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
-      append_pattern_def_seq (stmt_vinfo, def_stmt);
+      if (ext_def)
+       {
+         basic_block new_bb
+           = gsi_insert_on_edge_immediate (ext_def, def_stmt);
+         gcc_assert (!new_bb);
+       }
+      else
+       append_pattern_def_seq (stmt_vinfo, def_stmt);
     }
   stype = TREE_TYPE (def);
 
@@ -1618,11 +1641,19 @@ vect_recog_rotate_pattern (vec<gimple> *
       def2 = vect_recog_temp_ssa_var (stype, NULL);
       def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
                                               NULL_TREE);
-      def_stmt_vinfo
-       = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
-      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
-      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
-      append_pattern_def_seq (stmt_vinfo, def_stmt);
+      if (ext_def)
+       {
+         basic_block new_bb
+           = gsi_insert_on_edge_immediate (ext_def, def_stmt);
+         gcc_assert (!new_bb);
+       }
+      else
+       {
+         def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+         set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
+         STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
+         append_pattern_def_seq (stmt_vinfo, def_stmt);
+       }
 
       def2 = vect_recog_temp_ssa_var (stype, NULL);
       tree mask
@@ -1630,11 +1661,19 @@ vect_recog_rotate_pattern (vec<gimple> *
       def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
                                               gimple_assign_lhs (def_stmt),
                                               mask);
-      def_stmt_vinfo
-       = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
-      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
-      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
-      append_pattern_def_seq (stmt_vinfo, def_stmt);
+      if (ext_def)
+       {
+         basic_block new_bb
+           = gsi_insert_on_edge_immediate (ext_def, def_stmt);
+         gcc_assert (!new_bb);
+       }
+      else
+       {
+         def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+         set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
+         STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
+         append_pattern_def_seq (stmt_vinfo, def_stmt);
+       }
     }
 
   var1 = vect_recog_temp_ssa_var (type, NULL);
--- gcc/testsuite/gcc.target/i386/rotate-4.c.jj 2013-05-16 13:50:14.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/rotate-4.c    2013-05-16 15:23:32.729313026 +0200
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
-/* { dg-require-effective-target avx2 } */
-/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -mavx -fdump-tree-vect-details" } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
--- gcc/testsuite/gcc.target/i386/rotate-4a.c.jj        2013-05-16 14:00:33.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/rotate-4a.c   2013-05-16 15:23:44.791247428 +0200
@@ -1,14 +1,14 @@
 /* { dg-do run } */
-/* { dg-require-effective-target avx2 } */
-/* { dg-options "-O3 -mavx2" } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O3 -mavx" } */
 
-#include "avx2-check.h"
+#include "avx-check.h"
 
 #include "rotate-4.c"
 
 static void
 __attribute__((noinline))
-avx2_test (void)
+avx_test (void)
 {
   int i;
   for (i = 0; i < 1024; i++)


        Jakub

Reply via email to