Hi!

As discussed in the PR and on IRC, the problem here is that, after peeling
for alignment, a linear argument that passed simple_iv during vect analysis
may no longer pass it during the vect transform phase.

So, to fix this, this patch remembers the base and step values from
simple_iv during vect analysis and uses them during the transform phase
(with the base biased, of course, by the number of iterations that peeling
for alignment has advanced, multiplied by the step).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2014-11-26  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/64024
        * tree-vectorizer.h (struct _stmt_vec_info): Remove simd_clone_fndecl
        field.  Add simd_clone_info field.
        (STMT_VINFO_SIMD_CLONE_FNDECL): Remove.
        (STMT_VINFO_SIMD_CLONE_INFO): Define.
        * tree-vect-stmts.c (vectorizable_simd_clone_call): Adjust for
        STMT_VINFO_SIMD_CLONE_FNDECL becoming first element of
        STMT_VINFO_SIMD_CLONE_INFO vector.  For linear arguments, remember
        base and linear_step from analysis phase and use it during transform
        phase, biased by the difference between LOOP_VINFO_NITERS{_UNCHANGED,}
        multiplied by linear_step.
        (free_stmt_vec_info): Release STMT_VINFO_SIMD_CLONE_INFO.

        * gcc.dg/vect/vect-simd-clone-13.c: New test.
        * gcc.dg/vect/vect-simd-clone-14.c: New test.

--- gcc/tree-vectorizer.h.jj    2014-11-19 18:48:07.000000000 +0100
+++ gcc/tree-vectorizer.h       2014-11-26 12:56:00.899824766 +0100
@@ -602,8 +602,10 @@ typedef struct _stmt_vec_info {
      of this stmt.  */
   vec<dr_p> same_align_refs;
 
-  /* Selected SIMD clone's function decl.  */
-  tree simd_clone_fndecl;
+  /* Selected SIMD clone's function info.  First vector element
+     is SIMD clone's function decl, followed by a pair of trees (base + step)
+     for linear arguments (pair of NULLs for other arguments).  */
+  vec<tree> simd_clone_info;
 
   /* Classify the def of this stmt.  */
   enum vect_def_type def_type;
@@ -677,7 +679,7 @@ typedef struct _stmt_vec_info {
 #define STMT_VINFO_RELATED_STMT(S)         (S)->related_stmt
 #define STMT_VINFO_PATTERN_DEF_SEQ(S)      (S)->pattern_def_seq
 #define STMT_VINFO_SAME_ALIGN_REFS(S)      (S)->same_align_refs
-#define STMT_VINFO_SIMD_CLONE_FNDECL(S)           (S)->simd_clone_fndecl
+#define STMT_VINFO_SIMD_CLONE_INFO(S)     (S)->simd_clone_info
 #define STMT_VINFO_DEF_TYPE(S)             (S)->def_type
 #define STMT_VINFO_GROUP_FIRST_ELEMENT(S)  (S)->first_element
 #define STMT_VINFO_GROUP_NEXT_ELEMENT(S)   (S)->next_element
--- gcc/tree-vect-stmts.c.jj    2014-11-19 18:47:59.000000000 +0100
+++ gcc/tree-vect-stmts.c       2014-11-26 15:38:59.883409014 +0100
@@ -2715,12 +2715,40 @@ vectorizable_simd_clone_call (gimple stm
       else
        gcc_assert (thisarginfo.vectype != NULL_TREE);
 
-      if (thisarginfo.dt != vect_constant_def
-         && thisarginfo.dt != vect_external_def
-         && loop_vinfo
-         && TREE_CODE (op) == SSA_NAME
-         && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
-         && tree_fits_shwi_p (iv.step))
+      /* For linear arguments, the analyze phase should have saved
+        the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
+      if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
+         && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
+       {
+         gcc_assert (vec_stmt);
+         thisarginfo.linear_step
+           = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
+         thisarginfo.op
+           = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
+         /* If loop has been peeled for alignment, we need to adjust it.  */
+         tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
+         tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
+         if (n1 != n2)
+           {
+             tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
+             tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
+             tree opt = TREE_TYPE (thisarginfo.op);
+             bias = fold_convert (TREE_TYPE (step), bias);
+             bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
+             thisarginfo.op
+               = fold_build2 (POINTER_TYPE_P (opt)
+                              ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
+                              thisarginfo.op, bias);
+           }
+       }
+      else if (!vec_stmt
+              && thisarginfo.dt != vect_constant_def
+              && thisarginfo.dt != vect_external_def
+              && loop_vinfo
+              && TREE_CODE (op) == SSA_NAME
+              && simple_iv (loop, loop_containing_stmt (stmt), op,
+                            &iv, false)
+              && tree_fits_shwi_p (iv.step))
        {
          thisarginfo.linear_step = tree_to_shwi (iv.step);
          thisarginfo.op = iv.base;
@@ -2735,8 +2763,8 @@ vectorizable_simd_clone_call (gimple stm
 
   unsigned int badness = 0;
   struct cgraph_node *bestn = NULL;
-  if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
-    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
+  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
+    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
   else
     for (struct cgraph_node *n = node->simd_clones; n != NULL;
         n = n->simdclone->next_clone)
@@ -2855,7 +2883,19 @@ vectorizable_simd_clone_call (gimple stm
 
   if (!vec_stmt) /* transformation not required.  */
     {
-      STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
+      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
+      for (i = 0; i < nargs; i++)
+       if (bestn->simdclone->args[i].arg_type
+           == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
+         {
+           STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
+                                                                       + 1);
+           STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
+           tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
+                      ? size_type_node : TREE_TYPE (arginfo[i].op);
+           tree ls = build_int_cst (lst, arginfo[i].linear_step);
+           STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
+         }
       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
       if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
@@ -7479,6 +7519,7 @@ free_stmt_vec_info (gimple stmt)
     }
 
   STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
+  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
   set_vinfo_for_stmt (stmt, NULL);
   free (stmt_info);
 }
--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-13.c.jj   2014-11-26 15:42:26.162690785 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-13.c      2014-11-26 15:42:49.252278876 +0100
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd -fcommon" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "vect-simd-clone-6.c"
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- gcc/testsuite/gcc.dg/vect/vect-simd-clone-14.c.jj   2014-11-26 15:43:09.522919202 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-14.c      2014-11-26 15:43:24.566652273 +0100
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd -fcommon" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "vect-simd-clone-11.c"
+
+/* { dg-final { cleanup-tree-dump "vect" } } */

        Jakub

Reply via email to