Hi! As discussed in the PR and on IRC, the problem here is that, because of peeling for alignment, a linear argument that passed simple_iv during vect analysis may no longer pass it during the vect transform phase.
So, to fix this, this patch remembers the base and step values from simple_iv during vect analysis and uses them during transform phase (biased by what the peeling for alignment advanced of course). Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2014-11-26 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/64024 * tree-vectorizer.h (struct _stmt_vec_info): Remove simd_clone_fndecl field. Add simd_clone_info field. (STMT_VINFO_SIMD_CLONE_FNDECL): Remove. (STMT_VINFO_SIMD_CLONE_INFO): Define. * tree-vect-stmts.c (vectorizable_simd_clone_call): Adjust for STMT_VINFO_SIMD_CLONE_FNDECL becoming first element of STMT_VINFO_SIMD_CLONE_INFO vector. For linear arguments, remember base and linear_step from analysis phase and use it during transform phase, biased by the difference between LOOP_VINFO_NITERS{_UNCHANGED,} multiplied by linear_step. (free_stmt_vec_info): Release STMT_VINFO_SIMD_CLONE_INFO. * gcc.dg/vect/vect-simd-clone-13.c: New test. * gcc.dg/vect/vect-simd-clone-14.c: New test. --- gcc/tree-vectorizer.h.jj 2014-11-19 18:48:07.000000000 +0100 +++ gcc/tree-vectorizer.h 2014-11-26 12:56:00.899824766 +0100 @@ -602,8 +602,10 @@ typedef struct _stmt_vec_info { of this stmt. */ vec<dr_p> same_align_refs; - /* Selected SIMD clone's function decl. */ - tree simd_clone_fndecl; + /* Selected SIMD clone's function info. First vector element + is SIMD clone's function decl, followed by a pair of trees (base + step) + for linear arguments (pair of NULLs for other arguments). */ + vec<tree> simd_clone_info; /* Classify the def of this stmt. 
*/ enum vect_def_type def_type; @@ -677,7 +679,7 @@ typedef struct _stmt_vec_info { #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt #define STMT_VINFO_PATTERN_DEF_SEQ(S) (S)->pattern_def_seq #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs -#define STMT_VINFO_SIMD_CLONE_FNDECL(S) (S)->simd_clone_fndecl +#define STMT_VINFO_SIMD_CLONE_INFO(S) (S)->simd_clone_info #define STMT_VINFO_DEF_TYPE(S) (S)->def_type #define STMT_VINFO_GROUP_FIRST_ELEMENT(S) (S)->first_element #define STMT_VINFO_GROUP_NEXT_ELEMENT(S) (S)->next_element --- gcc/tree-vect-stmts.c.jj 2014-11-19 18:47:59.000000000 +0100 +++ gcc/tree-vect-stmts.c 2014-11-26 15:38:59.883409014 +0100 @@ -2715,12 +2715,40 @@ vectorizable_simd_clone_call (gimple stm else gcc_assert (thisarginfo.vectype != NULL_TREE); - if (thisarginfo.dt != vect_constant_def - && thisarginfo.dt != vect_external_def - && loop_vinfo - && TREE_CODE (op) == SSA_NAME - && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false) - && tree_fits_shwi_p (iv.step)) + /* For linear arguments, the analyze phase should have saved + the base and step in STMT_VINFO_SIMD_CLONE_INFO. */ + if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length () + && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]) + { + gcc_assert (vec_stmt); + thisarginfo.linear_step + = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]); + thisarginfo.op + = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1]; + /* If loop has been peeled for alignment, we need to adjust it. */ + tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo); + tree n2 = LOOP_VINFO_NITERS (loop_vinfo); + if (n1 != n2) + { + tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2); + tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]; + tree opt = TREE_TYPE (thisarginfo.op); + bias = fold_convert (TREE_TYPE (step), bias); + bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step); + thisarginfo.op + = fold_build2 (POINTER_TYPE_P (opt) + ? 
POINTER_PLUS_EXPR : PLUS_EXPR, opt, + thisarginfo.op, bias); + } + } + else if (!vec_stmt + && thisarginfo.dt != vect_constant_def + && thisarginfo.dt != vect_external_def + && loop_vinfo + && TREE_CODE (op) == SSA_NAME + && simple_iv (loop, loop_containing_stmt (stmt), op, + &iv, false) + && tree_fits_shwi_p (iv.step)) { thisarginfo.linear_step = tree_to_shwi (iv.step); thisarginfo.op = iv.base; @@ -2735,8 +2763,8 @@ vectorizable_simd_clone_call (gimple stm unsigned int badness = 0; struct cgraph_node *bestn = NULL; - if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info)) - bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info)); + if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ()) + bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]); else for (struct cgraph_node *n = node->simd_clones; n != NULL; n = n->simdclone->next_clone) @@ -2855,7 +2883,19 @@ vectorizable_simd_clone_call (gimple stm if (!vec_stmt) /* transformation not required. */ { - STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl; + STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl); + for (i = 0; i < nargs; i++) + if (bestn->simdclone->args[i].arg_type + == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP) + { + STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2 + + 1); + STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op); + tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op)) + ? 
size_type_node : TREE_TYPE (arginfo[i].op); + tree ls = build_int_cst (lst, arginfo[i].linear_step); + STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls); + } STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -7479,6 +7519,7 @@ free_stmt_vec_info (gimple stmt) } STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release (); + STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release (); set_vinfo_for_stmt (stmt, NULL); free (stmt_info); } --- gcc/testsuite/gcc.dg/vect/vect-simd-clone-13.c.jj 2014-11-26 15:42:26.162690785 +0100 +++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-13.c 2014-11-26 15:42:49.252278876 +0100 @@ -0,0 +1,7 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fcommon" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include "vect-simd-clone-6.c" + +/* { dg-final { cleanup-tree-dump "vect" } } */ --- gcc/testsuite/gcc.dg/vect/vect-simd-clone-14.c.jj 2014-11-26 15:43:09.522919202 +0100 +++ gcc/testsuite/gcc.dg/vect/vect-simd-clone-14.c 2014-11-26 15:43:24.566652273 +0100 @@ -0,0 +1,7 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fcommon" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#include "vect-simd-clone-11.c" + +/* { dg-final { cleanup-tree-dump "vect" } } */ Jakub