vect_recog_rotate_pattern had code to prevent operations on invariants being vectorised unnecessarily:

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && is_a <loop_vec_info> (vinfo))
    {
      struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
        {
          basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
          if (bb == NULL
              || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
            ext_def = NULL;
        }
    }
  [..]
      if (ext_def)
        {
          basic_block new_bb
            = gsi_insert_on_edge_immediate (ext_def, def_stmt);
          gcc_assert (!new_bb);
        }

This patch reuses the same idea for casts of invariants created
during widening optimisations.  One hitch was that vect_loop_versioning
asserted that the vector loop preheader was still empty, although the
cfg transformation it's doing should be correct either way.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


2018-06-20  Richard Sandiford  <richard.sandif...@arm.com>

gcc/
        * tree-vect-patterns.c (vect_get_external_def_edge): New function,
        split out from...
        (vect_recog_rotate_pattern): ...here.
        (vect_convert_input): Try to insert casts of invariants in the
        preheader.
        * tree-vect-loop-manip.c (vect_loop_versioning): Don't require the
        preheader to be empty.

gcc/testsuite/
        * gcc.dg/vect/vect-widen-mult-extern-1.c: New test.

Index: gcc/tree-vect-patterns.c
===================================================================
--- gcc/tree-vect-patterns.c 2018-06-20 11:26:11.409264955 +0100
+++ gcc/tree-vect-patterns.c 2018-06-20 11:26:16.089223672 +0100
@@ -123,6 +123,30 @@ new_pattern_def_seq (stmt_vec_info stmt_
   append_pattern_def_seq (stmt_info, stmt);
 }
 
+/* The caller wants to perform new operations on vect_external variable
+   VAR, so that the result of the operations would also be vect_external.
+   Return the edge on which the operations can be performed, if one exists.
+   Return null if the operations should instead be treated as part of
+   the pattern that needs them.  */
+
+static edge
+vect_get_external_def_edge (vec_info *vinfo, tree var)
+{
+  edge e = NULL;
+  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
+    {
+      e = loop_preheader_edge (loop_vinfo->loop);
+      if (!SSA_NAME_IS_DEFAULT_DEF (var))
+        {
+          basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
+          if (bb == NULL
+              || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
+            e = NULL;
+        }
+    }
+  return e;
+}
+
 /* Return true if the target supports a vector version of CODE,
    where CODE is known to map to a direct optab.  ITYPE specifies
    the type of (some of) the scalar inputs and OTYPE specifies the
@@ -561,6 +585,16 @@ vect_convert_input (stmt_vec_info stmt_i
   tree new_op = vect_recog_temp_ssa_var (type, NULL);
   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);
 
+  /* If OP is an external value, see if we can insert the new statement
+     on an incoming edge.  */
+  if (unprom->dt == vect_external_def)
+    if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
+      {
+        basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
+        gcc_assert (!new_bb);
+        return new_op;
+      }
+
   /* As a (common) last resort, add the statement to the pattern itself.  */
   append_pattern_def_seq (stmt_info, new_stmt, vectype);
   return new_op;
@@ -1760,19 +1794,8 @@ vect_recog_rotate_pattern (vec<gimple *>
   *type_out = vectype;
 
   if (dt == vect_external_def
-      && TREE_CODE (oprnd1) == SSA_NAME
-      && is_a <loop_vec_info> (vinfo))
-    {
-      struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
-      ext_def = loop_preheader_edge (loop);
-      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
-        {
-          basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
-          if (bb == NULL
-              || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
-            ext_def = NULL;
-        }
-    }
+      && TREE_CODE (oprnd1) == SSA_NAME)
+    ext_def = vect_get_external_def_edge (vinfo, oprnd1);
 
   def = NULL_TREE;
   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
Index: gcc/tree-vect-loop-manip.c
===================================================================
--- gcc/tree-vect-loop-manip.c 2018-06-20 08:59:51.294580064 +0100
+++ gcc/tree-vect-loop-manip.c 2018-06-20 11:26:16.089223672 +0100
@@ -3034,8 +3034,9 @@ vect_loop_versioning (loop_vec_info loop
          while we need to move it above LOOP's preheader.  */
       e = loop_preheader_edge (loop);
       scalar_e = loop_preheader_edge (scalar_loop);
-      gcc_assert (empty_block_p (e->src)
-                  && single_pred_p (e->src));
+      /* The vector loop preheader might not be empty, since new
+         invariants could have been created while analyzing the loop.  */
+      gcc_assert (single_pred_p (e->src));
       gcc_assert (empty_block_p (scalar_e->src)
                   && single_pred_p (scalar_e->src));
       gcc_assert (single_pred_p (condition_bb));
Index: gcc/testsuite/gcc.dg/vect/vect-widen-mult-extern-1.c
===================================================================
--- /dev/null 2018-06-13 14:36:57.192460992 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-widen-mult-extern-1.c 2018-06-20 11:26:16.089223672 +0100
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+
+#define N 1024
+
+void
+f (unsigned int *x1, unsigned int *x2, unsigned short *y, unsigned char z)
+{
+  unsigned short zu = z;
+  for (int i = 0; i < N; ++i)
+    {
+      unsigned short yi = y[i];
+      x1[i] = x1[i] > 10 ? yi * zu : x1[i] + 1;
+      x2[i] += 1;
+    }
+}