The following patch optimizes the case where we decide to build up all operands of an SLP node from scalars: instead, we now just build up the node's result from scalars. That's usually less expensive, and in PR65961 it also works around a case that we don't handle correctly (yet).
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. Richard. 2015-06-01 Richard Biener <rguent...@suse.de> PR tree-optimization/65961 * tree-vect-slp.c (vect_get_and_check_slp_defs): Remove bogus check and clarify dump message. (vect_build_slp_tree): If all children are built up from scalars build up the parent from scalars instead. * tree-vect-stmts.c (vect_is_simple_use): Cleanup. * gcc.dg/torture/pr65961.c: New testcase. Index: gcc/tree-vect-slp.c =================================================================== *** gcc/tree-vect-slp.c (revision 223974) --- gcc/tree-vect-slp.c (working copy) *************** again: *** 301,313 **** oprnd_info = (*oprnds_info)[i]; if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt, ! &def, &dt) ! || (!def_stmt && dt != vect_constant_def)) { if (dump_enabled_p ()) { dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ! "Build SLP failed: can't find def for "); dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd); dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); } --- 301,312 ---- oprnd_info = (*oprnds_info)[i]; if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt, ! &def, &dt)) { if (dump_enabled_p ()) { dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ! "Build SLP failed: can't analyze def for "); dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd); dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); } *************** vect_build_slp_tree (loop_vec_info loop_ *** 1092,1097 **** --- 1091,1125 ---- vectorization_factor, matches, npermutes, &this_tree_size, max_tree_size)) { + /* If we have all children of child built up from scalars then just + throw that away and build it up this node from scalars. */ + if (!SLP_TREE_CHILDREN (child).is_empty ()) + { + unsigned int j; + slp_tree grandchild; + + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) + if (grandchild != NULL) + break; + if (!grandchild) + { + /* Roll back. 
*/ + *max_nunits = old_max_nunits; + loads->truncate (old_nloads); + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) + vect_free_slp_tree (grandchild); + SLP_TREE_CHILDREN (child).truncate (0); + + dump_printf_loc (MSG_NOTE, vect_location, + "Building parent vector operands from " + "scalars instead\n"); + oprnd_info->def_stmts = vNULL; + vect_free_slp_tree (child); + SLP_TREE_CHILDREN (*node).quick_push (NULL); + continue; + } + } + oprnd_info->def_stmts = vNULL; SLP_TREE_CHILDREN (*node).quick_push (child); continue; Index: gcc/tree-vect-stmts.c =================================================================== *** gcc/tree-vect-stmts.c (revision 223974) --- gcc/tree-vect-stmts.c (working copy) *************** vect_is_simple_use (tree operand, gimple *** 7878,7892 **** bb_vec_info bb_vinfo, gimple *def_stmt, tree *def, enum vect_def_type *dt) { - basic_block bb; - stmt_vec_info stmt_vinfo; - struct loop *loop = NULL; - - if (loop_vinfo) - loop = LOOP_VINFO_LOOP (loop_vinfo); - *def_stmt = NULL; *def = NULL_TREE; if (dump_enabled_p ()) { --- 7878,7886 ---- bb_vec_info bb_vinfo, gimple *def_stmt, tree *def, enum vect_def_type *dt) { *def_stmt = NULL; *def = NULL_TREE; + *dt = vect_unknown_def_type; if (dump_enabled_p ()) { *************** vect_is_simple_use (tree operand, gimple *** 7909,7921 **** return true; } - if (TREE_CODE (operand) == PAREN_EXPR) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n"); - operand = TREE_OPERAND (operand, 0); - } - if (TREE_CODE (operand) != SSA_NAME) { if (dump_enabled_p ()) --- 7903,7908 ---- *************** vect_is_simple_use (tree operand, gimple *** 7924,7963 **** return false; } ! *def_stmt = SSA_NAME_DEF_STMT (operand); ! if (*def_stmt == NULL) { ! if (dump_enabled_p ()) ! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ! "no def_stmt.\n"); ! 
return false; } if (dump_enabled_p ()) { dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: "); dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0); } ! /* Empty stmt is expected only in case of a function argument. ! (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */ ! if (gimple_nop_p (*def_stmt)) ! { ! *def = operand; ! *dt = vect_external_def; ! return true; ! } ! ! bb = gimple_bb (*def_stmt); ! ! if ((loop && !flow_bb_inside_loop_p (loop, bb)) ! || (!loop && bb != BB_VINFO_BB (bb_vinfo)) ! || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI)) *dt = vect_external_def; else { ! stmt_vinfo = vinfo_for_stmt (*def_stmt); ! if (!loop && !STMT_VINFO_VECTORIZABLE (stmt_vinfo)) *dt = vect_external_def; else *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo); --- 7911,7940 ---- return false; } ! if (SSA_NAME_IS_DEFAULT_DEF (operand)) { ! *def = operand; ! *dt = vect_external_def; ! return true; } + *def_stmt = SSA_NAME_DEF_STMT (operand); if (dump_enabled_p ()) { dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: "); dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0); } ! basic_block bb = gimple_bb (*def_stmt); ! if ((loop_vinfo && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), bb)) ! || (bb_vinfo ! && (bb != BB_VINFO_BB (bb_vinfo) ! || gimple_code (*def_stmt) == GIMPLE_PHI))) *dt = vect_external_def; else { ! stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt); ! 
if (bb_vinfo && !STMT_VINFO_VECTORIZABLE (stmt_vinfo)) *dt = vect_external_def; else *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo); Index: gcc/testsuite/gcc.dg/torture/pr65961.c =================================================================== *** gcc/testsuite/gcc.dg/torture/pr65961.c (revision 0) --- gcc/testsuite/gcc.dg/torture/pr65961.c (working copy) *************** *** 0 **** --- 1,20 ---- + /* { dg-do compile } */ + + int *a; + void + foo () + { + do + { + a[16] = (a[1] ^ a[0]) << 1 | a[1]; + a[17] = (a[0] ^ a[1]) << 1 | a[0]; + a[18] = (a[0] ^ a[1]) << 1 | a[0]; + a[19] = (a[0] ^ a[1]) << 1 | a[0]; + a[20] = (a[0] ^ a[1]) << 1 | a[0]; + a[21] = (a[0] ^ a[1]) << 1 | a[0]; + a[22] = (a[0] ^ a[1]) << 1 | a[0]; + a[23] = (a[20] ^ a[1]) << 1 | a[9]; + a += 8; + } + while (1); + }