This patch splits the load permutation computation out of SLP tree building. It also removes the broken support for swapping mismatched operands. If that swapping code ever triggers, we will ICE later, because:
case vect_internal_def: ! if (different_types) ! { ! oprnd0_info = (*oprnds_info)[0]; ! oprnd1_info = (*oprnds_info)[0]; ! if (i == 0) ! oprnd1_info->def_stmts.quick_push (def_stmt); ! else ! oprnd0_info->def_stmts.quick_push (def_stmt); ! } pushes to the same operand vector twice ... The cases this tries to handle should all be canonicalized by reassoc earlier. I am going to re-instantiate more complete support for handling commutated operations in the next patch. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2013-04-10 Richard Biener <rguent...@suse.de> * tree-vect-slp.c (vect_get_and_check_slp_defs): Remove broken code swapping operands. (vect_build_slp_tree): Do not compute load permutations here. (vect_analyze_slp_instance): Compute load permutations here, after building the SLP tree. Index: gcc/tree-vect-slp.c =================================================================== *** gcc/tree-vect-slp.c (revision 197635) --- gcc/tree-vect-slp.c (working copy) *************** vect_get_and_check_slp_defs (loop_vec_in *** 204,218 **** { tree oprnd; unsigned int i, number_of_oprnds; ! tree def, def_op0 = NULL_TREE; gimple def_stmt; enum vect_def_type dt = vect_uninitialized_def; - enum vect_def_type dt_op0 = vect_uninitialized_def; struct loop *loop = NULL; - enum tree_code rhs_code; - bool different_types = false; bool pattern = false; ! slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info; int op_idx = 1; tree compare_rhs = NULL_TREE; --- 204,215 ---- { tree oprnd; unsigned int i, number_of_oprnds; ! tree def; gimple def_stmt; enum vect_def_type dt = vect_uninitialized_def; struct loop *loop = NULL; bool pattern = false; ! 
slp_oprnd_info oprnd_info; int op_idx = 1; tree compare_rhs = NULL_TREE; *************** vect_get_and_check_slp_defs (loop_vec_in *** 334,345 **** oprnd_info->first_def_type = NULL_TREE; oprnd_info->first_const_oprnd = oprnd; } - - if (i == 0) - { - def_op0 = def; - dt_op0 = dt; - } } else { --- 331,336 ---- *************** vect_get_and_check_slp_defs (loop_vec_in *** 357,413 **** TREE_TYPE (def)))) || (!def && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd), ! TREE_TYPE (oprnd))) ! || different_types) { ! if (number_of_oprnds != 2) ! { ! if (dump_enabled_p ()) ! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ! "Build SLP failed: different types "); ! ! return false; ! } ! ! /* Try to swap operands in case of binary operation. */ ! if (i == 0) ! different_types = true; ! else ! { ! oprnd0_info = (*oprnds_info)[0]; ! if (is_gimple_assign (stmt) ! && (rhs_code = gimple_assign_rhs_code (stmt)) ! && TREE_CODE_CLASS (rhs_code) == tcc_binary ! && commutative_tree_code (rhs_code) ! && oprnd0_info->first_dt == dt ! && oprnd_info->first_dt == dt_op0 ! && def_op0 && def ! && !(oprnd0_info->first_def_type ! && !types_compatible_p (oprnd0_info->first_def_type, ! TREE_TYPE (def))) ! && !(oprnd_info->first_def_type ! && !types_compatible_p (oprnd_info->first_def_type, ! TREE_TYPE (def_op0)))) ! { ! if (dump_enabled_p ()) ! { ! dump_printf_loc (MSG_NOTE, vect_location, ! "Swapping operands of "); ! dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); ! } ! ! swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt), ! gimple_assign_rhs2_ptr (stmt)); ! } ! else ! { ! if (dump_enabled_p ()) ! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ! "Build SLP failed: different types "); ! return false; ! } ! } } } --- 348,360 ---- TREE_TYPE (def)))) || (!def && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd), ! TREE_TYPE (oprnd)))) { ! if (dump_enabled_p ()) ! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ! 
"Build SLP failed: different types "); ! return false; } } *************** vect_get_and_check_slp_defs (loop_vec_in *** 420,437 **** break; case vect_internal_def: ! if (different_types) ! { ! oprnd0_info = (*oprnds_info)[0]; ! oprnd1_info = (*oprnds_info)[0]; ! if (i == 0) ! oprnd1_info->def_stmts.quick_push (def_stmt); ! else ! oprnd0_info->def_stmts.quick_push (def_stmt); ! } ! else ! oprnd_info->def_stmts.quick_push (def_stmt); ! break; default: --- 367,373 ---- break; case vect_internal_def: ! oprnd_info->def_stmts.quick_push (def_stmt); break; default: *************** static bool *** 460,468 **** vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, slp_tree *node, unsigned int group_size, unsigned int *max_nunits, - vec<int> *load_permutation, vec<slp_tree> *loads, ! unsigned int vectorization_factor, bool *loads_permuted) { unsigned int i; vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node); --- 396,403 ---- vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, slp_tree *node, unsigned int group_size, unsigned int *max_nunits, vec<slp_tree> *loads, ! unsigned int vectorization_factor) { unsigned int i; vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node); *************** vect_build_slp_tree (loop_vec_info loop_ *** 478,485 **** enum machine_mode vec_mode; struct data_reference *first_dr; HOST_WIDE_INT dummy; - bool permutation = false; - unsigned int load_place; gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL; vec<slp_oprnd_info> oprnds_info; unsigned int nops; --- 413,418 ---- *************** vect_build_slp_tree (loop_vec_info loop_ *** 839,854 **** } } - /* Store the place of this load in the interleaving chain. In - case that permutation is needed we later decide if a specific - permutation is supported. 
*/ - load_place = vect_get_place_in_interleaving_chain (stmt, - first_load); - if (load_place != i) - permutation = true; - - load_permutation->safe_push (load_place); - /* We stop the tree when we reach a group of loads. */ stop_recursion = true; continue; --- 772,777 ---- *************** vect_build_slp_tree (loop_vec_info loop_ *** 925,941 **** if (stop_recursion) { loads->safe_push (*node); - if (permutation) - *loads_permuted = true; - else - { - /* We don't check here complex numbers chains, so we set - LOADS_PERMUTED for further check in - vect_supported_load_permutation_p. */ - if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR) - *loads_permuted = true; - } - vect_free_oprnd_info (oprnds_info); return true; } --- 848,853 ---- *************** vect_build_slp_tree (loop_vec_info loop_ *** 951,958 **** child = vect_create_new_slp_node (oprnd_info->def_stmts); if (!child || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size, ! max_nunits, load_permutation, loads, ! vectorization_factor, loads_permuted)) { if (child) oprnd_info->def_stmts = vNULL; --- 863,870 ---- child = vect_create_new_slp_node (oprnd_info->def_stmts); if (!child || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size, ! max_nunits, loads, ! vectorization_factor)) { if (child) oprnd_info->def_stmts = vNULL; *************** vect_analyze_slp_instance (loop_vec_info *** 1604,1613 **** unsigned int vectorization_factor = 0; int i; unsigned int max_nunits = 0; - vec<int> load_permutation; vec<slp_tree> loads; struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); - bool loads_permuted = false; vec<gimple> scalar_stmts; if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) --- 1516,1523 ---- *************** vect_analyze_slp_instance (loop_vec_info *** 1689,1701 **** node = vect_create_new_slp_node (scalar_stmts); - load_permutation.create (group_size * group_size); loads.create (group_size); /* Build the tree for the SLP instance. 
*/ if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size, ! &max_nunits, &load_permutation, &loads, ! vectorization_factor, &loads_permuted)) { /* Calculate the unrolling factor based on the smallest type. */ if (max_nunits > nunits) --- 1599,1610 ---- node = vect_create_new_slp_node (scalar_stmts); loads.create (group_size); /* Build the tree for the SLP instance. */ if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size, ! &max_nunits, &loads, ! vectorization_factor)) { /* Calculate the unrolling factor based on the smallest type. */ if (max_nunits > nunits) *************** vect_analyze_slp_instance (loop_vec_info *** 1709,1715 **** "Build SLP failed: unrolling required in basic" " block SLP"); vect_free_slp_tree (node); - load_permutation.release (); loads.release (); return false; } --- 1618,1623 ---- *************** vect_analyze_slp_instance (loop_vec_info *** 1722,1731 **** SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL; SLP_INSTANCE_LOADS (new_instance) = loads; SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL; ! SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation; if (loads_permuted) { if (!vect_supported_load_permutation_p (new_instance, group_size, load_permutation)) { --- 1630,1666 ---- SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL; SLP_INSTANCE_LOADS (new_instance) = loads; SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL; ! SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = vNULL; ! ! /* Compute the load permutation. */ ! slp_tree load_node; ! bool loads_permuted = false; ! vec<int> load_permutation; ! load_permutation.create (group_size * group_size); ! FOR_EACH_VEC_ELT (loads, i, load_node) ! { ! int j; ! gimple load; ! FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load) ! { ! int load_place; ! load_place = vect_get_place_in_interleaving_chain ! (load, GROUP_FIRST_ELEMENT (vinfo_for_stmt (load))); ! if (load_place != j ! /* ??? We allow loads from different groups to ! 
get to here for a special case handled in ! the permutation code. Make sure we get to that. */ ! || (GROUP_FIRST_ELEMENT ! (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0])) ! != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))) ! loads_permuted = true; ! load_permutation.safe_push (load_place); ! } ! } if (loads_permuted) { + SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation; if (!vect_supported_load_permutation_p (new_instance, group_size, load_permutation)) { *************** vect_analyze_slp_instance (loop_vec_info *** 1745,1751 **** = vect_find_first_load_in_slp_instance (new_instance); } else ! SLP_INSTANCE_LOAD_PERMUTATION (new_instance).release (); /* Compute the costs of this SLP instance. */ vect_analyze_slp_cost (loop_vinfo, bb_vinfo, --- 1680,1686 ---- = vect_find_first_load_in_slp_instance (new_instance); } else ! load_permutation.release (); /* Compute the costs of this SLP instance. */ vect_analyze_slp_cost (loop_vinfo, bb_vinfo, *************** vect_analyze_slp_instance (loop_vec_info *** 1765,1771 **** /* Failed to SLP. */ /* Free the allocated memory. */ vect_free_slp_tree (node); - load_permutation.release (); loads.release (); return false; --- 1700,1705 ----