Hi, this is the updated patch I committed after testing. I suppose we will need to find a way to make SOC smaller for simple loops - it is currently far too overestimated.
* tree-vectorizer.h (vect_estimate_min_profitable_iters): Remove. * tree-vect-loop.c (vect_estimate_min_profitable_iters): Declare here. (vect_analyze_loop_operations): Use loop count estimate to rule out unprofitable vectorization. (vect_estimate_min_profitable_iters): Return ret_min_profitable_estimate. Index: tree-vectorizer.h =================================================================== *** tree-vectorizer.h (revision 192114) --- tree-vectorizer.h (working copy) *************** extern bool vectorizable_live_operation *** 976,982 **** extern bool vectorizable_reduction (gimple, gimple_stmt_iterator *, gimple *, slp_tree); extern bool vectorizable_induction (gimple, gimple_stmt_iterator *, gimple *); - extern int vect_estimate_min_profitable_iters (loop_vec_info); extern tree get_initial_def_for_reduction (gimple, tree, tree *); extern int vect_min_worthwhile_factor (enum tree_code); extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, int, --- 976,981 ---- Index: tree-vect-loop.c =================================================================== *** tree-vect-loop.c (revision 192114) --- tree-vect-loop.c (working copy) *************** along with GCC; see the file COPYING3. *** 140,145 **** --- 140,147 ---- http://gcc.gnu.org/projects/tree-ssa/vectorization.html */ + static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *); + /* Function vect_determine_vectorization_factor Determine the vectorization factor (VF). VF is the number of data elements *************** vect_analyze_loop_operations (loop_vec_i *** 1287,1292 **** --- 1289,1296 ---- unsigned int th; bool only_slp_in_loop = true, ok; HOST_WIDE_INT max_niter; + HOST_WIDE_INT estimated_niter; + int min_profitable_estimate; if (dump_kind_p (MSG_NOTE)) dump_printf_loc (MSG_NOTE, vect_location, *************** vect_analyze_loop_operations (loop_vec_i *** 1490,1496 **** vector stmts depends on VF. */ vect_update_slp_costs_according_to_vf (loop_vinfo); ! 
min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo); LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters; if (min_profitable_iters < 0) --- 1494,1501 ---- vector stmts depends on VF. */ vect_update_slp_costs_according_to_vf (loop_vinfo); ! vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters, ! &min_profitable_estimate); LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters; if (min_profitable_iters < 0) *************** vect_analyze_loop_operations (loop_vec_i *** 1531,1536 **** --- 1537,1559 ---- return false; } + if ((estimated_niter = estimated_stmt_executions_int (loop)) != -1 + && ((unsigned HOST_WIDE_INT) estimated_niter + <= MAX (th, (unsigned)min_profitable_estimate))) + { + if (dump_kind_p (MSG_MISSED_OPTIMIZATION)) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: estimated iteration count too " + "small."); + if (dump_kind_p (MSG_NOTE)) + dump_printf_loc (MSG_NOTE, vect_location, + "not vectorized: estimated iteration count smaller " + "than specified loop bound parameter or minimum " + "profitable iterations (whichever is more " + "conservative)."); + return false; + } + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) *************** vect_get_known_peeling_cost (loop_vec_in *** 2603,2617 **** Return the number of iterations required for the vector version of the loop to be profitable relative to the cost of the scalar version of the ! loop. ! TODO: Take profile info into account before making vectorization ! decisions, if available. */ ! ! int ! 
vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) { int min_profitable_iters; int peel_iters_prologue; int peel_iters_epilogue; unsigned vec_inside_cost = 0; --- 2626,2640 ---- Return the number of iterations required for the vector version of the loop to be profitable relative to the cost of the scalar version of the ! loop. */ ! static void ! vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, ! int *ret_min_profitable_niters, ! int *ret_min_profitable_estimate) { int min_profitable_iters; + int min_profitable_estimate; int peel_iters_prologue; int peel_iters_epilogue; unsigned vec_inside_cost = 0; *************** vect_estimate_min_profitable_iters (loop *** 2628,2634 **** if (!flag_vect_cost_model) { dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled."); ! return 0; } /* Requires loop versioning tests to handle misalignment. */ --- 2651,2659 ---- if (!flag_vect_cost_model) { dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled."); ! *ret_min_profitable_niters = 0; ! *ret_min_profitable_estimate = 0; ! return; } /* Requires loop versioning tests to handle misalignment. */ *************** vect_estimate_min_profitable_iters (loop *** 2863,2869 **** "divided by the scalar iteration cost = %d " "is greater or equal to the vectorization factor = %d.", vec_inside_cost, scalar_single_iter_cost, vf); ! return -1; } if (dump_kind_p (MSG_NOTE)) --- 2888,2896 ---- "divided by the scalar iteration cost = %d " "is greater or equal to the vectorization factor = %d.", vec_inside_cost, scalar_single_iter_cost, vf); ! *ret_min_profitable_niters = -1; ! *ret_min_profitable_estimate = -1; ! 
return; } if (dump_kind_p (MSG_NOTE)) *************** vect_estimate_min_profitable_iters (loop *** 2879,2884 **** --- 2906,2913 ---- scalar_single_iter_cost); dump_printf (MSG_NOTE, " Scalar outside cost: %d\n", scalar_outside_cost); + dump_printf (MSG_NOTE, " Vector outside cost: %d\n", + vec_outside_cost); dump_printf (MSG_NOTE, " prologue iterations: %d\n", peel_iters_prologue); dump_printf (MSG_NOTE, " epilogue iterations: %d\n", *************** vect_estimate_min_profitable_iters (loop *** 2898,2906 **** if (dump_kind_p (MSG_NOTE)) dump_printf_loc (MSG_NOTE, vect_location, ! " Profitability threshold = %d\n", min_profitable_iters); ! return min_profitable_iters; } --- 2927,2961 ---- if (dump_kind_p (MSG_NOTE)) dump_printf_loc (MSG_NOTE, vect_location, ! " Runtime profitability threshold = %d\n", min_profitable_iters); ! ! *ret_min_profitable_niters = min_profitable_iters; ! ! /* Calculate number of iterations required to make the vector version ! profitable, relative to the loop bodies only. ! ! Non-vectorized variant is SIC * niters and it must win over vector ! variant on the expected loop trip count. The following condition must hold true: ! SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC */ ! ! if (vec_outside_cost <= 0) ! min_profitable_estimate = 1; ! else ! { ! min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf ! - vec_inside_cost * peel_iters_prologue ! - vec_inside_cost * peel_iters_epilogue) ! / ((scalar_single_iter_cost * vf) ! - vec_inside_cost); ! } ! min_profitable_estimate --; ! min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters); ! if (dump_kind_p (MSG_NOTE)) ! dump_printf_loc (MSG_NOTE, vect_location, ! " Static estimate profitability threshold = %d\n", ! min_profitable_iters); ! *ret_min_profitable_estimate = min_profitable_estimate; }