When I fixed PR64909 I knew it wasn't a 100% precise fix. This bites us now in PR65660 (which also shows other issues). Thus the following precise fix for PR64909 (which didn't turn out as too involved) - compute a cost vector for the scalar iteration cost and apply it for peeled iterations.
Bootstrap and regtest running on x86_64-unknown-linux-gnu. I plan to apply this to trunk and to the 4.9 branch where I backported the original fix to as it fixes round-off errors which can lead to very unprofitable vectorizations. Richard. 2015-04-02 Richard Biener <rguent...@suse.de> PR tree-optimization/64909 PR tree-optimization/65660 * tree-vectorizer.h (vect_get_known_peeling_cost): Adjust to take a cost vector for scalar iteration cost. (vect_get_single_scalar_iteration_cost): Likewise. * tree-vect-loop.c (vect_get_single_scalar_iteration_cost): Compute the scalar iteration cost into a cost vector. (vect_get_known_peeling_cost): Use the scalar cost vector to account for the cost of the peeled iterations. (vect_estimate_min_profitable_iters): Likewise. * tree-vect-data-refs.c (vect_peeling_hash_get_lowest_cost): Likewise. Index: gcc/tree-vectorizer.h =================================================================== --- gcc/tree-vectorizer.h (revision 220580) +++ gcc/tree-vectorizer.h (working copy) @@ -1101,10 +1101,12 @@ extern bool vectorizable_reduction (gimp extern bool vectorizable_induction (gimple, gimple_stmt_iterator *, gimple *); extern tree get_initial_def_for_reduction (gimple, tree, tree *); extern int vect_min_worthwhile_factor (enum tree_code); -extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, int, +extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, + stmt_vector_for_cost *, stmt_vector_for_cost *, stmt_vector_for_cost *); -extern int vect_get_single_scalar_iteration_cost (loop_vec_info); +extern int vect_get_single_scalar_iteration_cost (loop_vec_info, + stmt_vector_for_cost *); /* In tree-vect-slp.c. */ extern void vect_free_slp_instance (slp_instance); Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c (revision 220580) +++ gcc/tree-vect-loop.c (working copy) @@ -2653,12 +2653,13 @@ vect_force_simple_reduction (loop_vec_in /* Calculate the cost of one scalar iteration of the loop. */ int -vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo) +vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo, + stmt_vector_for_cost *scalar_cost_vec) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0; - int innerloop_iters, i, stmt_cost; + int innerloop_iters, i; /* Count statements in scalar loop. Using this as scalar cost for a single iteration for now. @@ -2699,17 +2700,20 @@ vect_get_single_scalar_iteration_cost (l && !STMT_VINFO_IN_PATTERN_P (stmt_info)) continue; + vect_cost_for_stmt kind; if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))) { if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))) - stmt_cost = vect_get_stmt_cost (scalar_load); + kind = scalar_load; else - stmt_cost = vect_get_stmt_cost (scalar_store); + kind = scalar_store; } else - stmt_cost = vect_get_stmt_cost (scalar_stmt); + kind = scalar_stmt; - scalar_single_iter_cost += stmt_cost * factor; + scalar_single_iter_cost + += record_stmt_cost (scalar_cost_vec, factor, kind, + NULL, 0, vect_prologue); } } return scalar_single_iter_cost; @@ -2719,7 +2723,7 @@ vect_get_single_scalar_iteration_cost (l int vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, int *peel_iters_epilogue, - int scalar_single_iter_cost, + stmt_vector_for_cost *scalar_cost_vec, stmt_vector_for_cost *prologue_cost_vec, stmt_vector_for_cost *epilogue_cost_vec) { @@ -2736,8 +2740,10 @@ vect_get_known_peeling_cost (loop_vec_in /* If peeled iterations are known but number of scalar loop iterations are unknown, count a taken branch per peeled loop. */ - retval = record_stmt_cost (prologue_cost_vec, 2, cond_branch_taken, + retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken, NULL, 0, vect_prologue); + retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken, + NULL, 0, vect_epilogue); } else { @@ -2751,14 +2757,21 @@ vect_get_known_peeling_cost (loop_vec_in *peel_iters_epilogue = vf; } + stmt_info_for_cost *si; + int j; if (peel_iters_prologue) - retval += record_stmt_cost (prologue_cost_vec, - peel_iters_prologue * scalar_single_iter_cost, - scalar_stmt, NULL, 0, vect_prologue); + FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) + retval += record_stmt_cost (prologue_cost_vec, + si->count * peel_iters_prologue, + si->kind, NULL, si->misalign, + vect_prologue); if (*peel_iters_epilogue) - retval += record_stmt_cost (epilogue_cost_vec, - *peel_iters_epilogue * scalar_single_iter_cost, - scalar_stmt, NULL, 0, vect_epilogue); + FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) + retval += record_stmt_cost (epilogue_cost_vec, + si->count * *peel_iters_epilogue, + si->kind, NULL, si->misalign, + vect_epilogue); + return retval; } @@ -2833,12 +2846,9 @@ vect_estimate_min_profitable_iters (loop TODO: Consider assigning different costs to different scalar statements. */ - scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo); - /* ??? Below we use this cost as number of stmts with scalar_stmt cost, - thus divide by that. This introduces rounding errors, thus better - introduce a new cost kind (raw_cost? scalar_iter_cost?). */ - int scalar_single_iter_stmts - = scalar_single_iter_cost / vect_get_stmt_cost (scalar_stmt); + auto_vec<stmt_info_for_cost> scalar_cost_vec; + scalar_single_iter_cost + = vect_get_single_scalar_iteration_cost (loop_vinfo, &scalar_cost_vec); /* Add additional cost for the peeled instructions in prologue and epilogue loop. @@ -2866,18 +2876,29 @@ vect_estimate_min_profitable_iters (loop branch per peeled loop. Even if scalar loop iterations are known, vector iterations are not known since peeled prologue iterations are not known. Hence guards remain the same. */ - (void) add_stmt_cost (target_cost_data, 2, cond_branch_taken, + (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0, vect_prologue); - (void) add_stmt_cost (target_cost_data, 2, cond_branch_not_taken, + (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken, NULL, 0, vect_prologue); - /* FORNOW: Don't attempt to pass individual scalar instructions to - the model; just assume linear cost for scalar iterations. */ - (void) add_stmt_cost (target_cost_data, - peel_iters_prologue * scalar_single_iter_stmts, - scalar_stmt, NULL, 0, vect_prologue); - (void) add_stmt_cost (target_cost_data, - peel_iters_epilogue * scalar_single_iter_stmts, - scalar_stmt, NULL, 0, vect_epilogue); + (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, + NULL, 0, vect_epilogue); + (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken, + NULL, 0, vect_epilogue); + stmt_info_for_cost *si; + int j; + FOR_EACH_VEC_ELT (scalar_cost_vec, j, si) + { + struct _stmt_vec_info *stmt_info + = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; + (void) add_stmt_cost (target_cost_data, + si->count * peel_iters_prologue, + si->kind, stmt_info, si->misalign, + vect_prologue); + (void) add_stmt_cost (target_cost_data, + si->count * peel_iters_epilogue, + si->kind, stmt_info, si->misalign, + vect_epilogue); + } } else { @@ -2892,7 +2913,7 @@ vect_estimate_min_profitable_iters (loop (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue, &peel_iters_epilogue, - scalar_single_iter_stmts, + &scalar_cost_vec, &prologue_cost_vec, &epilogue_cost_vec); Index: gcc/tree-vect-data-refs.c =================================================================== --- gcc/tree-vect-data-refs.c (revision 220580) +++ gcc/tree-vect-data-refs.c (working copy) @@ -1183,14 +1183,12 @@ vect_peeling_hash_get_lowest_cost (_vect SET_DR_MISALIGNMENT (dr, save_misalignment); } - single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo); + auto_vec<stmt_info_for_cost> scalar_cost_vec; + single_iter_cost + = vect_get_single_scalar_iteration_cost (loop_vinfo, &scalar_cost_vec); outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy, - /* ??? We use this cost as number of stmts with scalar_stmt cost, - thus divide by that. This introduces rounding errors, thus better - introduce a new cost kind (raw_cost? scalar_iter_cost?). */ - single_iter_cost / vect_get_stmt_cost (scalar_stmt), - &prologue_cost_vec, &epilogue_cost_vec); + &scalar_cost_vec, &prologue_cost_vec, &epilogue_cost_vec); /* Prologue and epilogue costs are added to the target model later. These costs depend only on the scalar iteration cost, the