This removes the broken function number_of_exit_cond_executions from tree-scalar-evolution.c and re-implements its logic inside its now sole user, vect_get_loop_niters (still unfixed). It also reshuffles the vectorizer niter code some more to make the final fix (using the number of latch executions throughout) easier.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2013-11-21 Richard Biener <[email protected]> PR tree-optimization/59058 * tree-scalar-evolution.h (number_of_exit_cond_executions): Remove. * tree-scalar-evolution.c (number_of_exit_cond_executions): Likewise. * tree-vectorizer.h (LOOP_PEELING_FOR_ALIGNMENT): Rename to ... (LOOP_VINFO_PEELING_FOR_ALIGNMENT): ... this. (NITERS_KNOWN_P): Fold into ... (LOOP_VINFO_NITERS_KNOWN_P): ... this. (LOOP_VINFO_PEELING_FOR_NITER): Add. * tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Use LOOP_VINFO_PEELING_FOR_ALIGNMENT. (vect_do_peeling_for_alignment): Re-use precomputed niter instead of re-emitting it. * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Use LOOP_VINFO_PEELING_FOR_ALIGNMENT. * tree-vect-loop.c (vect_get_loop_niters): Use number_of_latch_executions. (new_loop_vec_info): Initialize LOOP_VINFO_PEELING_FOR_NITER. (vect_analyze_loop_form): Simplify. (vect_analyze_loop_operations): Move epilogue peeling code ... (vect_analyze_loop_2): ... here and adjust it to compute LOOP_VINFO_PEELING_FOR_NITER. (vect_estimate_min_profitable_iters): Use LOOP_VINFO_PEELING_FOR_ALIGNMENT. (vect_build_loop_niters): Emit on the preheader. (vect_generate_tmps_on_preheader): Likewise. (vect_transform_loop): Use LOOP_VINFO_PEELING_FOR_NITER instead of recomputing it. Adjust. Index: gcc/tree-vect-loop-manip.c =================================================================== *** gcc/tree-vect-loop-manip.c.orig 2013-11-21 14:58:43.061653802 +0100 --- gcc/tree-vect-loop-manip.c 2013-11-21 14:58:51.151747654 +0100 *************** vect_gen_niters_for_prolog_loop (loop_ve *** 1736,1751 **** pe = loop_preheader_edge (loop); ! if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) { ! int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "known peeling = %d.\n", npeel); iters = build_int_cst (niters_type, npeel); ! 
*bound = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); } else { --- 1736,1751 ---- pe = loop_preheader_edge (loop); ! if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) { ! int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "known peeling = %d.\n", npeel); iters = build_int_cst (niters_type, npeel); ! *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); } else { *************** vect_do_peeling_for_alignment (loop_vec_ *** 1876,1882 **** { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree niters_of_prolog_loop; - tree n_iters; tree wide_prolog_niters; struct loop *new_loop; int max_iter; --- 1876,1881 ---- *************** vect_do_peeling_for_alignment (loop_vec_ *** 1918,1926 **** "loop to %d\n", max_iter); /* Update number of times loop executes. */ - n_iters = LOOP_VINFO_NITERS (loop_vinfo); LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR, ! TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop); if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop))) wide_prolog_niters = niters_of_prolog_loop; --- 1917,1924 ---- "loop to %d\n", max_iter); /* Update number of times loop executes. */ LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR, ! TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop); if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop))) wide_prolog_niters = niters_of_prolog_loop; Index: gcc/tree-vectorizer.h =================================================================== *** gcc/tree-vectorizer.h.orig 2013-11-21 14:58:43.062653811 +0100 --- gcc/tree-vectorizer.h 2013-11-21 14:58:51.153747678 +0100 *************** typedef struct _loop_vec_info { *** 361,367 **** #define LOOP_VINFO_DATAREFS(L) (L)->datarefs #define LOOP_VINFO_DDRS(L) (L)->ddrs #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters)) ! 
#define LOOP_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs --- 361,367 ---- #define LOOP_VINFO_DATAREFS(L) (L)->datarefs #define LOOP_VINFO_DDRS(L) (L)->ddrs #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters)) ! #define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs *************** typedef struct _loop_vec_info { *** 375,392 **** #define LOOP_VINFO_TARGET_COST_DATA(L) (L)->target_cost_data #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps #define LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ ! (L)->may_misalign_stmts.length () > 0 #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ ! (L)->may_alias_ddrs.length () > 0 ! ! #define NITERS_KNOWN_P(n) \ ! (tree_fits_shwi_p ((n)) \ ! && tree_to_shwi ((n)) > 0) #define LOOP_VINFO_NITERS_KNOWN_P(L) \ ! NITERS_KNOWN_P ((L)->num_iters) static inline loop_vec_info loop_vec_info_for_loop (struct loop *loop) --- 375,389 ---- #define LOOP_VINFO_TARGET_COST_DATA(L) (L)->target_cost_data #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps #define LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped + #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ ! (L)->may_misalign_stmts.length () > 0 #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ ! (L)->may_alias_ddrs.length () > 0 #define LOOP_VINFO_NITERS_KNOWN_P(L) \ ! 
(tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0) static inline loop_vec_info loop_vec_info_for_loop (struct loop *loop) Index: gcc/tree-scalar-evolution.c =================================================================== *** gcc/tree-scalar-evolution.c.orig 2013-11-21 14:48:17.710397038 +0100 --- gcc/tree-scalar-evolution.c 2013-11-21 14:58:51.148747619 +0100 *************** number_of_latch_executions (struct loop *** 2910,2943 **** loop->nb_iterations = res; return res; } - - /* Returns the number of executions of the exit condition of LOOP, - i.e., the number by one higher than number_of_latch_executions. - Note that unlike number_of_latch_executions, this number does - not necessarily fit in the unsigned variant of the type of - the control variable -- if the number of iterations is a constant, - we return chrec_dont_know if adding one to number_of_latch_executions - overflows; however, in case the number of iterations is symbolic - expression, the caller is responsible for dealing with this - the possible overflow. */ - - tree - number_of_exit_cond_executions (struct loop *loop) - { - tree ret = number_of_latch_executions (loop); - tree type = chrec_type (ret); - - if (chrec_contains_undetermined (ret)) - return ret; - - ret = chrec_fold_plus (type, ret, build_int_cst (type, 1)); - if (TREE_CODE (ret) == INTEGER_CST - && TREE_OVERFLOW (ret)) - return chrec_dont_know; - - return ret; - } - /* Counters for the stats. */ --- 2910,2915 ---- Index: gcc/tree-scalar-evolution.h =================================================================== *** gcc/tree-scalar-evolution.h.orig 2013-11-21 14:48:17.709397027 +0100 --- gcc/tree-scalar-evolution.h 2013-11-21 14:58:51.148747619 +0100 *************** along with GCC; see the file COPYING3. 
*** 22,28 **** #define GCC_TREE_SCALAR_EVOLUTION_H extern tree number_of_latch_executions (struct loop *); - extern tree number_of_exit_cond_executions (struct loop *); extern gimple get_loop_exit_condition (const struct loop *); extern void scev_initialize (void); --- 22,27 ---- Index: gcc/tree-vect-data-refs.c =================================================================== *** gcc/tree-vect-data-refs.c.orig 2013-11-21 14:48:17.709397027 +0100 --- gcc/tree-vect-data-refs.c 2013-11-21 14:58:51.150747642 +0100 *************** vect_enhance_data_refs_alignment (loop_v *** 1735,1743 **** LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0; if (npeel) ! LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel; else ! LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0); SET_DR_MISALIGNMENT (dr0, 0); if (dump_enabled_p ()) { --- 1735,1744 ---- LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0; if (npeel) ! LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel; else ! LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) ! = DR_MISALIGNMENT (dr0); SET_DR_MISALIGNMENT (dr0, 0); if (dump_enabled_p ()) { Index: gcc/tree-vect-loop.c =================================================================== *** gcc/tree-vect-loop.c.orig 2013-11-21 14:48:17.709397027 +0100 --- gcc/tree-vect-loop.c 2013-11-21 14:58:51.152747666 +0100 *************** vect_analyze_scalar_cycles (loop_vec_inf *** 771,781 **** vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner); } /* Function vect_get_loop_niters. ! Determine how many iterations the loop is executed. ! If an expression that represents the number of iterations ! can be constructed, place it in NUMBER_OF_ITERATIONS. Return the loop exit condition. */ static gimple --- 771,782 ---- vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner); } + /* Function vect_get_loop_niters. ! Determine how many iterations the loop is executed and place it ! in NUMBER_OF_ITERATIONS. ! Return the loop exit condition. 
*/ static gimple *************** vect_get_loop_niters (struct loop *loop, *** 786,805 **** if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "=== get_loop_niters ===\n"); - niters = number_of_exit_cond_executions (loop); ! if (niters != NULL_TREE ! && niters != chrec_dont_know) ! { ! *number_of_iterations = niters; ! ! if (dump_enabled_p ()) ! { ! dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:"); ! dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations); ! dump_printf (MSG_NOTE, "\n"); ! } ! } return get_loop_exit_condition (loop); } --- 787,802 ---- if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "=== get_loop_niters ===\n"); ! niters = number_of_latch_executions (loop); ! /* We want the number of loop header executions which is the number ! of latch executions plus one. ! ??? For UINT_MAX latch executions this number overflows to zero ! for loops like do { n++; } while (n != 0); */ ! if (niters && !chrec_contains_undetermined (niters)) ! niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), niters, ! build_int_cst (TREE_TYPE (niters), 1)); ! *number_of_iterations = niters; return get_loop_exit_condition (loop); } *************** new_loop_vec_info (struct loop *loop) *** 907,913 **** LOOP_VINFO_NITERS_UNCHANGED (res) = NULL; LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0; LOOP_VINFO_VECTORIZABLE_P (res) = 0; ! LOOP_PEELING_FOR_ALIGNMENT (res) = 0; LOOP_VINFO_VECT_FACTOR (res) = 0; LOOP_VINFO_LOOP_NEST (res).create (3); LOOP_VINFO_DATAREFS (res).create (10); --- 904,910 ---- LOOP_VINFO_NITERS_UNCHANGED (res) = NULL; LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0; LOOP_VINFO_VECTORIZABLE_P (res) = 0; ! 
LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0; LOOP_VINFO_VECT_FACTOR (res) = 0; LOOP_VINFO_LOOP_NEST (res).create (3); LOOP_VINFO_DATAREFS (res).create (10); *************** new_loop_vec_info (struct loop *loop) *** 924,929 **** --- 921,927 ---- LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1; LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop); LOOP_VINFO_PEELING_FOR_GAPS (res) = false; + LOOP_VINFO_PEELING_FOR_NITER (res) = false; LOOP_VINFO_OPERANDS_SWAPPED (res) = false; return res; *************** vect_analyze_loop_form (struct loop *loo *** 1091,1102 **** } if (empty_block_p (loop->header)) ! { ! if (dump_enabled_p ()) ! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "not vectorized: empty loop.\n"); ! return NULL; ! } } else { --- 1089,1100 ---- } if (empty_block_p (loop->header)) ! { ! if (dump_enabled_p ()) ! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "not vectorized: empty loop.\n"); ! return NULL; ! } } else { *************** vect_analyze_loop_form (struct loop *loo *** 1243,1249 **** return NULL; } ! if (!number_of_iterations) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, --- 1241,1248 ---- return NULL; } ! if (!number_of_iterations ! || chrec_contains_undetermined (number_of_iterations)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, *************** vect_analyze_loop_form (struct loop *loo *** 1254,1270 **** return NULL; } ! if (chrec_contains_undetermined (number_of_iterations)) { if (dump_enabled_p ()) ! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ! "Infinite number of iterations.\n"); if (inner_loop_vinfo) ! destroy_loop_vec_info (inner_loop_vinfo, true); return NULL; } ! if (!NITERS_KNOWN_P (number_of_iterations)) { if (dump_enabled_p ()) { --- 1253,1273 ---- return NULL; } ! if (integer_zerop (number_of_iterations)) { if (dump_enabled_p ()) ! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ! 
"not vectorized: number of iterations = 0.\n"); if (inner_loop_vinfo) ! destroy_loop_vec_info (inner_loop_vinfo, true); return NULL; } ! loop_vinfo = new_loop_vec_info (loop); ! LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; ! LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations; ! ! if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) { if (dump_enabled_p ()) { *************** vect_analyze_loop_form (struct loop *loo *** 1274,1292 **** dump_printf (MSG_NOTE, "\n"); } } - else if (TREE_INT_CST_LOW (number_of_iterations) == 0) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: number of iterations = 0.\n"); - if (inner_loop_vinfo) - destroy_loop_vec_info (inner_loop_vinfo, true); - return NULL; - } - - loop_vinfo = new_loop_vec_info (loop); - LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; - LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations; STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type; --- 1277,1282 ---- *************** vect_analyze_loop_operations (loop_vec_i *** 1588,1610 **** return false; } - if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) - || ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) - < exact_log2 (vectorization_factor))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n"); - if (!vect_can_advance_ivs_p (loop_vinfo) - || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "not vectorized: can't create required " - "epilog loop\n"); - return false; - } - } - return true; } --- 1578,1583 ---- *************** vect_analyze_loop_2 (loop_vec_info loop_ *** 1760,1765 **** --- 1733,1772 ---- return false; } + /* Decide whether we need to create an epilogue loop to handle + remaining scalar iterations. 
*/ + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) + { + if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo) + - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)) + < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))) + LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; + } + else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) + || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) + < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))) + LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; + + /* If an epilogue loop is required make sure we can create one. */ + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) + || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n"); + if (!vect_can_advance_ivs_p (loop_vinfo) + || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo), + single_exit (LOOP_VINFO_LOOP + (loop_vinfo)))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: can't create required " + "epilog loop\n"); + return false; + } + } + return true; } *************** vect_estimate_min_profitable_iters (loop *** 2689,2695 **** int scalar_single_iter_cost = 0; int scalar_outside_cost = 0; int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); ! int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); /* Cost model disabled. */ --- 2696,2702 ---- int scalar_single_iter_cost = 0; int scalar_outside_cost = 0; int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); ! int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); /* Cost model disabled. */ *************** vect_estimate_min_profitable_iters (loop *** 2880,2886 **** else { /* Cost model check occurs at prologue generation. */ ! 
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken) + vect_get_stmt_cost (cond_branch_not_taken); /* Cost model check occurs at epilogue generation. */ --- 2887,2893 ---- else { /* Cost model check occurs at prologue generation. */ ! if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken) + vect_get_stmt_cost (cond_branch_not_taken); /* Cost model check occurs at epilogue generation. */ *************** vect_loop_kill_debug_uses (struct loop * *** 5574,5620 **** /* This function builds ni_name = number of iterations. Statements ! are queued onto SEQ. */ static tree ! vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq *seq) { - tree ni_name, var; - gimple_seq stmts = NULL; tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); ! var = create_tmp_var (TREE_TYPE (ni), "niters"); ! ni_name = force_gimple_operand (ni, &stmts, false, var); ! ! if (stmts) ! gimple_seq_add_seq (seq, stmts); ! return ni_name; } /* This function generates the following statements: ! ni_name = number of iterations loop executes ! ratio = ni_name / vf ! ratio_mult_vf_name = ratio * vf ! and places them in COND_EXPR_STMT_LIST. */ static void vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree ni_name, tree *ratio_mult_vf_name_ptr, ! tree *ratio_name_ptr, ! gimple_seq *cond_expr_stmt_list) { - gimple_seq stmts; tree ni_minus_gap_name; tree var; tree ratio_name; tree ratio_mult_vf_name; tree ni = LOOP_VINFO_NITERS (loop_vinfo); int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); tree log_vf; log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf)); --- 5581,5631 ---- /* This function builds ni_name = number of iterations. Statements ! are emitted on the loop preheader edge. */ static tree ! 
vect_build_loop_niters (loop_vec_info loop_vinfo) { tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); + if (TREE_CODE (ni) == INTEGER_CST) + return ni; + else + { + tree ni_name, var; + gimple_seq stmts = NULL; + edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); ! var = create_tmp_var (TREE_TYPE (ni), "niters"); ! ni_name = force_gimple_operand (ni, &stmts, false, var); ! if (stmts) ! gsi_insert_seq_on_edge_immediate (pe, stmts); ! return ni_name; ! } } /* This function generates the following statements: ! ni_name = number of iterations loop executes ! ratio = ni_name / vf ! ratio_mult_vf_name = ratio * vf ! and places them on the loop preheader edge. */ static void vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree ni_name, tree *ratio_mult_vf_name_ptr, ! tree *ratio_name_ptr) { tree ni_minus_gap_name; tree var; tree ratio_name; tree ratio_mult_vf_name; tree ni = LOOP_VINFO_NITERS (loop_vinfo); int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); tree log_vf; log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf)); *************** vect_generate_tmps_on_preheader (loop_ve *** 5630,5640 **** if (!is_gimple_val (ni_minus_gap_name)) { var = create_tmp_var (TREE_TYPE (ni), "ni_gap"); ! ! stmts = NULL; ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts, true, var); ! gimple_seq_add_seq (cond_expr_stmt_list, stmts); } } else --- 5641,5650 ---- if (!is_gimple_val (ni_minus_gap_name)) { var = create_tmp_var (TREE_TYPE (ni), "ni_gap"); ! gimple stmts = NULL; ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts, true, var); ! gsi_insert_seq_on_edge_immediate (pe, stmts); } } else *************** vect_generate_tmps_on_preheader (loop_ve *** 5647,5656 **** if (!is_gimple_val (ratio_name)) { var = create_tmp_var (TREE_TYPE (ni), "bnd"); ! ! stmts = NULL; ratio_name = force_gimple_operand (ratio_name, &stmts, true, var); ! 
gimple_seq_add_seq (cond_expr_stmt_list, stmts); } *ratio_name_ptr = ratio_name; --- 5657,5665 ---- if (!is_gimple_val (ratio_name)) { var = create_tmp_var (TREE_TYPE (ni), "bnd"); ! gimple stmts = NULL; ratio_name = force_gimple_operand (ratio_name, &stmts, true, var); ! gsi_insert_seq_on_edge_immediate (pe, stmts); } *ratio_name_ptr = ratio_name; *************** vect_generate_tmps_on_preheader (loop_ve *** 5663,5673 **** if (!is_gimple_val (ratio_mult_vf_name)) { var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); ! ! stmts = NULL; ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts, true, var); ! gimple_seq_add_seq (cond_expr_stmt_list, stmts); } *ratio_mult_vf_name_ptr = ratio_mult_vf_name; } --- 5672,5681 ---- if (!is_gimple_val (ratio_mult_vf_name)) { var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); ! gimple stmts = NULL; ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts, true, var); ! gsi_insert_seq_on_edge_immediate (pe, stmts); } *ratio_mult_vf_name_ptr = ratio_mult_vf_name; } *************** vect_transform_loop (loop_vec_info loop_ *** 5739,5758 **** check_profitability = false; } /* Peel the loop if there are data refs with unknown alignment. ! Only one data ref with unknown store is allowed. ! This clobbers LOOP_VINFO_NITERS but retains the original ! in LOOP_VINFO_NITERS_UNCHANGED. So we cannot avoid re-computing ! niters. */ ! if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) { - gimple_seq stmts = NULL; - tree ni_name = vect_build_loop_niters (loop_vinfo, &stmts); - gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); vect_do_peeling_for_alignment (loop_vinfo, ni_name, th, check_profitability); check_profitability = false; } /* If the loop has a symbolic number of iterations 'n' (i.e. 
it's not a --- 5747,5766 ---- check_profitability = false; } + tree ni_name = vect_build_loop_niters (loop_vinfo); + LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name; + /* Peel the loop if there are data refs with unknown alignment. ! Only one data ref with unknown store is allowed. */ ! if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)) { vect_do_peeling_for_alignment (loop_vinfo, ni_name, th, check_profitability); check_profitability = false; + /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to + be re-computed. */ + ni_name = NULL_TREE; } /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a *************** vect_transform_loop (loop_vec_info loop_ *** 5763,5778 **** will remain scalar and will compute the remaining (n%VF) iterations. (VF is the vectorization factor). */ ! if ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) ! < exact_log2 (vectorization_factor) || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) { ! tree ni_name, ratio_mult_vf; ! gimple_seq stmts = NULL; ! ni_name = vect_build_loop_niters (loop_vinfo, &stmts); vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf, ! &ratio, &stmts); ! gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf, th, check_profitability); } --- 5771,5784 ---- will remain scalar and will compute the remaining (n%VF) iterations. (VF is the vectorization factor). */ ! if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) { ! tree ratio_mult_vf; ! if (!ni_name) ! ni_name = vect_build_loop_niters (loop_vinfo); vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf, ! &ratio); vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf, th, check_profitability); } *************** vect_transform_loop (loop_vec_info loop_ *** 5781,5792 **** LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor); else { ! tree ni_name; ! gimple_seq stmts = NULL; ! 
ni_name = vect_build_loop_niters (loop_vinfo, &stmts); ! vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, ! &ratio, &stmts); ! gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); } /* 1) Make sure the loop header has exactly two entries --- 5787,5795 ---- LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor); else { ! if (!ni_name) ! ni_name = vect_build_loop_niters (loop_vinfo); ! vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio); } /* 1) Make sure the loop header has exactly two entries
