On Tue, May 14, 2013 at 12:16:07PM +0200, Richard Biener wrote: > Works for me.
... Ok, here is what I've committed to gomp-4_0-branch. tree-vect-data-refs.c was kept (almost) unchanged, as per IRC discussion; that is something for the TODO list for the future. 2013-05-14 Jakub Jelinek <ja...@redhat.com> * cfgloop.h (struct loop): Add safelen and force_vect fields. * function.h (struct function): Add has_force_vect_loops field. * omp-low.c (expand_omp_simd): If !broken_loop, create loop for the simd region and set safelen and force_vect fields in it. * tree-vectorizer.c (vectorize_loops): If loop has force_vect set, vectorize it even if flag_tree_vectorize isn't set. Clear loop->force_vect after vectorization. * tree-ssa-loop.c (gate_tree_vectorize): Return true even if cfun->has_force_vect_loops. * tree-ssa-loop-ivcanon.c (tree_unroll_loops_completely_1): Don't unroll loops with loop->force_vect. * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): For unknown or bad data dependency, if loop->safelen is non-zero, just decrease *max_vf to loop->safelen if needed and return false. * tree-if-conv.c (main_tree_if_conversion): If-convert also loops with loop->force_vect. (gate_tree_if_conversion): Return true even if cfun->has_force_vect_loops. --- gcc/cfgloop.h.jj 2013-05-13 16:49:44.000000000 +0200 +++ gcc/cfgloop.h 2013-05-14 13:59:47.179036079 +0200 @@ -168,6 +168,15 @@ struct GTY ((chain_next ("%h.next"))) lo describes what is the state of the estimation. */ enum loop_estimation estimate_state; + /* If > 0, an integer, where the user asserted that for any + I in [ 0, nb_iterations ) and for any J in + [ I, min ( I + safelen, nb_iterations ) ), the Ith and Jth iterations + of the loop can be safely evaluated concurrently. */ + int safelen; + + /* True if we should try harder to vectorize this loop. */ + bool force_vect; + /* Upper bound on number of iterations of a loop. 
*/ struct nb_iter_bound *bounds; --- gcc/function.h.jj 2013-05-13 16:49:03.000000000 +0200 +++ gcc/function.h 2013-05-14 14:06:31.102720074 +0200 @@ -641,6 +641,10 @@ struct GTY(()) function { adjusts one of its arguments and forwards to another function. */ unsigned int is_thunk : 1; + + /* Nonzero if the current function contains any loops with + loop->force_vect set. */ + unsigned int has_force_vect_loops : 1; }; /* Add the decl D to the local_decls list of FUN. */ --- gcc/omp-low.c.jj 2013-05-13 16:37:05.000000000 +0200 +++ gcc/omp-low.c 2013-05-14 14:54:43.154188242 +0200 @@ -4960,6 +4960,8 @@ expand_omp_simd (struct omp_region *regi edge e, ne; tree *counts = NULL; int i; + tree safelen = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE_SAFELEN); type = TREE_TYPE (fd->loop.v); entry_bb = region->entry; @@ -5157,6 +5159,34 @@ expand_omp_simd (struct omp_region *regi set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); set_immediate_dominator (CDI_DOMINATORS, l2_bb, l1_bb); set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); + + if (!broken_loop) + { + struct loop *loop = alloc_loop (); + loop->header = l1_bb; + loop->latch = e->dest; + add_loop (loop, l1_bb->loop_father); + if (safelen == NULL_TREE) + loop->safelen = INT_MAX; + else + { + safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen); + if (!host_integerp (safelen, 1) + || (unsigned HOST_WIDE_INT) tree_low_cst (safelen, 1) + > INT_MAX) + loop->safelen = INT_MAX; + else + loop->safelen = tree_low_cst (safelen, 1); + } + /* If not -fno-tree-vectorize, hint that we want to vectorize + the loop. */ + if (flag_tree_vectorize + || !global_options_set.x_flag_tree_vectorize) + { + loop->force_vect = true; + cfun->has_force_vect_loops = true; + } + } } --- gcc/tree-vectorizer.c.jj 2013-05-13 16:49:03.000000000 +0200 +++ gcc/tree-vectorizer.c 2013-05-14 14:13:43.434236251 +0200 @@ -101,7 +101,8 @@ vectorize_loops (void) than all previously defined loops. 
This fact allows us to run only over initial loops skipping newly generated ones. */ FOR_EACH_LOOP (li, loop, 0) - if (optimize_loop_nest_for_speed_p (loop)) + if ((flag_tree_vectorize && optimize_loop_nest_for_speed_p (loop)) + || loop->force_vect) { loop_vec_info loop_vinfo; vect_location = find_loop_location (loop); @@ -122,6 +123,9 @@ vectorize_loops (void) LOC_FILE (vect_location), LOC_LINE (vect_location)); vect_transform_loop (loop_vinfo); num_vectorized_loops++; + /* Now that the loop has been vectorized, allow it to be unrolled + etc. */ + loop->force_vect = false; } vect_location = UNKNOWN_LOC; --- gcc/tree-ssa-loop.c.jj 2013-05-13 16:46:36.000000000 +0200 +++ gcc/tree-ssa-loop.c 2013-05-14 14:14:27.320984029 +0200 @@ -225,7 +225,7 @@ tree_vectorize (void) static bool gate_tree_vectorize (void) { - return flag_tree_vectorize; + return flag_tree_vectorize || cfun->has_force_vect_loops; } struct gimple_opt_pass pass_vectorize = --- gcc/tree-ssa-loop-ivcanon.c.jj 2013-05-13 16:46:36.000000000 +0200 +++ gcc/tree-ssa-loop-ivcanon.c 2013-05-14 14:14:07.088100214 +0200 @@ -1123,6 +1123,11 @@ tree_unroll_loops_completely_1 (bool may if (changed) return true; + /* Don't unroll #pragma omp simd loops until the vectorizer + attempts to vectorize those. */ + if (loop->force_vect) + return false; + /* Try to unroll this loop. */ loop_father = loop_outer (loop); if (!loop_father) --- gcc/tree-vect-data-refs.c.jj 2013-05-13 16:49:08.000000000 +0200 +++ gcc/tree-vect-data-refs.c 2013-05-14 14:26:06.972967958 +0200 @@ -255,6 +255,15 @@ vect_analyze_data_ref_dependence (struct /* Unknown data dependence. */ if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) { + /* If user asserted safelen consecutive iterations can be + executed concurrently, assume independence. 
*/ + if (loop->safelen >= 2) + { + if (loop->safelen < *max_vf) + *max_vf = loop->safelen; + return false; + } + if (STMT_VINFO_GATHER_P (stmtinfo_a) || STMT_VINFO_GATHER_P (stmtinfo_b)) { @@ -291,6 +300,15 @@ vect_analyze_data_ref_dependence (struct /* Known data dependence. */ if (DDR_NUM_DIST_VECTS (ddr) == 0) { + /* If user asserted safelen consecutive iterations can be + executed concurrently, assume independence. */ + if (loop->safelen >= 2) + { + if (loop->safelen < *max_vf) + *max_vf = loop->safelen; + return false; + } + if (STMT_VINFO_GATHER_P (stmtinfo_a) || STMT_VINFO_GATHER_P (stmtinfo_b)) { --- gcc/tree-if-conv.c.jj 2013-05-13 16:49:06.000000000 +0200 +++ gcc/tree-if-conv.c 2013-05-14 14:38:16.928803349 +0200 @@ -1822,6 +1822,10 @@ main_tree_if_conversion (void) return 0; FOR_EACH_LOOP (li, loop, 0) + if (flag_tree_loop_if_convert == 1 + || flag_tree_loop_if_convert_stores == 1 + || flag_tree_vectorize + || loop->force_vect) changed |= tree_if_conversion (loop); if (changed) @@ -1848,7 +1852,8 @@ main_tree_if_conversion (void) static bool gate_tree_if_conversion (void) { - return ((flag_tree_vectorize && flag_tree_loop_if_convert != 0) + return (((flag_tree_vectorize || cfun->has_force_vect_loops) + && flag_tree_loop_if_convert != 0) || flag_tree_loop_if_convert == 1 || flag_tree_loop_if_convert_stores == 1); } Jakub