The testcase in PR56878 and my previous changes to the vectorizer expose that, among the DRs with unknown misalignment that have the same number of related accesses, there is a better candidate if the DR's base address is invariant in some loops. This is because you can then hoist the misalignment computation for the runtime check out of those loops. My vectorizer changes now simply order DRs differently, and the peeling code picks the first. The following patch makes us prefer more-invariant DRs over less-invariant ones, which restores the previous peeling choice for the testcase.
Bootstrapped on x86_64-unknown-linux-gnu. Richard. 2013-04-11 Richard Biener <rguent...@suse.de> PR tree-optimization/56878 * tree-flow.h (outermost_invariant_loop_for_expr): Declare. * tree-ssa-loop-ivopts.c (outermost_invariant_loop_for_expr): New function. * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Prefer to align the DR with the most invariant base address. Index: gcc/tree-flow.h =================================================================== *** gcc/tree-flow.h (revision 197743) --- gcc/tree-flow.h (working copy) *************** extern void tree_check_data_deps (void); *** 749,754 **** --- 749,755 ---- /* In tree-ssa-loop-ivopts.c */ bool expr_invariant_in_loop_p (struct loop *, tree); bool stmt_invariant_in_loop_p (struct loop *, gimple); + struct loop *outermost_invariant_loop_for_expr (struct loop *, tree); bool multiplier_allowed_in_address_p (HOST_WIDE_INT, enum machine_mode, addr_space_t); bool may_be_nonaddressable_p (tree expr); Index: gcc/tree-ssa-loop-ivopts.c =================================================================== *** gcc/tree-ssa-loop-ivopts.c (revision 197743) --- gcc/tree-ssa-loop-ivopts.c (working copy) *************** find_interesting_uses_cond (struct ivopt *** 1367,1372 **** --- 1367,1420 ---- record_use (data, NULL, civ, stmt, USE_COMPARE); } + /* Returns the outermost loop EXPR is obviously invariant in + relative to the loop LOOP, i.e. if all its operands are defined + outside of the returned loop. Returns NULL if EXPR is not + even obviously invariant in LOOP. 
*/ + + struct loop * + outermost_invariant_loop_for_expr (struct loop *loop, tree expr) + { + basic_block def_bb; + unsigned i, len; + + if (is_gimple_min_invariant (expr)) + return current_loops->tree_root; + + if (TREE_CODE (expr) == SSA_NAME) + { + def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr)); + if (def_bb) + { + if (flow_bb_inside_loop_p (loop, def_bb)) + return NULL; + return superloop_at_depth (loop, + loop_depth (def_bb->loop_father) + 1); + } + + return current_loops->tree_root; + } + + if (!EXPR_P (expr)) + return NULL; + + unsigned maxdepth = 0; + len = TREE_OPERAND_LENGTH (expr); + for (i = 0; i < len; i++) + { + struct loop *ivloop; + if (!TREE_OPERAND (expr, i)) + continue; + + ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i)); + if (!ivloop) + return NULL; + maxdepth = MAX (maxdepth, loop_depth (ivloop)); + } + + return superloop_at_depth (loop, maxdepth); + } + /* Returns true if expression EXPR is obviously invariant in LOOP, i.e. if all its operands are defined outside of the LOOP. LOOP should not be the function body. */ Index: gcc/tree-vect-data-refs.c =================================================================== *** gcc/tree-vect-data-refs.c (revision 197743) --- gcc/tree-vect-data-refs.c (working copy) *************** vect_enhance_data_refs_alignment (loop_v *** 1456,1475 **** } else { ! /* If we don't know all the misalignment values, we prefer ! peeling for data-ref that has maximum number of data-refs with the same alignment, unless the target prefers to align stores over load. */ if (all_misalignments_unknown) { ! if (same_align_drs_max ! < STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length () ! || !dr0) { ! same_align_drs_max ! = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length (); dr0 = dr; } if (!first_store && DR_IS_WRITE (dr)) first_store = dr; --- 1456,1490 ---- } else { ! /* If we don't know any misalignment values, we prefer ! 
peeling for data-ref that has the maximum number of data-refs with the same alignment, unless the target prefers to align stores over load. */ if (all_misalignments_unknown) { ! unsigned same_align_drs ! = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length (); ! if (!dr0 ! || same_align_drs_max < same_align_drs) { ! same_align_drs_max = same_align_drs; dr0 = dr; } + /* For data-refs with the same number of related + accesses prefer the one where the misalign + computation will be invariant in the outermost loop. */ + else if (same_align_drs_max == same_align_drs) + { + struct loop *ivloop0, *ivloop; + ivloop0 = outermost_invariant_loop_for_expr + (loop, DR_BASE_ADDRESS (dr0)); + ivloop = outermost_invariant_loop_for_expr + (loop, DR_BASE_ADDRESS (dr)); + if ((ivloop && !ivloop0) + || (ivloop && ivloop0 + && flow_loop_nested_p (ivloop, ivloop0))) + dr0 = dr; + } if (!first_store && DR_IS_WRITE (dr)) first_store = dr; *************** vect_enhance_data_refs_alignment (loop_v *** 1478,1485 **** /* If there are both known and unknown misaligned accesses in the loop, we choose peeling amount according to the known accesses. */ - - if (!supportable_dr_alignment) { dr0 = dr; --- 1493,1498 ----