The testcase in PR56878 and my previous changes to the vectorizer
expose that, amongst the DRs (data references) with unknown
misalignment and the same number of related accesses, there is a
better candidate if the DR's base address is invariant in some loops.
This is because you can then hoist the misalignment computation for
the runtime check out of those loops.  My vectorizer changes now
simply order the DRs differently and the peeling code picks the first
one.  The following patch makes us prefer more invariant DRs over
less invariant ones, which restores the previous peeling choice
for the testcase.

Bootstrapped on x86_64-unknown-linux-gnu.

Richard.

2013-04-11  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/56878
        * tree-flow.h (outermost_invariant_loop_for_expr): Declare.
        * tree-ssa-loop-ivopts.c (outermost_invariant_loop_for_expr):
        New function.
        * tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
        Prefer to align the DR with the most invariant base address.

Index: gcc/tree-flow.h
===================================================================
*** gcc/tree-flow.h     (revision 197743)
--- gcc/tree-flow.h     (working copy)
*************** extern void tree_check_data_deps (void);
*** 749,754 ****
--- 749,755 ----
  /* In tree-ssa-loop-ivopts.c  */
  bool expr_invariant_in_loop_p (struct loop *, tree);
  bool stmt_invariant_in_loop_p (struct loop *, gimple);
+ struct loop *outermost_invariant_loop_for_expr (struct loop *, tree);
  bool multiplier_allowed_in_address_p (HOST_WIDE_INT, enum machine_mode,
                                      addr_space_t);
  bool may_be_nonaddressable_p (tree expr);
Index: gcc/tree-ssa-loop-ivopts.c
===================================================================
*** gcc/tree-ssa-loop-ivopts.c  (revision 197743)
--- gcc/tree-ssa-loop-ivopts.c  (working copy)
*************** find_interesting_uses_cond (struct ivopt
*** 1367,1372 ****
--- 1367,1420 ----
    record_use (data, NULL, civ, stmt, USE_COMPARE);
  }
  
+ /* Returns the outermost loop EXPR is obviously invariant in
+    relative to the loop LOOP, i.e. if all its operands are defined
+    outside of the returned loop.  Returns NULL if EXPR is not
+    even obviously invariant in LOOP.  */
+ 
+ struct loop *
+ outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
+ {
+   basic_block def_bb;
+   unsigned i, len;
+ 
+   if (is_gimple_min_invariant (expr))
+     return current_loops->tree_root;
+ 
+   if (TREE_CODE (expr) == SSA_NAME)
+     {
+       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
+       if (def_bb)
+       {
+         if (flow_bb_inside_loop_p (loop, def_bb))
+           return NULL;
+         return superloop_at_depth (loop,
+                                    loop_depth (def_bb->loop_father) + 1);
+       }
+ 
+       return current_loops->tree_root;
+     }
+ 
+   if (!EXPR_P (expr))
+     return NULL;
+ 
+   unsigned maxdepth = 0;
+   len = TREE_OPERAND_LENGTH (expr);
+   for (i = 0; i < len; i++)
+     {
+       struct loop *ivloop;
+       if (!TREE_OPERAND (expr, i))
+       continue;
+ 
+       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
+       if (!ivloop)
+       return NULL;
+       maxdepth = MAX (maxdepth, loop_depth (ivloop));
+     }
+ 
+   return superloop_at_depth (loop, maxdepth);
+ }
+ 
  /* Returns true if expression EXPR is obviously invariant in LOOP,
     i.e. if all its operands are defined outside of the LOOP.  LOOP
     should not be the function body.  */
Index: gcc/tree-vect-data-refs.c
===================================================================
*** gcc/tree-vect-data-refs.c   (revision 197743)
--- gcc/tree-vect-data-refs.c   (working copy)
*************** vect_enhance_data_refs_alignment (loop_v
*** 1456,1475 ****
              }
            else
              {
!               /* If we don't know all the misalignment values, we prefer
!                  peeling for data-ref that has maximum number of data-refs
                   with the same alignment, unless the target prefers to align
                   stores over load.  */
                if (all_misalignments_unknown)
                  {
!                   if (same_align_drs_max 
!                       < STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ()
!                       || !dr0)
                      {
!                       same_align_drs_max
!                         = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
                        dr0 = dr;
                      }
  
                    if (!first_store && DR_IS_WRITE (dr))
                      first_store = dr;
--- 1456,1490 ----
              }
            else
              {
!               /* If we don't know any misalignment values, we prefer
!                  peeling for data-ref that has the maximum number of data-refs
                   with the same alignment, unless the target prefers to align
                   stores over load.  */
                if (all_misalignments_unknown)
                  {
!                 unsigned same_align_drs
!                   = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
!                   if (!dr0
!                     || same_align_drs_max < same_align_drs)
                      {
!                       same_align_drs_max = same_align_drs;
                        dr0 = dr;
                      }
+                 /* For data-refs with the same number of related
+                    accesses prefer the one where the misalign
+                    computation will be invariant in the outermost loop.  */
+                 else if (same_align_drs_max == same_align_drs)
+                   {
+                     struct loop *ivloop0, *ivloop;
+                     ivloop0 = outermost_invariant_loop_for_expr
+                         (loop, DR_BASE_ADDRESS (dr0));
+                     ivloop = outermost_invariant_loop_for_expr
+                         (loop, DR_BASE_ADDRESS (dr));
+                     if ((ivloop && !ivloop0)
+                         || (ivloop && ivloop0
+                             && flow_loop_nested_p (ivloop, ivloop0)))
+                       dr0 = dr;
+                   }
  
                    if (!first_store && DR_IS_WRITE (dr))
                      first_store = dr;
*************** vect_enhance_data_refs_alignment (loop_v
*** 1478,1485 ****
                /* If there are both known and unknown misaligned accesses in the
                   loop, we choose peeling amount according to the known
                   accesses.  */
- 
- 
                if (!supportable_dr_alignment)
                  {
                    dr0 = dr;
--- 1493,1498 ----

Reply via email to