On Mon, 7 Nov 2016, Richard Biener wrote:

> 
> Currently we force peeling for gaps whenever element overrun can occur,
> but for aligned accesses we know that the loads cannot trap, so we can
> avoid the peeling.
> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu (I expect
> some testsuite fallout here, so I didn't bother to invent a new
> testcase).
> 
> This is just in case somebody thinks the overrun is a bad idea in
> general (even when it does not trap), for example under ASAN or
> valgrind.

This is what I applied.

Bootstrapped and tested on x86_64-unknown-linux-gnu.
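
To illustrate the kind of access pattern this is about, consider a loop
of roughly the following shape (a sketch modeled on the adjusted
testcase, not part of the patch): only every other element of cc is
read, so the vectorized loop loads one element past the group in the
last iteration, and with cc sufficiently aligned that extra load cannot
cross a page boundary and cannot trap.

/* Sketch based on gcc.dg/vect/vect-nb-iter-ub-2.c (illustrative only).
   Only cc[i * 2] is used; a vector load covering cc[i * 2] and
   cc[i * 2 + 1] overruns the group in the last iteration.  If cc is
   sufficiently aligned, that overrun cannot trap, so peeling for gaps
   is not needed.  */
int ii[32];
char cc[66];

void
foo (int n)
{
  int i;
  for (i = 0; i < n; i++)
    ii[i] = (int) cc[i * 2];
}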

Richard.

2016-11-08  Richard Biener  <rguent...@suse.de>

        * tree-vect-stmts.c (get_group_load_store_type): If the
        access is aligned, do not trigger peeling for gaps.
        * tree-vect-data-refs.c (vect_compute_data_ref_alignment): Do not
        force alignment of vars with DECL_USER_ALIGN.

        * gcc.dg/vect/vect-nb-iter-ub-2.c: Adjust.

diff --git a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c
index bc07b4b..4e13702 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-nb-iter-ub-2.c
@@ -3,7 +3,7 @@
 #include "tree-vect.h"
 
 int ii[32];
-char cc[66] =
+char cc[66] __attribute__((aligned(1))) =
   { 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 0, 9, 0,
     10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15, 0, 16, 0, 17, 0, 18, 0, 19, 0,
     20, 0, 21, 0, 22, 0, 23, 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0,
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index b03cb1e..f014d68 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -831,6 +831,19 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
          return true;
        }
 
+      if (DECL_USER_ALIGN (base))
+       {
+         if (dump_enabled_p ())
+           {
+             dump_printf_loc (MSG_NOTE, vect_location,
+                              "not forcing alignment of user-aligned "
+                              "variable: ");
+             dump_generic_expr (MSG_NOTE, TDF_SLIM, base);
+             dump_printf (MSG_NOTE, "\n");
+           }
+         return true;
+       }
+
       /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 15aec21..c29e73d 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1770,6 +1770,11 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
                               " non-consecutive accesses\n");
              return false;
            }
+         /* If the access is aligned an overrun is fine.  */
+         if (overrun_p
+             && aligned_access_p
+                  (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
+           overrun_p = false;
          if (overrun_p && !can_overrun_p)
            {
              if (dump_enabled_p ())
@@ -1789,6 +1794,10 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
       /* If there is a gap at the end of the group then these optimizations
         would access excess elements in the last iteration.  */
       bool would_overrun_p = (gap != 0);
+      /* If the access is aligned an overrun is fine.  */
+      if (would_overrun_p
+         && aligned_access_p (STMT_VINFO_DATA_REF (stmt_info)))
+       would_overrun_p = false;
       if (!STMT_VINFO_STRIDED_P (stmt_info)
          && (can_overrun_p || !would_overrun_p)
          && compare_step_with_zero (stmt) > 0)
