https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79824

--- Comment #2 from Richard Biener <rguenth at gcc dot gnu.org> ---
Ok, so in the SLP case this is actually safe because we only ever load vectors
that end up having at least a single element used (we do not load from unused
gaps).  In the non-SLP case I botched up the patch: plain interleaving with
a single element is rejected with gaps bigger than vector size (whoo!), and
with multiple elements I failed to properly use 'first_stmt' for the alignment
test, and thus we always get peeling for gaps.

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c       (revision 245908)
+++ gcc/tree-vect-stmts.c       (working copy)
@@ -1796,7 +1796,8 @@ get_group_load_store_type (gimple *stmt,
       bool would_overrun_p = (gap != 0);
       /* If the access is aligned an overrun is fine.  */
       if (would_overrun_p
-         && aligned_access_p (STMT_VINFO_DATA_REF (stmt_info)))
+         && aligned_access_p
+               (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
        would_overrun_p = false;
       if (!STMT_VINFO_STRIDED_P (stmt_info)
          && (can_overrun_p || !would_overrun_p)

makes

/* Reproducer kernel: for every n in [0, COUNT) store into a[n] the sum of
   the first two elements of the n-th group of four elements of b.  The
   elements b[n * 4 + 2] and b[n * 4 + 3] are never read.
   TYPE and COUNT are not defined in this excerpt -- presumably they are
   supplied by the surrounding testcase; confirm there.
   The function is marked noinline so it is not inlined into its caller.  */
void __attribute__((noinline))
foo (TYPE *__restrict a, TYPE *__restrict b)
{
  int n;

  /* Let the compiler assume both pointers are aligned to at least twice
     the size of one element.  */
  b = __builtin_assume_aligned (b, sizeof (TYPE) * 2);
  a = __builtin_assume_aligned (a, sizeof (TYPE) * 2);
  for (n = 0; n < COUNT; n++)
    {
      /* Only the first two elements of each group of four in b are used.  */
      a[n] = b[n * 4] + b[n * 4 + 1];
    }
}

fail on x86_64.  So I am testing

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c       (revision 245908)
+++ gcc/tree-vect-stmts.c       (working copy)
@@ -1731,7 +1731,7 @@ get_group_load_store_type (gimple *stmt,
   bool single_element_p = (stmt == first_stmt
                           && !GROUP_NEXT_ELEMENT (stmt_info));
   unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
-  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);

   /* True if the vectorized statements would access beyond the last
      statement in the group.  */
@@ -1794,9 +1794,13 @@ get_group_load_store_type (gimple *stmt,
       /* If there is a gap at the end of the group then these optimizations
         would access excess elements in the last iteration.  */
       bool would_overrun_p = (gap != 0);
-      /* If the access is aligned an overrun is fine.  */
+      /* If the access is aligned an overrun is fine, but only if the
+         overrun is not inside an unused vector (if the gap is as large
+        or larger than a vector).  */
       if (would_overrun_p
-         && aligned_access_p (STMT_VINFO_DATA_REF (stmt_info)))
+         && gap < nunits
+         && aligned_access_p
+               (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
        would_overrun_p = false;
       if (!STMT_VINFO_STRIDED_P (stmt_info)
          && (can_overrun_p || !would_overrun_p)

Reply via email to