The following adds handling of gaps by representing them with NULL
entries in SLP_TREE_SCALAR_STMTS for the unpermuted load node.

The SLP discovery changes could be elided if we manually build the
load node instead.

        * tree-vect-slp.cc (vect_build_slp_tree_1): Handle NULL stmt.
        (vect_build_slp_tree_2): Likewise.  Release load permutation
        when there's a NULL in SLP_TREE_SCALAR_STMTS and assert there's
        no actual permutation in that case.
        (vect_lower_load_permutations): Handle gaps in loads.

        * gcc.dg/vect/slp-51.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/slp-51.c | 17 +++++++++++
 gcc/tree-vect-slp.cc               | 49 ++++++++++++++++++------------
 2 files changed, 47 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/slp-51.c

diff --git a/gcc/testsuite/gcc.dg/vect/slp-51.c b/gcc/testsuite/gcc.dg/vect/slp-51.c
new file mode 100644
index 00000000000..91ae763be30
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-51.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+
+void foo (int * __restrict x, int *y)
+{
+  x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__);
+  y = __builtin_assume_aligned (y, __BIGGEST_ALIGNMENT__);
+  for (int i = 0; i < 1024; ++i)
+    {
+      x[4*i+0] = y[4*i+0];
+      x[4*i+1] = y[4*i+2] * 2;
+      x[4*i+2] = y[4*i+0] + 3;
+      x[4*i+3] = y[4*i+2] * 2 - 5;
+    }
+}
+
+/* Check we can handle SLP with gaps and an interleaving scheme.  */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { vect_int && vect_int_mult } } } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 6f3822af950..fdefee90e92 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1080,10 +1080,15 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
   stmt_vec_info stmt_info;
   FOR_EACH_VEC_ELT (stmts, i, stmt_info)
     {
-      gimple *stmt = stmt_info->stmt;
       swap[i] = 0;
       matches[i] = false;
+      if (!stmt_info)
+       {
+         matches[i] = true;
+         continue;
+       }
 
+      gimple *stmt = stmt_info->stmt;
       if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);
 
@@ -1984,10 +1989,16 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
          stmt_vec_info first_stmt_info
            = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
          bool any_permute = false;
+         bool any_null = false;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
            {
              int load_place;
-             if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+             if (! load_info)
+               {
+                 load_place = j;
+                 any_null = true;
+               }
+             else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
                load_place = vect_get_place_in_interleaving_chain
                    (load_info, first_stmt_info);
              else
@@ -1996,6 +2007,11 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
              any_permute |= load_place != j;
              load_permutation.quick_push (load_place);
            }
+         if (any_null)
+           {
+             gcc_assert (!any_permute);
+             load_permutation.release ();
+           }
 
          if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
            {
@@ -3978,24 +3994,11 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
   stmt_vec_info first
     = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (loads[0])[0]);
 
-  /* ???  In principle we have to consider a gap up to the next full
-     vector, but we have to actually represent a scalar stmt for the
-     gaps value so delay handling this.  The same is true for
-     inbetween gaps which the load places in the load-permutation
-     represent.  It's probably not worth trying an intermediate packing
-     to vectors without gap even if that might handle some more cases.
-     Instead get the gap case correct in some way.  */
-  unsigned group_lanes = 0;
-  for (stmt_vec_info s = first; s; s = DR_GROUP_NEXT_ELEMENT (s))
-    {
-      if ((s == first && DR_GROUP_GAP (s) != 0)
-         || (s != first && DR_GROUP_GAP (s) != 1))
-       return;
-      group_lanes++;
-    }
   /* Only a power-of-two number of lanes matches interleaving with N levels.
+     The non-SLP path also supports DR_GROUP_SIZE == 3.
      ???  An even number of lanes could be reduced to 1<<ceil_log2(N)-1 lanes
      at each step.  */
+  unsigned group_lanes = DR_GROUP_SIZE (first);
   if (exact_log2 (group_lanes) == -1)
     return;
 
@@ -4017,11 +4020,19 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
        continue;
 
       /* First build (and possibly re-use) a load node for the
-        unpermuted group.  */
+        unpermuted group.  Gaps in the middle and on the end are
+        represented with NULL stmts.  */
       vec<stmt_vec_info> stmts;
       stmts.create (group_lanes);
       for (stmt_vec_info s = first; s; s = DR_GROUP_NEXT_ELEMENT (s))
-       stmts.quick_push (s);
+       {
+         if (s != first)
+           for (unsigned i = 1; i < DR_GROUP_GAP (s); ++i)
+             stmts.quick_push (NULL);
+         stmts.quick_push (s);
+       }
+      for (unsigned i = 0; i < DR_GROUP_GAP (first); ++i)
+       stmts.quick_push (NULL);
       poly_uint64 max_nunits;
       bool *matches = XALLOCAVEC (bool, group_lanes);
       unsigned limit = 1;
-- 
2.35.3

Reply via email to