Hi Richi,

We decided to take the regression in any code-gen this could
give and fix it properly next stage-1.  As such here's a new
patch based on your previous feedback.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * tree-vect-slp.c (vect_analyze_slp_instance): Moved load/store lanes
        check to ...
        * tree-vect-loop.c (vect_analyze_loop_2): ..Here

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/slp-11b.c: Update output scan.
        * gcc.dg/vect/slp-perm-6.c: Likewise.

> -----Original Message-----
> From: rguent...@c653.arch.suse.de <rguent...@c653.arch.suse.de> On
> Behalf Of Richard Biener
> Sent: Thursday, October 22, 2020 9:44 AM
> To: Tamar Christina <tamar.christ...@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <n...@arm.com>; o...@ucw.cz
> Subject: Re: [PATCH] SLP: Move load/store-lanes check till late
> 
> On Wed, 21 Oct 2020, Tamar Christina wrote:
> 
> > Hi All,
> >
> > This moves the code that checks for load/store lanes further in the
> > pipeline and places it after slp_optimize.  This would allow us to
> > perform optimizations on the SLP tree and only bail out if we really have a
> permute.
> >
> > With this change it allows us to handle permutes such as {1,1,1,1}
> > which should be handled by a load and replicate.
> >
> > This change however makes it all or nothing. Either all instances can
> > be handled or none at all.  This is why some of the test cases have been
> adjusted.
> 
> So this possibly leaves a loop unvectorized in case there's a ldN/stN
> opportunity but another SLP instance with a permutation not handled by
> interleaving is present.  What I was originally suggesting is to only cancel 
> the
> SLP build if _all_ instances can be handled with ldN/stN.
> 
> Of course I'm also happy with completely removing this heuristics.
> 
> Note some of the comments look off now, also the assignment to ok before
> the goto is pointless and you should probably turn this into a dump print
> instead.
> 
> Thanks,
> Richard.
> 
> > Bootstrapped Regtested on aarch64-none-linux-gnu, -x86_64-pc-linux-gnu
> > and no issues.
> >
> > Ok for master?
> 
> 
> 
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> >     * tree-vect-slp.c (vect_analyze_slp_instance): Moved load/store
> lanes
> >     check to ...
> >     * tree-vect-loop.c (vect_analyze_loop_2): ..Here
> >
> > gcc/testsuite/ChangeLog:
> >
> >     * gcc.dg/vect/slp-11b.c: Update output scan.
> >     * gcc.dg/vect/slp-perm-6.c: Likewise.
> >
> >
> 
> --
> Richard Biener <rguent...@suse.de>
> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409
> Nuernberg, Germany; GF: Felix Imend
diff --git a/gcc/testsuite/gcc.dg/vect/slp-11b.c b/gcc/testsuite/gcc.dg/vect/slp-11b.c
index 0cc23770badf0e00ef98769a2dd14a92dca32cca..fe5bb0c3ce7682c7cef1313e342d95aba3fe11b2 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-11b.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-11b.c
@@ -45,4 +45,4 @@ int main (void)
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided4 && vect_int_mult } } } } */
 /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided4 && vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "re-trying with SLP disabled" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
index 38489291a2659c989121d44c9e9e7bdfaa12f868..07bf8916de7ce88bbb1d65437f8bf6d8ab17efe6 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
@@ -106,7 +106,7 @@ int main (int argc, const char* argv[])
 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && { {! vect_load_lanes } && {! vect_partial_vectors_usage_1 } } } } } } */
 /* The epilogues are vectorized using partial vectors.  */
 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { vect_perm3_int && { {! vect_load_lanes } && vect_partial_vectors_usage_1 } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
 /* { dg-final { scan-tree-dump "Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */
 /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
 /* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 6fa185daa2836062814f9c9a6659011a3153c6a2..56873b93ef9905ff76929f471de4d32559268304 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2365,6 +2365,78 @@ start_over:
 				       "unsupported SLP instances\n");
 	  goto again;
 	}
+
+      /* Check whether any load in ALL SLP instances is possibly permuted.  */
+      slp_tree load_node, slp_root;
+      unsigned i, x;
+      slp_instance instance;
+      bool can_use_lanes = true;
+      FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), x, instance)
+	{
+	  slp_root = SLP_INSTANCE_TREE (instance);
+	  int group_size = SLP_TREE_LANES (slp_root);
+	  tree vectype = SLP_TREE_VECTYPE (slp_root);
+	  bool loads_permuted = false;
+	  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
+	    {
+	      if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
+		continue;
+	      unsigned j;
+	      stmt_vec_info load_info;
+	      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
+		if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
+		  {
+		    loads_permuted = true;
+		    break;
+		  }
+	    }
+
+	  /* If the loads and stores can be handled with load/store-lane
+	     instructions record it and move on to the next instance.  */
+	  if (loads_permuted
+	      && vect_store_lanes_supported (vectype, group_size, false))
+	    {
+	      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
+		{
+		  stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
+		      (SLP_TREE_SCALAR_STMTS (load_node)[0]);
+		  /* Use SLP for strided accesses (or if we can't
+		     load-lanes).  */
+		  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
+		      || ! vect_load_lanes_supported
+			    (STMT_VINFO_VECTYPE (stmt_vinfo),
+			     DR_GROUP_SIZE (stmt_vinfo), false))
+		    break;
+		}
+
+	      can_use_lanes
+		= can_use_lanes && i == SLP_INSTANCE_LOADS (instance).length ();
+
+	      if (can_use_lanes && dump_enabled_p ())
+		dump_printf_loc (MSG_NOTE, vect_location,
+				 "SLP instance %p can use load/store-lanes\n",
+				 instance);
+	    }
+	  else
+	    {
+	      can_use_lanes = false;
+	      break;
+	    }
+	}
+
+      /* If all SLP instances can use load/store-lanes abort SLP and try again
+	 with SLP disabled.  */
+      if (can_use_lanes)
+	{
+	  ok = opt_result::failure_at (vect_location,
+				       "Built SLP cancelled: can use "
+				       "load/store-lanes\n");
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			     "Built SLP cancelled: all SLP instances support "
+			     "load/store-lanes\n");
+	  goto again;
+	}
     }
 
   /* Dissolve SLP-only groups.  */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index e97fbe897a76008d50ee94c3b1b009344cc37d4a..76382ac16387ea50b27d0bcce57e664898434498 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -208,7 +208,7 @@ typedef struct _slp_oprnd_info
 
 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
    operand.  */
-static vec<slp_oprnd_info> 
+static vec<slp_oprnd_info>
 vect_create_oprnd_info (int nops, int group_size)
 {
   int i;
@@ -1096,7 +1096,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
 	    {
 	      if (dump_enabled_p ())
 		{
-		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
+		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 				   "Build SLP failed: different operation "
 				   "in stmt %G", stmt);
 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -2263,56 +2263,11 @@ vect_build_slp_instance (vec_info *vinfo,
 			     "SLP size %u vs. limit %u.\n",
 			     tree_size, max_tree_size);
 
-	  /* Check whether any load is possibly permuted.  */
-	  slp_tree load_node;
-	  bool loads_permuted = false;
-	  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
-	    {
-	      if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
-		continue;
-	      unsigned j;
-	      stmt_vec_info load_info;
-	      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
-		if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
-		  {
-		    loads_permuted = true;
-		    break;
-		  }
-	    }
-
-	  /* If the loads and stores can be handled with load/store-lane
-	     instructions do not generate this SLP instance.  */
-	  if (is_a <loop_vec_info> (vinfo)
-	      && loads_permuted
-	      && kind == slp_inst_kind_store
-	      && vect_store_lanes_supported
-		   (STMT_VINFO_VECTYPE (scalar_stmts[0]), group_size, false))
-	    {
-	      slp_tree load_node;
-	      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
-		{
-		  stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
-		      (SLP_TREE_SCALAR_STMTS (load_node)[0]);
-		  /* Use SLP for strided accesses (or if we can't load-lanes).  */
-		  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
-		      || ! vect_load_lanes_supported
-		      (STMT_VINFO_VECTYPE (stmt_vinfo),
-		       DR_GROUP_SIZE (stmt_vinfo), false))
-		    break;
-		}
-	      if (i == SLP_INSTANCE_LOADS (new_instance).length ())
-		{
-		  if (dump_enabled_p ())
-		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				     "Built SLP cancelled: can use "
-				     "load/store-lanes\n");
-		  vect_free_slp_instance (new_instance);
-		  return false;
-		}
-	    }
-
-	  /* Fixup SLP reduction chains.  */
-	  if (kind == slp_inst_kind_reduc_chain)
+	  /* If this is a reduction chain with a conversion in front
+	     amend the SLP tree with a node for that.  */
+	  if (!dr
+	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+	      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
 	    {
 	      /* If this is a reduction chain with a conversion in front
 		 amend the SLP tree with a node for that.  */
@@ -3877,7 +3832,7 @@ vect_bb_partition_graph (bb_vec_info bb_vinfo)
    and return it.  Do not account defs that are marked in LIFE and
    update LIFE according to uses of NODE.  */
 
-static void 
+static void
 vect_bb_slp_scalar_cost (vec_info *vinfo,
 			 slp_tree node, vec<bool, va_heap> *life,
 			 stmt_vector_for_cost *cost_vec,
@@ -3888,7 +3843,7 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
   slp_tree child;
 
   if (visited.add (node))
-    return; 
+    return;
 
   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
     {
@@ -4143,7 +4098,7 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
 	{
 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 			   "Failed to SLP the basic block.\n");
-	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
+	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 			   "not vectorized: failed to find SLP opportunities "
 			   "in basic block.\n");
 	}
@@ -5008,7 +4963,7 @@ vect_transform_slp_perm_load (vec_info *vinfo,
 	  if (!analyze_only)
 	    {
 	      tree mask_vec = NULL_TREE;
-		  
+
 	      if (! noop_p)
 		mask_vec = vect_gen_perm_mask_checked (vectype, indices);
 

Reply via email to