https://gcc.gnu.org/g:c19aedfc0c3531e2c2b7c92e7b062be29ecbf0f8

commit r16-4726-gc19aedfc0c3531e2c2b7c92e7b062be29ecbf0f8
Author: Richard Biener <[email protected]>
Date:   Fri Oct 24 12:31:54 2025 +0200

    tree-optimization/120687 - legitimise some permutes before optimizing
    
    The following does a simple legitimising attempt on the SLP graph
    permutations before trying to optimize them.  For the case we have
    a single non-zero layout we can force that to all partitions if
    it is compatible.  This way we end up with the most canonical
    (and possibly no-op) load permutations and permutes.
    
    I have refrained from trying to use internal_node_cost to actually
    check if the result is legitimate (it would need at least the
    change to anticipate redundant load permute eliding).  This relies
    on start_choosing_layouts chosing layout zero for everything we
    cannot handle (like non-bijective permutes).
    
    What's still missing is to try to process disconnected parts of the
    SLP graph separately.  We should possibly try to handle those
    separately through all of the SLP optimize process for simplicity.
    
    This also includes a fix for a dumping ICE when we permute the
    root node of a reduction chain that was associated.
    
            PR tree-optimization/120687
            * tree-vect-slp.cc (vect_optimize_slp_pass::is_compatible_layout):
            New overload for checking a whole partition.
            (vect_optimize_slp_pass::legitimize): New function trying
            a single layout for all partitions for now.
            (vect_optimize_slp_pass::run): Try legitimizing to a single
            layout before propagating.
            (vect_slp_analyze_operations): For dumping deal with
            SLP_TREE_SCALAR_STMTS being empty or element zero being NULL.

Diff:
---
 gcc/tree-vect-slp.cc | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 90 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 66c45185892f..f64f874abfff 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -6302,6 +6302,7 @@ private:
 
   /* Layout selection.  */
   bool is_compatible_layout (slp_tree, unsigned int);
+  bool is_compatible_layout (const slpg_partition_info &, unsigned int);
   int change_layout_cost (slp_tree, unsigned int, unsigned int);
   slpg_partition_layout_costs &partition_layout_costs (unsigned int,
                                                       unsigned int);
@@ -6309,6 +6310,7 @@ private:
                               int, unsigned int);
   int internal_node_cost (slp_tree, int, unsigned int);
   void start_choosing_layouts ();
+  bool legitimize ();
 
   /* Cost propagation.  */
   slpg_layout_cost edge_layout_cost (graph_edge *, unsigned int,
@@ -6715,6 +6717,29 @@ vect_optimize_slp_pass::is_compatible_layout (slp_tree 
node,
   return true;
 }
 
+/* Return true if layout LAYOUT_I is compatible with the number of SLP lanes
+   that NODE would operate on for each NODE in PARTITION.
+   This test is independent of NODE's actual operations.  */
+
+bool
+vect_optimize_slp_pass::is_compatible_layout (const slpg_partition_info
+                                               &partition,
+                                             unsigned int layout_i)
+{
+  for (unsigned int order_i = partition.node_begin;
+       order_i < partition.node_end; ++order_i)
+    {
+      unsigned int node_i = m_partitioned_nodes[order_i];
+      auto &vertex = m_vertices[node_i];
+
+      /* The layout is incompatible if it is individually incompatible
+        with any node in the partition.  */
+      if (!is_compatible_layout (vertex.node, layout_i))
+       return false;
+    }
+  return true;
+}
+
 /* Return the cost (in arbtirary units) of going from layout FROM_LAYOUT_I
    to layout TO_LAYOUT_I for a node like NODE.  Return -1 if either of the
    layouts is incompatible with NODE or if the change is not possible for
@@ -8034,6 +8059,62 @@ vect_optimize_slp_pass::decide_masked_load_lanes ()
     }
 }
 
+/* Perform legitimizing attempts.  This is intended to improve the
+   situation when layout 0 is not valid which is a situation the cost
+   based propagation does not handle well.
+   Return true if further layout optimization is possible, false if
+   the layout configuration should be considered final.  */
+
+bool
+vect_optimize_slp_pass::legitimize ()
+{
+  /* Perform a very simple legitimizing attempt by attempting to choose
+     a single layout for all partitions that will make all permutations
+     a noop.  That should also be the optimal layout choice in case
+     layout zero is legitimate.
+     ???  Disconnected components of the SLP graph could have distinct
+     single layouts.  */
+  int single_layout_i = -1;
+  unsigned deferred_up_to = -1U;
+  for (unsigned partition_i = 0; partition_i < m_partitions.length ();
+       ++partition_i)
+    {
+      auto &partition = m_partitions[partition_i];
+      if (single_layout_i == -1)
+       {
+         single_layout_i = partition.layout;
+         deferred_up_to = partition_i;
+       }
+      else if (partition.layout == single_layout_i || partition.layout == -1)
+       ;
+      else
+       single_layout_i = 0;
+      if (single_layout_i == 0)
+       return true;
+
+      if (single_layout_i != -1
+         && !is_compatible_layout (partition, single_layout_i))
+       return true;
+    }
+
+  if (single_layout_i <= 0)
+    return true;
+
+  for (unsigned partition_i = 0; partition_i < deferred_up_to; ++partition_i)
+    if (!is_compatible_layout (m_partitions[partition_i],
+                              single_layout_i))
+      return true;
+
+  for (unsigned partition_i = 0; partition_i < m_partitions.length ();
+       ++partition_i)
+    {
+      auto &partition = m_partitions[partition_i];
+      partition.layout = single_layout_i;
+    }
+
+  return false;
+}
+
 /* Main entry point for the SLP graph optimization pass.  */
 
 void
@@ -8044,8 +8125,11 @@ vect_optimize_slp_pass::run ()
   start_choosing_layouts ();
   if (m_perms.length () > 1)
     {
-      forward_pass ();
-      backward_pass ();
+      if (legitimize ())
+       {
+         forward_pass ();
+         backward_pass ();
+       }
       if (dump_enabled_p ())
        dump ();
       materialize ();
@@ -9036,8 +9120,11 @@ vect_slp_analyze_operations (vec_info *vinfo)
          stmt_vec_info stmt_info;
          if (!SLP_INSTANCE_ROOT_STMTS (instance).is_empty ())
            stmt_info = SLP_INSTANCE_ROOT_STMTS (instance)[0];
-         else
+         else if (!SLP_TREE_SCALAR_STMTS (node).is_empty ()
+                  && SLP_TREE_SCALAR_STMTS (node)[0])
            stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
+         else
+           stmt_info = SLP_TREE_REPRESENTATIVE (node);
          if (is_a <loop_vec_info> (vinfo))
            {
              if (dump_enabled_p ())

Reply via email to