For vec_construct, the components must be live at the same time if
they're not loaded from memory.  When the number of those components
exceeds the available registers, spills happen.  Try to account for
that with a rough estimation.
??? Ideally, we should have an overall estimation of register pressure
if we know the live range of all variables.

The patch can avoid regressions caused by, e.g., a vec_construct with 32 chars.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.

Ok for trunk?

gcc/ChangeLog:

        * config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): Take
        register pressure into account for vec_construct when the
        components are not loaded from memory.
---
 gcc/config/i386/i386.cc | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 683ac643bc8..f8417555930 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -24706,6 +24706,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
       stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
       unsigned i;
       tree op;
+      unsigned reg_needed = 0;
       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
        if (TREE_CODE (op) == SSA_NAME)
          TREE_VISITED (op) = 0;
@@ -24737,11 +24738,30 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
                  && (gimple_assign_rhs_code (def) != BIT_FIELD_REF
                      || !VECTOR_TYPE_P (TREE_TYPE
                                (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
-           stmt_cost += ix86_cost->sse_to_integer;
+           {
+             stmt_cost += ix86_cost->sse_to_integer;
+             reg_needed++;
+           }
        }
       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
        if (TREE_CODE (op) == SSA_NAME)
          TREE_VISITED (op) = 0;
+
+      /* For vec_construct, the components must be live at the same time if
+        they're not loaded from memory.  When the number of those components
+        exceeds the available registers, spills happen.  Try to account for
+        that with a rough estimation.  Currently only integral modes are
+        handled, since scalar fp shares sse_regs with vectors.
+        ??? Ideally, we should have an overall estimation of register pressure
+        if we know the live range of all variables.  */
+      if (!fp && kind == vec_construct
+         && reg_needed > target_avail_regs)
+       {
+         unsigned spill_cost = ix86_builtin_vectorization_cost (scalar_store,
+                                                                vectype,
+                                                                misalign);
+         stmt_cost += spill_cost * (reg_needed - target_avail_regs);
+       }
     }
   if (stmt_cost == -1)
     stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
-- 
2.31.1

Reply via email to