On Fri, Dec 1, 2023 at 10:26 PM Richard Biener
<richard.guent...@gmail.com> wrote:
>
> On Fri, Dec 1, 2023 at 3:39 AM liuhongt <hongtao....@intel.com> wrote:
> >
> > > Hmm, I would suggest you put reg_needed into the class and accumulate
> > > over all vec_construct, with your patch you pessimize a single v32qi
> > > over two separate v16qi for example.  Also currently the whole block is
> > > gated with INTEGRAL_TYPE_P but register pressure would be also
> > > a concern for floating point vectors.  finish_cost would then apply an
> > > adjustment.
> >
> > Changed.
> >
> > > 'target_avail_regs' is for GENERAL_REGS, does that include APX regs?
> > > I don't see anything similar for FP regs, but I guess the target should 
> > > know
> > > or maybe there's a #regs in regclass query already.
> > Haven't see any, use below setting.
> >
> > unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
> >
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > No big impact on SPEC2017.
> > Observe 1 big improvement from other benchmark by avoiding vectorization 
> > with
> > vec_construct v32qi which caused lots of spills.
> >
> > Ok for trunk?
>
> LGTM, let's see what x86 maintainers think.
+Honza and Uros.
Any comments?
>
> Richard.
>
> > For vec_contruct, the components must be live at the same time if
> > they're not loaded from memory, when the number of those components
> > exceeds available registers, spill happens. Try to account that with a
> > rough estimation.
> > ??? Ideally, we should have an overall estimation of register pressure
> > if we know the live range of all variables.
> >
> > gcc/ChangeLog:
> >
> >         * config/i386/i386.cc (ix86_vector_costs::add_stmt_cost):
> >         Count sse_reg/gpr_regs for components not loaded from memory.
> >         (ix86_vector_costs:ix86_vector_costs): New constructor.
> >         (ix86_vector_costs::m_num_gpr_needed[3]): New private memeber.
> >         (ix86_vector_costs::m_num_sse_needed[3]): Ditto.
> >         (ix86_vector_costs::finish_cost): Estimate overall register
> >         pressure cost.
> >         (ix86_vector_costs::ix86_vect_estimate_reg_pressure): New
> >         function.
> > ---
> >  gcc/config/i386/i386.cc | 54 ++++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 50 insertions(+), 4 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index 9390f525b99..dcaea6c2096 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -24562,15 +24562,34 @@ ix86_noce_conversion_profitable_p (rtx_insn *seq, 
> > struct noce_if_info *if_info)
> >  /* x86-specific vector costs.  */
> >  class ix86_vector_costs : public vector_costs
> >  {
> > -  using vector_costs::vector_costs;
> > +public:
> > +  ix86_vector_costs (vec_info *, bool);
> >
> >    unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
> >                               stmt_vec_info stmt_info, slp_tree node,
> >                               tree vectype, int misalign,
> >                               vect_cost_model_location where) override;
> >    void finish_cost (const vector_costs *) override;
> > +
> > +private:
> > +
> > +  /* Estimate register pressure of the vectorized code.  */
> > +  void ix86_vect_estimate_reg_pressure ();
> > +  /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer, it's used for
> > +     estimation of register pressure.
> > +     ??? Currently it's only used by vec_construct/scalar_to_vec
> > +     where we know it's not loaded from memory.  */
> > +  unsigned m_num_gpr_needed[3];
> > +  unsigned m_num_sse_needed[3];
> >  };
> >
> > +ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool 
> > costing_for_scalar)
> > +  : vector_costs (vinfo, costing_for_scalar),
> > +    m_num_gpr_needed (),
> > +    m_num_sse_needed ()
> > +{
> > +}
> > +
> >  /* Implement targetm.vectorize.create_costs.  */
> >
> >  static vector_costs *
> > @@ -24748,8 +24767,7 @@ ix86_vector_costs::add_stmt_cost (int count, 
> > vect_cost_for_stmt kind,
> >      }
> >    else if ((kind == vec_construct || kind == scalar_to_vec)
> >            && node
> > -          && SLP_TREE_DEF_TYPE (node) == vect_external_def
> > -          && INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
> > +          && SLP_TREE_DEF_TYPE (node) == vect_external_def)
> >      {
> >        stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, 
> > misalign);
> >        unsigned i;
> > @@ -24785,7 +24803,15 @@ ix86_vector_costs::add_stmt_cost (int count, 
> > vect_cost_for_stmt kind,
> >                   && (gimple_assign_rhs_code (def) != BIT_FIELD_REF
> >                       || !VECTOR_TYPE_P (TREE_TYPE
> >                                 (TREE_OPERAND (gimple_assign_rhs1 (def), 
> > 0))))))
> > -           stmt_cost += ix86_cost->sse_to_integer;
> > +           {
> > +             if (fp)
> > +               m_num_sse_needed[where]++;
> > +             else
> > +               {
> > +                 m_num_gpr_needed[where]++;
> > +                 stmt_cost += ix86_cost->sse_to_integer;
> > +               }
> > +           }
> >         }
> >        FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
> >         if (TREE_CODE (op) == SSA_NAME)
> > @@ -24821,6 +24847,24 @@ ix86_vector_costs::add_stmt_cost (int count, 
> > vect_cost_for_stmt kind,
> >    return retval;
> >  }
> >
> > +void
> > +ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
> > +{
> > +  unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
> > +  unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;
> > +
> > +  /* Any better way to have target available fp registers, currently use 
> > SSE_REGS.  */
> > +  unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 
> > 8;
> > +  for (unsigned i = 0; i != 3; i++)
> > +    {
> > +      if (m_num_gpr_needed[i] > target_avail_regs)
> > +       m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - 
> > target_avail_regs);
> > +      /* Only measure sse registers pressure.  */
> > +      if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
> > +       m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - 
> > target_avail_sse);
> > +    }
> > +}
> > +
> >  void
> >  ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
> >  {
> > @@ -24843,6 +24887,8 @@ ix86_vector_costs::finish_cost (const vector_costs 
> > *scalar_costs)
> >         m_costs[vect_body] = INT_MAX;
> >      }
> >
> > +  ix86_vect_estimate_reg_pressure ();
> > +
> >    vector_costs::finish_cost (scalar_costs);
> >  }
> >
> > --
> > 2.31.1
> >



-- 
BR,
Hongtao

Reply via email to