Ping.

On Tue, Jan 12, 2021 at 02:02:36PM +1030, Alan Modra wrote:
> Ping
> https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555759.html
> 
> On Thu, Oct 08, 2020 at 09:28:00AM +1030, Alan Modra wrote:
> > When optimizing for size we shouldn't be using metrics based on speed
> > or vice versa.  rtlanal.c:get_full_rtx_cost wants both speed and size
> > metrics from rs6000_rtx_costs independent of the global optimize_size.
> > 
> > Note that the patch changes param_simultaneous_prefetches,
> > param_l1_cache_size, param_l1_cache_line_size and param_l2_cache_size,
> > which were previously all set to zero for optimize_size.  I think that
> > was a bug.  Those params are a function of the processor.
> > 
> >     * config/rs6000/rs6000.h (rs6000_cost): Don't declare.
> >     (struct processor_costs): Move to..
> >     * config/rs6000/rs6000.c: ..here.
> >     (rs6000_cost): Make static.
> >     (rs6000_option_override_internal): Ignore optimize_size when
> >     setting up rs6000_cost.
> >     (rs6000_insn_cost): Take into account optimize_size here
> >     instead.
> >     (rs6000_emit_parity): Likewise.
> >     (rs6000_rtx_costs): Don't use rs6000_cost when !speed.
> > 
> > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> > index d455aa52427..14ecbad5df4 100644
> > --- a/gcc/config/rs6000/rs6000.c
> > +++ b/gcc/config/rs6000/rs6000.c
> > @@ -497,7 +497,26 @@ rs6000_store_data_bypass_p (rtx_insn *out_insn, 
> > rtx_insn *in_insn)
> >  
> >  /* Processor costs (relative to an add) */
> >  
> > -const struct processor_costs *rs6000_cost;
> > +struct processor_costs {
> > +  const int mulsi;   /* cost of SImode multiplication.  */
> > +  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
> > +  const int mulsi_const9; /* cost of SImode mult by short constant.  */
> > +  const int muldi;   /* cost of DImode multiplication.  */
> > +  const int divsi;   /* cost of SImode division.  */
> > +  const int divdi;   /* cost of DImode division.  */
> > +  const int fp;              /* cost of simple SFmode and DFmode insns.  */
> > +  const int dmul;    /* cost of DFmode multiplication (and fmadd).  */
> > +  const int sdiv;    /* cost of SFmode division (fdivs).  */
> > +  const int ddiv;    /* cost of DFmode division (fdiv).  */
> > +  const int cache_line_size;    /* cache line size in bytes. */
> > +  const int l1_cache_size; /* size of l1 cache, in kilobytes.  */
> > +  const int l2_cache_size; /* size of l2 cache, in kilobytes.  */
> > +  const int simultaneous_prefetches; /* number of parallel prefetch
> > +                                   operations.  */
> > +  const int sfdf_convert;  /* cost of SF->DF conversion.  */
> > +};
> > +
> > +static const struct processor_costs *rs6000_cost;
> >  
> >  /* Instruction size costs on 32bit processors.  */
> >  static const
> > @@ -4618,131 +4637,128 @@ rs6000_option_override_internal (bool 
> > global_init_p)
> >      }
> >  
> >    /* Initialize rs6000_cost with the appropriate target costs.  */
> > -  if (optimize_size)
> > -    rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
> > -  else
> > -    switch (rs6000_tune)
> > -      {
> > -      case PROCESSOR_RS64A:
> > -   rs6000_cost = &rs64a_cost;
> > -   break;
> > +  switch (rs6000_tune)
> > +    {
> > +    case PROCESSOR_RS64A:
> > +      rs6000_cost = &rs64a_cost;
> > +      break;
> >  
> > -      case PROCESSOR_MPCCORE:
> > -   rs6000_cost = &mpccore_cost;
> > -   break;
> > +    case PROCESSOR_MPCCORE:
> > +      rs6000_cost = &mpccore_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPC403:
> > -   rs6000_cost = &ppc403_cost;
> > -   break;
> > +    case PROCESSOR_PPC403:
> > +      rs6000_cost = &ppc403_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPC405:
> > -   rs6000_cost = &ppc405_cost;
> > -   break;
> > +    case PROCESSOR_PPC405:
> > +      rs6000_cost = &ppc405_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPC440:
> > -   rs6000_cost = &ppc440_cost;
> > -   break;
> > +    case PROCESSOR_PPC440:
> > +      rs6000_cost = &ppc440_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPC476:
> > -   rs6000_cost = &ppc476_cost;
> > -   break;
> > +    case PROCESSOR_PPC476:
> > +      rs6000_cost = &ppc476_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPC601:
> > -   rs6000_cost = &ppc601_cost;
> > -   break;
> > +    case PROCESSOR_PPC601:
> > +      rs6000_cost = &ppc601_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPC603:
> > -   rs6000_cost = &ppc603_cost;
> > -   break;
> > +    case PROCESSOR_PPC603:
> > +      rs6000_cost = &ppc603_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPC604:
> > -   rs6000_cost = &ppc604_cost;
> > -   break;
> > +    case PROCESSOR_PPC604:
> > +      rs6000_cost = &ppc604_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPC604e:
> > -   rs6000_cost = &ppc604e_cost;
> > -   break;
> > +    case PROCESSOR_PPC604e:
> > +      rs6000_cost = &ppc604e_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPC620:
> > -   rs6000_cost = &ppc620_cost;
> > -   break;
> > +    case PROCESSOR_PPC620:
> > +      rs6000_cost = &ppc620_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPC630:
> > -   rs6000_cost = &ppc630_cost;
> > -   break;
> > +    case PROCESSOR_PPC630:
> > +      rs6000_cost = &ppc630_cost;
> > +      break;
> >  
> > -      case PROCESSOR_CELL:
> > -   rs6000_cost = &ppccell_cost;
> > -   break;
> > +    case PROCESSOR_CELL:
> > +      rs6000_cost = &ppccell_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPC750:
> > -      case PROCESSOR_PPC7400:
> > -   rs6000_cost = &ppc750_cost;
> > -   break;
> > +    case PROCESSOR_PPC750:
> > +    case PROCESSOR_PPC7400:
> > +      rs6000_cost = &ppc750_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPC7450:
> > -   rs6000_cost = &ppc7450_cost;
> > -   break;
> > +    case PROCESSOR_PPC7450:
> > +      rs6000_cost = &ppc7450_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPC8540:
> > -      case PROCESSOR_PPC8548:
> > -   rs6000_cost = &ppc8540_cost;
> > -   break;
> > +    case PROCESSOR_PPC8540:
> > +    case PROCESSOR_PPC8548:
> > +      rs6000_cost = &ppc8540_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPCE300C2:
> > -      case PROCESSOR_PPCE300C3:
> > -   rs6000_cost = &ppce300c2c3_cost;
> > -   break;
> > +    case PROCESSOR_PPCE300C2:
> > +    case PROCESSOR_PPCE300C3:
> > +      rs6000_cost = &ppce300c2c3_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPCE500MC:
> > -   rs6000_cost = &ppce500mc_cost;
> > -   break;
> > +    case PROCESSOR_PPCE500MC:
> > +      rs6000_cost = &ppce500mc_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPCE500MC64:
> > -   rs6000_cost = &ppce500mc64_cost;
> > -   break;
> > +    case PROCESSOR_PPCE500MC64:
> > +      rs6000_cost = &ppce500mc64_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPCE5500:
> > -   rs6000_cost = &ppce5500_cost;
> > -   break;
> > +    case PROCESSOR_PPCE5500:
> > +      rs6000_cost = &ppce5500_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPCE6500:
> > -   rs6000_cost = &ppce6500_cost;
> > -   break;
> > +    case PROCESSOR_PPCE6500:
> > +      rs6000_cost = &ppce6500_cost;
> > +      break;
> >  
> > -      case PROCESSOR_TITAN:
> > -   rs6000_cost = &titan_cost;
> > -   break;
> > +    case PROCESSOR_TITAN:
> > +      rs6000_cost = &titan_cost;
> > +      break;
> >  
> > -      case PROCESSOR_POWER4:
> > -      case PROCESSOR_POWER5:
> > -   rs6000_cost = &power4_cost;
> > -   break;
> > +    case PROCESSOR_POWER4:
> > +    case PROCESSOR_POWER5:
> > +      rs6000_cost = &power4_cost;
> > +      break;
> >  
> > -      case PROCESSOR_POWER6:
> > -   rs6000_cost = &power6_cost;
> > -   break;
> > +    case PROCESSOR_POWER6:
> > +      rs6000_cost = &power6_cost;
> > +      break;
> >  
> > -      case PROCESSOR_POWER7:
> > -   rs6000_cost = &power7_cost;
> > -   break;
> > +    case PROCESSOR_POWER7:
> > +      rs6000_cost = &power7_cost;
> > +      break;
> >  
> > -      case PROCESSOR_POWER8:
> > -   rs6000_cost = &power8_cost;
> > -   break;
> > +    case PROCESSOR_POWER8:
> > +      rs6000_cost = &power8_cost;
> > +      break;
> >  
> > -      case PROCESSOR_POWER9:
> > -      case PROCESSOR_POWER10:
> > -   rs6000_cost = &power9_cost;
> > -   break;
> > +    case PROCESSOR_POWER9:
> > +    case PROCESSOR_POWER10:
> > +      rs6000_cost = &power9_cost;
> > +      break;
> >  
> > -      case PROCESSOR_PPCA2:
> > -   rs6000_cost = &ppca2_cost;
> > -   break;
> > +    case PROCESSOR_PPCA2:
> > +      rs6000_cost = &ppca2_cost;
> > +      break;
> >  
> > -      default:
> > -   gcc_unreachable ();
> > -      }
> > +    default:
> > +      gcc_unreachable ();
> > +    }
> >  
> >    if (global_init_p)
> >      {
> > @@ -21438,15 +21454,17 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int 
> > outer_code,
> >  
> >      case PLUS:
> >      case MINUS:
> > -      if (FLOAT_MODE_P (mode))
> > +      if (speed && FLOAT_MODE_P (mode))
> >     *total = rs6000_cost->fp;
> >        else
> >     *total = COSTS_N_INSNS (1);
> >        return false;
> >  
> >      case MULT:
> > -      if (CONST_INT_P (XEXP (x, 1))
> > -     && satisfies_constraint_I (XEXP (x, 1)))
> > +      if (!speed)
> > +   *total = COSTS_N_INSNS (1);
> > +      else if (CONST_INT_P (XEXP (x, 1))
> > +          && satisfies_constraint_I (XEXP (x, 1)))
> >     {
> >       if (INTVAL (XEXP (x, 1)) >= -256
> >           && INTVAL (XEXP (x, 1)) <= 255)
> > @@ -21465,7 +21483,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int 
> > outer_code,
> >        return false;
> >  
> >      case FMA:
> > -      if (mode == SFmode)
> > +      if (!speed)
> > +   *total = COSTS_N_INSNS (1);
> > +      else if (mode == SFmode)
> >     *total = rs6000_cost->fp;
> >        else
> >     *total = rs6000_cost->dmul;
> > @@ -21475,8 +21495,10 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int 
> > outer_code,
> >      case MOD:
> >        if (FLOAT_MODE_P (mode))
> >     {
> > -     *total = mode == DFmode ? rs6000_cost->ddiv
> > -                             : rs6000_cost->sdiv;
> > +     if (!speed)
> > +       *total = COSTS_N_INSNS (1);
> > +     else
> > +       *total = mode == DFmode ? rs6000_cost->ddiv : rs6000_cost->sdiv;
> >       return false;
> >     }
> >        /* FALLTHRU */
> > @@ -21495,7 +21517,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int 
> > outer_code,
> >     }
> >        else
> >     {
> > -     if (GET_MODE (XEXP (x, 1)) == DImode)
> > +     if (!speed)
> > +       *total = COSTS_N_INSNS (1);
> > +     else if (GET_MODE (XEXP (x, 1)) == DImode)
> >         *total = rs6000_cost->divdi;
> >       else
> >         *total = rs6000_cost->divsi;
> > @@ -21587,7 +21611,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int 
> > outer_code,
> >        if (outer_code == TRUNCATE
> >       && GET_CODE (XEXP (x, 0)) == MULT)
> >     {
> > -     if (mode == DImode)
> > +     if (!speed)
> > +       *total = COSTS_N_INSNS (1);
> > +     else if (mode == DImode)
> >         *total = rs6000_cost->muldi;
> >       else
> >         *total = rs6000_cost->mulsi;
> > @@ -21622,11 +21648,16 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int 
> > outer_code,
> >      case FIX:
> >      case UNSIGNED_FIX:
> >      case FLOAT_TRUNCATE:
> > -      *total = rs6000_cost->fp;
> > +      if (!speed)
> > +   *total = COSTS_N_INSNS (1);
> > +      else
> > +   *total = rs6000_cost->fp;
> >        return false;
> >  
> >      case FLOAT_EXTEND:
> > -      if (mode == DFmode)
> > +      if (!speed)
> > +   *total = COSTS_N_INSNS (1);
> > +      else if (mode == DFmode)
> >     *total = rs6000_cost->sfdf_convert;
> >        else
> >     *total = rs6000_cost->fp;
> > @@ -21773,6 +21804,10 @@ rs6000_insn_cost (rtx_insn *insn, bool speed)
> >        n = length / 4;
> >      }
> >  
> > +  const struct processor_costs *proc_cost = rs6000_cost;
> > +  if (optimize_size)
> > +    proc_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
> > +
> >    enum attr_type type = get_attr_type (insn);
> >  
> >    switch (type)
> > @@ -21787,16 +21822,16 @@ rs6000_insn_cost (rtx_insn *insn, bool speed)
> >        switch (get_attr_size (insn))
> >     {
> >     case SIZE_8:
> > -     cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
> > +     cost = COSTS_N_INSNS (n - 1) + proc_cost->mulsi_const9;
> >       break;
> >     case SIZE_16:
> > -     cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
> > +     cost = COSTS_N_INSNS (n - 1) + proc_cost->mulsi_const;
> >       break;
> >     case SIZE_32:
> > -     cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
> > +     cost = COSTS_N_INSNS (n - 1) + proc_cost->mulsi;
> >       break;
> >     case SIZE_64:
> > -     cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
> > +     cost = COSTS_N_INSNS (n - 1) + proc_cost->muldi;
> >       break;
> >     default:
> >       gcc_unreachable ();
> > @@ -21806,10 +21841,10 @@ rs6000_insn_cost (rtx_insn *insn, bool speed)
> >        switch (get_attr_size (insn))
> >     {
> >     case SIZE_32:
> > -     cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
> > +     cost = COSTS_N_INSNS (n - 1) + proc_cost->divsi;
> >       break;
> >     case SIZE_64:
> > -     cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
> > +     cost = COSTS_N_INSNS (n - 1) + proc_cost->divdi;
> >       break;
> >     default:
> >       gcc_unreachable ();
> > @@ -21817,16 +21852,16 @@ rs6000_insn_cost (rtx_insn *insn, bool speed)
> >        break;
> >  
> >      case TYPE_FP:
> > -      cost = n * rs6000_cost->fp;
> > +      cost = n * proc_cost->fp;
> >        break;
> >      case TYPE_DMUL:
> > -      cost = n * rs6000_cost->dmul;
> > +      cost = n * proc_cost->dmul;
> >        break;
> >      case TYPE_SDIV:
> > -      cost = n * rs6000_cost->sdiv;
> > +      cost = n * proc_cost->sdiv;
> >        break;
> >      case TYPE_DDIV:
> > -      cost = n * rs6000_cost->ddiv;
> > +      cost = n * proc_cost->ddiv;
> >        break;
> >  
> >      case TYPE_SYNC:
> > @@ -22388,7 +22423,7 @@ rs6000_emit_parity (rtx dst, rtx src)
> >    if (mode == SImode)
> >      {
> >        /* Is mult+shift >= shift+xor+shift+xor?  */
> > -      if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
> > +      if (!optimize_size && rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
> >     {
> >       rtx tmp1, tmp2, tmp3, tmp4;
> >  
> > @@ -22411,7 +22446,7 @@ rs6000_emit_parity (rtx dst, rtx src)
> >    else
> >      {
> >        /* Is mult+shift >= shift+xor+shift+xor+shift+xor?  */
> > -      if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
> > +      if (!optimize_size && rs6000_cost->muldi >= COSTS_N_INSNS (5))
> >     {
> >       rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
> >  
> > diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> > index bbd8060e143..9daf55adc02 100644
> > --- a/gcc/config/rs6000/rs6000.h
> > +++ b/gcc/config/rs6000/rs6000.h
> > @@ -1869,29 +1869,6 @@ extern scalar_int_mode rs6000_pmode;
> >  #define REVERSE_CONDITION(CODE, MODE) rs6000_reverse_condition (MODE, CODE)
> >  
> >  
> > -/* Target cpu costs.  */
> > -
> > -struct processor_costs {
> > -  const int mulsi;   /* cost of SImode multiplication.  */
> > -  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
> > -  const int mulsi_const9; /* cost of SImode mult by short constant.  */
> > -  const int muldi;   /* cost of DImode multiplication.  */
> > -  const int divsi;   /* cost of SImode division.  */
> > -  const int divdi;   /* cost of DImode division.  */
> > -  const int fp;              /* cost of simple SFmode and DFmode insns.  */
> > -  const int dmul;    /* cost of DFmode multiplication (and fmadd).  */
> > -  const int sdiv;    /* cost of SFmode division (fdivs).  */
> > -  const int ddiv;    /* cost of DFmode division (fdiv).  */
> > -  const int cache_line_size;    /* cache line size in bytes. */
> > -  const int l1_cache_size; /* size of l1 cache, in kilobytes.  */
> > -  const int l2_cache_size; /* size of l2 cache, in kilobytes.  */
> > -  const int simultaneous_prefetches; /* number of parallel prefetch
> > -                                   operations.  */
> > -  const int sfdf_convert;  /* cost of SF->DF conversion.  */
> > -};
> > -
> > -extern const struct processor_costs *rs6000_cost;
> > -
> >  /* Control the assembler format that we output.  */
> >  
> >  /* A C string constant describing how to begin a comment in the target

-- 
Alan Modra
Australia Development Lab, IBM

Reply via email to