Re: [PATCH 8/8] [RS6000] rs6000_rtx_costs for !speed

2021-01-21 Thread Alan Modra via Gcc-patches
Ping.

On Tue, Jan 12, 2021 at 02:02:36PM +1030, Alan Modra wrote:
> Ping
> https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555759.html
> 
> On Thu, Oct 08, 2020 at 09:28:00AM +1030, Alan Modra wrote:
> > When optimizing for size we shouldn't be using metrics based on speed
> > or vice versa.  rtlanal.c:get_full_rtx_cost wants both speed and size
> > metrics from rs6000_rtx_costs, independent of the global optimize_size.
> > 
> > Note that the patch changes param_simultaneous_prefetches,
> > param_l1_cache_size, param_l1_cache_line_size and param_l2_cache_size,
> > which were previously all set to zero for optimize_size.  I think that
> > was a bug.  Those params are a function of the processor.
> > 
> > * config/rs6000/rs6000.h (rs6000_cost): Don't declare.
> > (struct processor_costs): Move to..
> > * config/rs6000/rs6000.c: ..here.
> > (rs6000_cost): Make static.
> > (rs6000_option_override_internal): Ignore optimize_size when
> > setting up rs6000_cost.
> > (rs6000_insn_cost): Take into account optimize_size here
> > instead.
> > (rs6000_emit_parity): Likewise.
> > (rs6000_rtx_costs): Don't use rs6000_cost when !speed.
> > 
> > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> > index d455aa52427..14ecbad5df4 100644
> > --- a/gcc/config/rs6000/rs6000.c
> > +++ b/gcc/config/rs6000/rs6000.c
> > @@ -497,7 +497,26 @@ rs6000_store_data_bypass_p (rtx_insn *out_insn, 
> > rtx_insn *in_insn)
> >  
> >  /* Processor costs (relative to an add) */
> >  
> > -const struct processor_costs *rs6000_cost;
> > +struct processor_costs {
> > +  const int mulsi;   /* cost of SImode multiplication.  */
> > +  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
> > +  const int mulsi_const9; /* cost of SImode mult by short constant.  */
> > +  const int muldi;   /* cost of DImode multiplication.  */
> > +  const int divsi;   /* cost of SImode division.  */
> > +  const int divdi;   /* cost of DImode division.  */
> > +  const int fp;  /* cost of simple SFmode and DFmode insns.  */
> > +  const int dmul;/* cost of DFmode multiplication (and fmadd).  */
> > +  const int sdiv;/* cost of SFmode division (fdivs).  */
> > +  const int ddiv;/* cost of DFmode division (fdiv).  */
> > +  const int cache_line_size;/* cache line size in bytes. */
> > +  const int l1_cache_size; /* size of l1 cache, in kilobytes.  */
> > +  const int l2_cache_size; /* size of l2 cache, in kilobytes.  */
> > +  const int simultaneous_prefetches; /* number of parallel prefetch
> > +   operations.  */
> > +  const int sfdf_convert;  /* cost of SF->DF conversion.  */
> > +};
> > +
> > +static const struct processor_costs *rs6000_cost;
> >  
> >  /* Instruction size costs on 32bit processors.  */
> >  static const
> > @@ -4618,131 +4637,128 @@ rs6000_option_override_internal (bool 
> > global_init_p)
> >  }
> >  
> >/* Initialize rs6000_cost with the appropriate target costs.  */
> > -  if (optimize_size)
> > -rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
> > -  else
> > -switch (rs6000_tune)
> > -  {
> > -  case PROCESSOR_RS64A:
> > -   rs6000_cost = &rs64a_cost;
> > -   break;
> > +  switch (rs6000_tune)
> > +{
> > +case PROCESSOR_RS64A:
> > +  rs6000_cost = &rs64a_cost;
> > +  break;
> >  
> > -  case PROCESSOR_MPCCORE:
> > -   rs6000_cost = &mpccore_cost;
> > -   break;
> > +case PROCESSOR_MPCCORE:
> > +  rs6000_cost = &mpccore_cost;
> > +  break;
> >  
> > -  case PROCESSOR_PPC403:
> > -   rs6000_cost = &ppc403_cost;
> > -   break;
> > +case PROCESSOR_PPC403:
> > +  rs6000_cost = &ppc403_cost;
> > +  break;
> >  
> > -  case PROCESSOR_PPC405:
> > -   rs6000_cost = &ppc405_cost;
> > -   break;
> > +case PROCESSOR_PPC405:
> > +  rs6000_cost = &ppc405_cost;
> > +  break;
> >  
> > -  case PROCESSOR_PPC440:
> > -   rs6000_cost = &ppc440_cost;
> > -   break;
> > +case PROCESSOR_PPC440:
> > +  rs6000_cost = &ppc440_cost;
> > +  break;
> >  
> > -  case PROCESSOR_PPC476:
> > -   rs6000_cost = &ppc476_cost;
> > -   break;
> > +case PROCESSOR_PPC476:
> > +  rs6000_cost = &ppc476_cost;
> > +  break;
> >  
> > -  case PROCESSOR_PPC601:
> > -   rs6000_cost = &ppc601_cost;
> > -   break;
> > +case PROCESSOR_PPC601:
> > +  rs6000_cost = &ppc601_cost;
> > +  break;
> >  
> > -  case PROCESSOR_PPC603:
> > -   rs6000_cost = &ppc603_cost;
> > -   break;
> > +case PROCESSOR_PPC603:
> > +  rs6000_cost = &ppc603_cost;
> > +  break;
> >  
> > -  case PROCESSOR_PPC604:
> > -   rs6000_cost = &ppc604_cost;
> > -   break;
> > +case PROCESSOR_PPC604:
> > +  rs6000_cost = &ppc604_cost;
> > +  break;
> >  
> > -  case PROCESSOR_PPC604e:
> > -   rs6000_cost = &ppc604e_cost;
> > -   break;
> > +case PROCESSOR_PPC604e:
> > +  rs6000_cost = &ppc6

[PATCH 8/8] [RS6000] rs6000_rtx_costs for !speed

2020-10-07 Thread Alan Modra via Gcc-patches
When optimizing for size we shouldn't be using metrics based on speed
or vice versa.  rtlanal.c:get_full_rtx_cost wants both speed and size
metrics from rs6000_rtx_costs, independent of the global optimize_size.

Note that the patch changes param_simultaneous_prefetches,
param_l1_cache_size, param_l1_cache_line_size and param_l2_cache_size,
which were previously all set to zero for optimize_size.  I think that
was a bug.  Those params are a function of the processor.

* config/rs6000/rs6000.h (rs6000_cost): Don't declare.
(struct processor_costs): Move to..
* config/rs6000/rs6000.c: ..here.
(rs6000_cost): Make static.
(rs6000_option_override_internal): Ignore optimize_size when
setting up rs6000_cost.
(rs6000_insn_cost): Take into account optimize_size here
instead.
(rs6000_emit_parity): Likewise.
(rs6000_rtx_costs): Don't use rs6000_cost when !speed.

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index d455aa52427..14ecbad5df4 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -497,7 +497,26 @@ rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn 
*in_insn)
 
 /* Processor costs (relative to an add) */
 
-const struct processor_costs *rs6000_cost;
+struct processor_costs {
+  const int mulsi;   /* cost of SImode multiplication.  */
+  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
+  const int mulsi_const9; /* cost of SImode mult by short constant.  */
+  const int muldi;   /* cost of DImode multiplication.  */
+  const int divsi;   /* cost of SImode division.  */
+  const int divdi;   /* cost of DImode division.  */
+  const int fp;  /* cost of simple SFmode and DFmode insns.  */
+  const int dmul;/* cost of DFmode multiplication (and fmadd).  */
+  const int sdiv;/* cost of SFmode division (fdivs).  */
+  const int ddiv;/* cost of DFmode division (fdiv).  */
+  const int cache_line_size;/* cache line size in bytes. */
+  const int l1_cache_size; /* size of l1 cache, in kilobytes.  */
+  const int l2_cache_size; /* size of l2 cache, in kilobytes.  */
+  const int simultaneous_prefetches; /* number of parallel prefetch
+   operations.  */
+  const int sfdf_convert;  /* cost of SF->DF conversion.  */
+};
+
+static const struct processor_costs *rs6000_cost;
 
 /* Instruction size costs on 32bit processors.  */
 static const
@@ -4618,131 +4637,128 @@ rs6000_option_override_internal (bool global_init_p)
 }
 
   /* Initialize rs6000_cost with the appropriate target costs.  */
-  if (optimize_size)
-rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
-  else
-switch (rs6000_tune)
-  {
-  case PROCESSOR_RS64A:
-   rs6000_cost = &rs64a_cost;
-   break;
+  switch (rs6000_tune)
+{
+case PROCESSOR_RS64A:
+  rs6000_cost = &rs64a_cost;
+  break;
 
-  case PROCESSOR_MPCCORE:
-   rs6000_cost = &mpccore_cost;
-   break;
+case PROCESSOR_MPCCORE:
+  rs6000_cost = &mpccore_cost;
+  break;
 
-  case PROCESSOR_PPC403:
-   rs6000_cost = &ppc403_cost;
-   break;
+case PROCESSOR_PPC403:
+  rs6000_cost = &ppc403_cost;
+  break;
 
-  case PROCESSOR_PPC405:
-   rs6000_cost = &ppc405_cost;
-   break;
+case PROCESSOR_PPC405:
+  rs6000_cost = &ppc405_cost;
+  break;
 
-  case PROCESSOR_PPC440:
-   rs6000_cost = &ppc440_cost;
-   break;
+case PROCESSOR_PPC440:
+  rs6000_cost = &ppc440_cost;
+  break;
 
-  case PROCESSOR_PPC476:
-   rs6000_cost = &ppc476_cost;
-   break;
+case PROCESSOR_PPC476:
+  rs6000_cost = &ppc476_cost;
+  break;
 
-  case PROCESSOR_PPC601:
-   rs6000_cost = &ppc601_cost;
-   break;
+case PROCESSOR_PPC601:
+  rs6000_cost = &ppc601_cost;
+  break;
 
-  case PROCESSOR_PPC603:
-   rs6000_cost = &ppc603_cost;
-   break;
+case PROCESSOR_PPC603:
+  rs6000_cost = &ppc603_cost;
+  break;
 
-  case PROCESSOR_PPC604:
-   rs6000_cost = &ppc604_cost;
-   break;
+case PROCESSOR_PPC604:
+  rs6000_cost = &ppc604_cost;
+  break;
 
-  case PROCESSOR_PPC604e:
-   rs6000_cost = &ppc604e_cost;
-   break;
+case PROCESSOR_PPC604e:
+  rs6000_cost = &ppc604e_cost;
+  break;
 
-  case PROCESSOR_PPC620:
-   rs6000_cost = &ppc620_cost;
-   break;
+case PROCESSOR_PPC620:
+  rs6000_cost = &ppc620_cost;
+  break;
 
-  case PROCESSOR_PPC630:
-   rs6000_cost = &ppc630_cost;
-   break;
+case PROCESSOR_PPC630:
+  rs6000_cost = &ppc630_cost;
+  break;
 
-  case PROCESSOR_CELL:
-   rs6000_cost = &ppccell_cost;
-   break;
+case PROCESSOR_CELL:
+  rs6000_cost = &ppccell_cost;
+  break;
 
-  case PROCESSOR_PPC750:
-  case PROCESSOR_PPC7400:
-