Ping. On Tue, Jan 12, 2021 at 02:02:36PM +1030, Alan Modra wrote: > Ping > https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555759.html > > On Thu, Oct 08, 2020 at 09:28:00AM +1030, Alan Modra wrote: > > When optimizing for size we shouldn't be using metrics based on speed > > or vice-versa. rtlanal.c:get_full_rtx_cost wants both speed and size > > metric from rs6000_rtx_costs independent of the global optimize_size. > > > > Note that the patch changes param_simultaneous_prefetches, > > param_l1_cache_size, param_l1_cache_line_size and param_l2_cache_size, > > which were previously all set to zero for optimize_size. I think that > > was a bug. Those params are a function of the processor. > > > > * config/rs6000/rs6000.h (rs6000_cost): Don't declare. > > (struct processor_costs): Move to.. > > * config/rs6000/rs6000.c: ..here. > > (rs6000_cost): Make static. > > (rs6000_option_override_internal): Ignore optimize_size when > > setting up rs6000_cost. > > (rs6000_insn_cost): Take into account optimize_size here > > instead. > > (rs6000_emit_parity): Likewise. > > (rs6000_rtx_costs): Don't use rs6000_cost when !speed. > > > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > > index d455aa52427..14ecbad5df4 100644 > > --- a/gcc/config/rs6000/rs6000.c > > +++ b/gcc/config/rs6000/rs6000.c > > @@ -497,7 +497,26 @@ rs6000_store_data_bypass_p (rtx_insn *out_insn, > > rtx_insn *in_insn) > > > > /* Processor costs (relative to an add) */ > > > > -const struct processor_costs *rs6000_cost; > > +struct processor_costs { > > + const int mulsi; /* cost of SImode multiplication. */ > > + const int mulsi_const; /* cost of SImode multiplication by constant. */ > > + const int mulsi_const9; /* cost of SImode mult by short constant. */ > > + const int muldi; /* cost of DImode multiplication. */ > > + const int divsi; /* cost of SImode division. */ > > + const int divdi; /* cost of DImode division. */ > > + const int fp; /* cost of simple SFmode and DFmode insns. */ > > + const int dmul; /* cost of DFmode multiplication (and fmadd). */ > > + const int sdiv; /* cost of SFmode division (fdivs). */ > > + const int ddiv; /* cost of DFmode division (fdiv). */ > > + const int cache_line_size; /* cache line size in bytes. */ > > + const int l1_cache_size; /* size of l1 cache, in kilobytes. */ > > + const int l2_cache_size; /* size of l2 cache, in kilobytes. */ > > + const int simultaneous_prefetches; /* number of parallel prefetch > > + operations. */ > > + const int sfdf_convert; /* cost of SF->DF conversion. */ > > +}; > > + > > +static const struct processor_costs *rs6000_cost; > > > > /* Instruction size costs on 32bit processors. */ > > static const > > @@ -4618,131 +4637,128 @@ rs6000_option_override_internal (bool > > global_init_p) > > } > > > > /* Initialize rs6000_cost with the appropriate target costs. */ > > - if (optimize_size) > > - rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost; > > - else > > - switch (rs6000_tune) > > - { > > - case PROCESSOR_RS64A: > > - rs6000_cost = &rs64a_cost; > > - break; > > + switch (rs6000_tune) > > + { > > + case PROCESSOR_RS64A: > > + rs6000_cost = &rs64a_cost; > > + break; > > > > - case PROCESSOR_MPCCORE: > > - rs6000_cost = &mpccore_cost; > > - break; > > + case PROCESSOR_MPCCORE: > > + rs6000_cost = &mpccore_cost; > > + break; > > > > - case PROCESSOR_PPC403: > > - rs6000_cost = &ppc403_cost; > > - break; > > + case PROCESSOR_PPC403: > > + rs6000_cost = &ppc403_cost; > > + break; > > > > - case PROCESSOR_PPC405: > > - rs6000_cost = &ppc405_cost; > > - break; > > + case PROCESSOR_PPC405: > > + rs6000_cost = &ppc405_cost; > > + break; > > > > - case PROCESSOR_PPC440: > > - rs6000_cost = &ppc440_cost; > > - break; > > + case PROCESSOR_PPC440: > > + rs6000_cost = &ppc440_cost; > > + break; > > > > - case PROCESSOR_PPC476: > > - rs6000_cost = &ppc476_cost; > > - break; > > + case PROCESSOR_PPC476: > > + rs6000_cost = &ppc476_cost; > > + break; > > > > - case PROCESSOR_PPC601: > > - rs6000_cost = &ppc601_cost; > > - break; > > + case PROCESSOR_PPC601: > > + rs6000_cost = &ppc601_cost; > > + break; > > > > - case PROCESSOR_PPC603: > > - rs6000_cost = &ppc603_cost; > > - break; > > + case PROCESSOR_PPC603: > > + rs6000_cost = &ppc603_cost; > > + break; > > > > - case PROCESSOR_PPC604: > > - rs6000_cost = &ppc604_cost; > > - break; > > + case PROCESSOR_PPC604: > > + rs6000_cost = &ppc604_cost; > > + break; > > > > - case PROCESSOR_PPC604e: > > - rs6000_cost = &ppc604e_cost; > > - break; > > + case PROCESSOR_PPC604e: > > + rs6000_cost = &ppc604e_cost; > > + break; > > > > - case PROCESSOR_PPC620: > > - rs6000_cost = &ppc620_cost; > > - break; > > + case PROCESSOR_PPC620: > > + rs6000_cost = &ppc620_cost; > > + break; > > > > - case PROCESSOR_PPC630: > > - rs6000_cost = &ppc630_cost; > > - break; > > + case PROCESSOR_PPC630: > > + rs6000_cost = &ppc630_cost; > > + break; > > > > - case PROCESSOR_CELL: > > - rs6000_cost = &ppccell_cost; > > - break; > > + case PROCESSOR_CELL: > > + rs6000_cost = &ppccell_cost; > > + break; > > > > - case PROCESSOR_PPC750: > > - case PROCESSOR_PPC7400: > > - rs6000_cost = &ppc750_cost; > > - break; > > + case PROCESSOR_PPC750: > > + case PROCESSOR_PPC7400: > > + rs6000_cost = &ppc750_cost; > > + break; > > > > - case PROCESSOR_PPC7450: > > - rs6000_cost = &ppc7450_cost; > > - break; > > + case PROCESSOR_PPC7450: > > + rs6000_cost = &ppc7450_cost; > > + break; > > > > - case PROCESSOR_PPC8540: > > - case PROCESSOR_PPC8548: > > - rs6000_cost = &ppc8540_cost; > > - break; > > + case PROCESSOR_PPC8540: > > + case PROCESSOR_PPC8548: > > + rs6000_cost = &ppc8540_cost; > > + break; > > > > - case PROCESSOR_PPCE300C2: > > - case PROCESSOR_PPCE300C3: > > - rs6000_cost = &ppce300c2c3_cost; > > - break; > > + case PROCESSOR_PPCE300C2: > > + case PROCESSOR_PPCE300C3: > > + rs6000_cost = &ppce300c2c3_cost; > > + break; > > > > - case PROCESSOR_PPCE500MC: > > - rs6000_cost = &ppce500mc_cost; > > - break; > > + case PROCESSOR_PPCE500MC: > > + rs6000_cost = &ppce500mc_cost; > > + break; > > > > - case PROCESSOR_PPCE500MC64: > > - rs6000_cost = &ppce500mc64_cost; > > - break; > > + case PROCESSOR_PPCE500MC64: > > + rs6000_cost = &ppce500mc64_cost; > > + break; > > > > - case PROCESSOR_PPCE5500: > > - rs6000_cost = &ppce5500_cost; > > - break; > > + case PROCESSOR_PPCE5500: > > + rs6000_cost = &ppce5500_cost; > > + break; > > > > - case PROCESSOR_PPCE6500: > > - rs6000_cost = &ppce6500_cost; > > - break; > > + case PROCESSOR_PPCE6500: > > + rs6000_cost = &ppce6500_cost; > > + break; > > > > - case PROCESSOR_TITAN: > > - rs6000_cost = &titan_cost; > > - break; > > + case PROCESSOR_TITAN: > > + rs6000_cost = &titan_cost; > > + break; > > > > - case PROCESSOR_POWER4: > > - case PROCESSOR_POWER5: > > - rs6000_cost = &power4_cost; > > - break; > > + case PROCESSOR_POWER4: > > + case PROCESSOR_POWER5: > > + rs6000_cost = &power4_cost; > > + break; > > > > - case PROCESSOR_POWER6: > > - rs6000_cost = &power6_cost; > > - break; > > + case PROCESSOR_POWER6: > > + rs6000_cost = &power6_cost; > > + break; > > > > - case PROCESSOR_POWER7: > > - rs6000_cost = &power7_cost; > > - break; > > + case PROCESSOR_POWER7: > > + rs6000_cost = &power7_cost; > > + break; > > > > - case PROCESSOR_POWER8: > > - rs6000_cost = &power8_cost; > > - break; > > + case PROCESSOR_POWER8: > > + rs6000_cost = &power8_cost; > > + break; > > > > - case PROCESSOR_POWER9: > > - case PROCESSOR_POWER10: > > - rs6000_cost = &power9_cost; > > - break; > > + case PROCESSOR_POWER9: > > + case PROCESSOR_POWER10: > > + rs6000_cost = &power9_cost; > > + break; > > > > - case PROCESSOR_PPCA2: > > - rs6000_cost = &ppca2_cost; > > - break; > > + case PROCESSOR_PPCA2: > > + rs6000_cost = &ppca2_cost; > > + break; > > > > - default: > > - gcc_unreachable (); > > - } > > + default: > > + gcc_unreachable (); > > + } > > > > if (global_init_p) > > { > > @@ -21438,15 +21454,17 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int > > outer_code, > > > > case PLUS: > > case MINUS: > > - if (FLOAT_MODE_P (mode)) > > + if (speed && FLOAT_MODE_P (mode)) > > *total = rs6000_cost->fp; > > else > > *total = COSTS_N_INSNS (1); > > return false; > > > > case MULT: > > - if (CONST_INT_P (XEXP (x, 1)) > > - && satisfies_constraint_I (XEXP (x, 1))) > > + if (!speed) > > + *total = COSTS_N_INSNS (1); > > + else if (CONST_INT_P (XEXP (x, 1)) > > + && satisfies_constraint_I (XEXP (x, 1))) > > { > > if (INTVAL (XEXP (x, 1)) >= -256 > > && INTVAL (XEXP (x, 1)) <= 255) > > @@ -21465,7 +21483,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int > > outer_code, > > return false; > > > > case FMA: > > - if (mode == SFmode) > > + if (!speed) > > + *total = COSTS_N_INSNS (1); > > + else if (mode == SFmode) > > *total = rs6000_cost->fp; > > else > > *total = rs6000_cost->dmul; > > @@ -21475,8 +21495,10 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int > > outer_code, > > case MOD: > > if (FLOAT_MODE_P (mode)) > > { > > - *total = mode == DFmode ? rs6000_cost->ddiv > > - : rs6000_cost->sdiv; > > + if (!speed) > > + *total = COSTS_N_INSNS (1); > > + else > > + *total = mode == DFmode ? rs6000_cost->ddiv : rs6000_cost->sdiv; > > return false; > > } > > /* FALLTHRU */ > > @@ -21495,7 +21517,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int > > outer_code, > > } > > else > > { > > - if (GET_MODE (XEXP (x, 1)) == DImode) > > + if (!speed) > > + *total = COSTS_N_INSNS (1); > > + else if (GET_MODE (XEXP (x, 1)) == DImode) > > *total = rs6000_cost->divdi; > > else > > *total = rs6000_cost->divsi; > > @@ -21587,7 +21611,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int > > outer_code, > > if (outer_code == TRUNCATE > > && GET_CODE (XEXP (x, 0)) == MULT) > > { > > - if (mode == DImode) > > + if (!speed) > > + *total = COSTS_N_INSNS (1); > > + else if (mode == DImode) > > *total = rs6000_cost->muldi; > > else > > *total = rs6000_cost->mulsi; > > @@ -21622,11 +21648,16 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int > > outer_code, > > case FIX: > > case UNSIGNED_FIX: > > case FLOAT_TRUNCATE: > > - *total = rs6000_cost->fp; > > + if (!speed) > > + *total = COSTS_N_INSNS (1); > > + else > > + *total = rs6000_cost->fp; > > return false; > > > > case FLOAT_EXTEND: > > - if (mode == DFmode) > > + if (!speed) > > + *total = COSTS_N_INSNS (1); > > + else if (mode == DFmode) > > *total = rs6000_cost->sfdf_convert; > > else > > *total = rs6000_cost->fp; > > @@ -21773,6 +21804,10 @@ rs6000_insn_cost (rtx_insn *insn, bool speed) > > n = length / 4; > > } > > > > + const struct processor_costs *proc_cost = rs6000_cost; > > + if (optimize_size) > > + proc_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost; > > + > > enum attr_type type = get_attr_type (insn); > > > > switch (type) > > @@ -21787,16 +21822,16 @@ rs6000_insn_cost (rtx_insn *insn, bool speed) > > switch (get_attr_size (insn)) > > { > > case SIZE_8: > > - cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9; > > + cost = COSTS_N_INSNS (n - 1) + proc_cost->mulsi_const9; > > break; > > case SIZE_16: > > - cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const; > > + cost = COSTS_N_INSNS (n - 1) + proc_cost->mulsi_const; > > break; > > case SIZE_32: > > - cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi; > > + cost = COSTS_N_INSNS (n - 1) + proc_cost->mulsi; > > break; > > case SIZE_64: > > - cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi; > > + cost = COSTS_N_INSNS (n - 1) + proc_cost->muldi; > > break; > > default: > > gcc_unreachable (); > > @@ -21806,10 +21841,10 @@ rs6000_insn_cost (rtx_insn *insn, bool speed) > > switch (get_attr_size (insn)) > > { > > case SIZE_32: > > - cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi; > > + cost = COSTS_N_INSNS (n - 1) + proc_cost->divsi; > > break; > > case SIZE_64: > > - cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi; > > + cost = COSTS_N_INSNS (n - 1) + proc_cost->divdi; > > break; > > default: > > gcc_unreachable (); > > @@ -21817,16 +21852,16 @@ rs6000_insn_cost (rtx_insn *insn, bool speed) > > break; > > > > case TYPE_FP: > > - cost = n * rs6000_cost->fp; > > + cost = n * proc_cost->fp; > > break; > > case TYPE_DMUL: > > - cost = n * rs6000_cost->dmul; > > + cost = n * proc_cost->dmul; > > break; > > case TYPE_SDIV: > > - cost = n * rs6000_cost->sdiv; > > + cost = n * proc_cost->sdiv; > > break; > > case TYPE_DDIV: > > - cost = n * rs6000_cost->ddiv; > > + cost = n * proc_cost->ddiv; > > break; > > > > case TYPE_SYNC: > > @@ -22388,7 +22423,7 @@ rs6000_emit_parity (rtx dst, rtx src) > > if (mode == SImode) > > { > > /* Is mult+shift >= shift+xor+shift+xor? */ > > - if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3)) > > + if (!optimize_size && rs6000_cost->mulsi_const >= COSTS_N_INSNS (3)) > > { > > rtx tmp1, tmp2, tmp3, tmp4; > > > > @@ -22411,7 +22446,7 @@ rs6000_emit_parity (rtx dst, rtx src) > > else > > { > > /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */ > > - if (rs6000_cost->muldi >= COSTS_N_INSNS (5)) > > + if (!optimize_size && rs6000_cost->muldi >= COSTS_N_INSNS (5)) > > { > > rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; > > > > diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h > > index bbd8060e143..9daf55adc02 100644 > > --- a/gcc/config/rs6000/rs6000.h > > +++ b/gcc/config/rs6000/rs6000.h > > @@ -1869,29 +1869,6 @@ extern scalar_int_mode rs6000_pmode; > > #define REVERSE_CONDITION(CODE, MODE) rs6000_reverse_condition (MODE, CODE) > > > > > > -/* Target cpu costs. */ > > - > > -struct processor_costs { > > - const int mulsi; /* cost of SImode multiplication. */ > > - const int mulsi_const; /* cost of SImode multiplication by constant. */ > > - const int mulsi_const9; /* cost of SImode mult by short constant. */ > > - const int muldi; /* cost of DImode multiplication. */ > > - const int divsi; /* cost of SImode division. */ > > - const int divdi; /* cost of DImode division. */ > > - const int fp; /* cost of simple SFmode and DFmode insns. */ > > - const int dmul; /* cost of DFmode multiplication (and fmadd). */ > > - const int sdiv; /* cost of SFmode division (fdivs). */ > > - const int ddiv; /* cost of DFmode division (fdiv). */ > > - const int cache_line_size; /* cache line size in bytes. */ > > - const int l1_cache_size; /* size of l1 cache, in kilobytes. */ > > - const int l2_cache_size; /* size of l2 cache, in kilobytes. */ > > - const int simultaneous_prefetches; /* number of parallel prefetch > > - operations. */ > > - const int sfdf_convert; /* cost of SF->DF conversion. */ > > -}; > > - > > -extern const struct processor_costs *rs6000_cost; > > - > > /* Control the assembler format that we output. */ > > > > /* A C string constant describing how to begin a comment in the target
-- Alan Modra Australia Development Lab, IBM