On 18/11/2020 14:55, Wilco Dijkstra via Gcc-patches wrote: > Add an initial cost table for Cortex-A76 - this is copied from > cortexa57_extra_costs but updates it based on the Optimization Guide. > Use the new cost table on all Neoverse tunings and ensure the tunings > are consistent for all. As a result more compact code is generated > with more combined shift+alu operations. Eg. -mcpu=cortex-a76 will now > merge the shifts in: > > int f(int x, int y) { return (x & y << 3) * (x | y << 3); } > > and w2, w0, w1, lsl 3 > orr w0, w0, w1, lsl 3 > mul w0, w2, w0 > ret > > SPEC2017 codesize improves by 0.02% and SPECINT2017 shows 0.24% gain. > > Bootstrap OK, regress passes, OK for commit? > > ChangeLog: > 2020-11-18 Wilco Dijkstra <wdijk...@arm.com> > > * config/aarch64/aarch64.c (neoversen1_tunings): Use new > cortexa76_extra_costs. > (neoversev1_tunings): Likewise. > (neoversen2_tunings): Likewise. > * config/arm/aarch-cost-tables.h (cortexa76_extra_costs): > Add new costs. > > --- > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index > 6bf2f9aa344f9150dec72db660d951e50521285c..65ff49d2b4125013466f90a54ff698ae810580f0 > 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -1312,7 +1312,7 @@ static const struct tune_params thunderx3t110_tunings = > > static const struct tune_params neoversen1_tunings = > { > - &cortexa57_extra_costs, > + &cortexa76_extra_costs, > &generic_addrcost_table, > &generic_regmove_cost, > &cortexa57_vector_cost, > @@ -1338,7 +1338,7 @@ static const struct tune_params neoversen1_tunings = > > static const struct tune_params neoversev1_tunings = > { > - &cortexa57_extra_costs, > + &cortexa76_extra_costs, > &generic_addrcost_table, > &generic_regmove_cost, > &cortexa57_vector_cost, > @@ -1364,7 +1364,7 @@ static const struct tune_params neoversev1_tunings = > > static const struct tune_params neoversen2_tunings = > { > - &cortexa57_extra_costs, > + &cortexa76_extra_costs, > 
&generic_addrcost_table, > &generic_regmove_cost, > &cortexa57_vector_cost, > diff --git a/gcc/config/arm/aarch-cost-tables.h > b/gcc/config/arm/aarch-cost-tables.h > index > cf8186599018cc5e51cf44e4f2080a502d895e1d..1b9d53d07b54bddf1767121236b06d2b4581631c > 100644 > --- a/gcc/config/arm/aarch-cost-tables.h > +++ b/gcc/config/arm/aarch-cost-tables.h > @@ -331,6 +331,109 @@ const struct cpu_cost_table cortexa57_extra_costs = > } > }; > > +const struct cpu_cost_table cortexa76_extra_costs = > +{ > + /* ALU */ > + { > + 0, /* arith. */ > + 0, /* logical. */ > + 0, /* shift. */ > + 0, /* shift_reg. */ > + COSTS_N_INSNS (1), /* arith_shift. */ > + COSTS_N_INSNS (1), /* arith_shift_reg. */ > + 0, /* log_shift. */ > + COSTS_N_INSNS (1), /* log_shift_reg. */ > + 0, /* extend. */ > + COSTS_N_INSNS (1), /* extend_arith. */ > + COSTS_N_INSNS (1), /* bfi. */ > + 0, /* bfx. */ > + 0, /* clz. */ > + 0, /* rev. */ > + 0, /* non_exec. */ > + true /* non_exec_costs_exec. */ > + }, > + { > + /* MULT SImode */ > + { > + COSTS_N_INSNS (1), /* simple. */ > + COSTS_N_INSNS (2), /* flag_setting. */ > + COSTS_N_INSNS (1), /* extend. */ > + COSTS_N_INSNS (1), /* add. */ > + COSTS_N_INSNS (1), /* extend_add. */ > + COSTS_N_INSNS (6) /* idiv. */ > + }, > + /* MULT DImode */ > + { > + COSTS_N_INSNS (3), /* simple. */ > + 0, /* flag_setting (N/A). */ > + COSTS_N_INSNS (1), /* extend. */ > + COSTS_N_INSNS (3), /* add. */ > + COSTS_N_INSNS (1), /* extend_add. */ > + COSTS_N_INSNS (10) /* idiv. */ > + } > + }, > + /* LD/ST */ > + { > + COSTS_N_INSNS (3), /* load. */ > + COSTS_N_INSNS (3), /* load_sign_extend. */ > + COSTS_N_INSNS (3), /* ldrd. */ > + COSTS_N_INSNS (2), /* ldm_1st. */ > + 1, /* ldm_regs_per_insn_1st. */ > + 2, /* ldm_regs_per_insn_subsequent. */ > + COSTS_N_INSNS (4), /* loadf. */ > + COSTS_N_INSNS (4), /* loadd. */ > + COSTS_N_INSNS (5), /* load_unaligned. */ > + 0, /* store. */ > + 0, /* strd. */ > + 0, /* stm_1st. */ > + 1, /* stm_regs_per_insn_1st. 
*/ > + 2, /* stm_regs_per_insn_subsequent. */ > + 0, /* storef. */ > + 0, /* stored. */ > + COSTS_N_INSNS (1), /* store_unaligned. */ > + COSTS_N_INSNS (1), /* loadv. */ > + COSTS_N_INSNS (1) /* storev. */ > + }, > + { > + /* FP SFmode */ > + { > + COSTS_N_INSNS (10), /* div. */ > + COSTS_N_INSNS (2), /* mult. */ > + COSTS_N_INSNS (3), /* mult_addsub. */ > + COSTS_N_INSNS (3), /* fma. */ > + COSTS_N_INSNS (1), /* addsub. */ > + 0, /* fpconst. */ > + 0, /* neg. */ > + 0, /* compare. */ > + COSTS_N_INSNS (1), /* widen. */ > + COSTS_N_INSNS (1), /* narrow. */ > + COSTS_N_INSNS (1), /* toint. */ > + COSTS_N_INSNS (1), /* fromint. */ > + COSTS_N_INSNS (1) /* roundint. */ > + }, > + /* FP DFmode */ > + { > + COSTS_N_INSNS (15), /* div. */ > + COSTS_N_INSNS (2), /* mult. */ > + COSTS_N_INSNS (3), /* mult_addsub. */ > + COSTS_N_INSNS (3), /* fma. */ > + COSTS_N_INSNS (1), /* addsub. */ > + 0, /* fpconst. */ > + 0, /* neg. */ > + 0, /* compare. */ > + COSTS_N_INSNS (1), /* widen. */ > + COSTS_N_INSNS (1), /* narrow. */ > + COSTS_N_INSNS (1), /* toint. */ > + COSTS_N_INSNS (1), /* fromint. */ > + COSTS_N_INSNS (1) /* roundint. */ > + } > + }, > + /* Vector */ > + { > + COSTS_N_INSNS (1) /* alu. */ > + } > +}; > + > const struct cpu_cost_table exynosm1_extra_costs = > { > /* ALU */ >
OK. R.