Re: [PATCH] AArch64: Add cost table for Cortex-A76
On 18/11/2020 14:55, Wilco Dijkstra via Gcc-patches wrote: > Add an initial cost table for Cortex-A76 - this is copied from > cortexa57_extra_costs but updates it based on the Optimization Guide. > Use the new cost table on all Neoverse tunings and ensure the tunings > are consistent for all. As a result more compact code is generated > with more combined shift+alu operations. Eg. -mcpu=cortex-a76 will now > merge the shifts in: > > int f(int x, int y) { return (x & y << 3) * (x | y << 3); } > > and w2, w0, w1, lsl 3 > orr w0, w0, w1, lsl 3 > mul w0, w2, w0 > ret > > SPEC2017 codesize improves by 0.02% and SPECINT2017 shows 0.24% gain. > > Bootstrap OK, regress passes, OK for commit? > > ChangeLog: > 2020-11-18 Wilco Dijkstra > > * config/aarch64/aarch64.c (neoversen1_tunings): Use new > cortexa76_extra_costs. > (neoversev1_tunings): Likewise. > (neoversen2_tunings): Likewise. > * config/arm/aarch-cost-tables.h (cortexa76_extra_costs): > add new costs. > > --- > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index > 6bf2f9aa344f9150dec72db660d951e50521285c..65ff49d2b4125013466f90a54ff698ae810580f0 > 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -1312,7 +1312,7 @@ static const struct tune_params thunderx3t110_tunings = > > static const struct tune_params neoversen1_tunings = > { > - &cortexa57_extra_costs, > + &cortexa76_extra_costs, > &generic_addrcost_table, > &generic_regmove_cost, > &cortexa57_vector_cost, > @@ -1338,7 +1338,7 @@ static const struct tune_params neoversen1_tunings = > > static const struct tune_params neoversev1_tunings = > { > - &cortexa57_extra_costs, > + &cortexa76_extra_costs, > &generic_addrcost_table, > &generic_regmove_cost, > &cortexa57_vector_cost, > @@ -1364,7 +1364,7 @@ static const struct tune_params neoversev1_tunings = > > static const struct tune_params neoversen2_tunings = > { > - &cortexa57_extra_costs, > + &cortexa76_extra_costs, > &generic_addrcost_table, > &generic_regmove_cost, > &cortexa57_vector_cost, > diff --git a/gcc/config/arm/aarch-cost-tables.h > b/gcc/config/arm/aarch-cost-tables.h > index >
cf8186599018cc5e51cf44e4f2080a502d895e1d..1b9d53d07b54bddf1767121236b06d2b4581631c > 100644 > --- a/gcc/config/arm/aarch-cost-tables.h > +++ b/gcc/config/arm/aarch-cost-tables.h > @@ -331,6 +331,109 @@ const struct cpu_cost_table cortexa57_extra_costs = >} > }; > > +const struct cpu_cost_table cortexa76_extra_costs = > +{ > + /* ALU */ > + { > +0, /* arith. */ > +0, /* logical. */ > +0, /* shift. */ > +0, /* shift_reg. */ > +COSTS_N_INSNS (1), /* arith_shift. */ > +COSTS_N_INSNS (1), /* arith_shift_reg. */ > +0, /* log_shift. */ > +COSTS_N_INSNS (1), /* log_shift_reg. */ > +0, /* extend. */ > +COSTS_N_INSNS (1), /* extend_arith. */ > +COSTS_N_INSNS (1), /* bfi. */ > +0, /* bfx. */ > +0, /* clz. */ > +0, /* rev. */ > +0, /* non_exec. */ > +true /* non_exec_costs_exec. */ > + }, > + { > +/* MULT SImode */ > +{ > + COSTS_N_INSNS (1), /* simple. */ > + COSTS_N_INSNS (2), /* flag_setting. */ > + COSTS_N_INSNS (1), /* extend. */ > + COSTS_N_INSNS (1), /* add. */ > + COSTS_N_INSNS (1), /* extend_add. */ > + COSTS_N_INSNS (6) /* idiv. */ > +}, > +/* MULT DImode */ > +{ > + COSTS_N_INSNS (3), /* simple. */ > + 0, /* flag_setting (N/A). */ > + COSTS_N_INSNS (1), /* extend. */ > + COSTS_N_INSNS (3), /* add. */ > + COSTS_N_INSNS (1), /* extend_add. */ > + COSTS_N_INSNS (10) /* idiv. */ > +} > + }, > + /* LD/ST */ > + { > +COSTS_N_INSNS (3), /* load. */ > +COSTS_N_INSNS (3), /* load_sign_extend. */ > +COSTS_N_INSNS (3), /* ldrd. */ > +COSTS_N_INSNS (2), /* ldm_1st. */ > +1, /* ldm_regs_per_insn_1st. */ > +2, /* ldm_regs_per_insn_subsequent. */ > +COSTS_N_INSNS (4), /* loadf. */ > +COSTS_N_INSNS (4), /* loadd. */ > +COSTS_N_INSNS (5), /* load_unaligned. */ > +0, /* store. */ > +0, /* strd. */ > +0, /* stm_1st. */ > +1, /* stm_regs_per_insn_1st. */ > +2, /* stm_regs_per_insn_subsequent. */ > +0, /* storef. */ > +0, /* stored. */ > +COSTS_N_INSNS (1), /* store_unaligned. */ > +COSTS_N_INSNS (1), /* loadv. */ > +COSTS_N_INSNS (1) /* storev. 
*/ > + }, > + { > +/* FP SFmode */ > +{ > +
[PATCH] AArch64: Add cost table for Cortex-A76
Add an initial cost table for Cortex-A76 - this is copied from cortexa57_extra_costs but updates it based on the Optimization Guide. Use the new cost table on all Neoverse tunings and ensure the tunings are consistent for all. As a result more compact code is generated with more combined shift+alu operations. Eg. -mcpu=cortex-a76 will now merge the shifts in: int f(int x, int y) { return (x & y << 3) * (x | y << 3); } and w2, w0, w1, lsl 3 orr w0, w0, w1, lsl 3 mul w0, w2, w0 ret SPEC2017 codesize improves by 0.02% and SPECINT2017 shows 0.24% gain. Bootstrap OK, regress passes, OK for commit? ChangeLog: 2020-11-18 Wilco Dijkstra * config/aarch64/aarch64.c (neoversen1_tunings): Use new cortexa76_extra_costs. (neoversev1_tunings): Likewise. (neoversen2_tunings): Likewise. * config/arm/aarch-cost-tables.h (cortexa76_extra_costs): add new costs. --- diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 6bf2f9aa344f9150dec72db660d951e50521285c..65ff49d2b4125013466f90a54ff698ae810580f0 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1312,7 +1312,7 @@ static const struct tune_params thunderx3t110_tunings = static const struct tune_params neoversen1_tunings = { - &cortexa57_extra_costs, + &cortexa76_extra_costs, &generic_addrcost_table, &generic_regmove_cost, &cortexa57_vector_cost, @@ -1338,7 +1338,7 @@ static const struct tune_params neoversen1_tunings = static const struct tune_params neoversev1_tunings = { - &cortexa57_extra_costs, + &cortexa76_extra_costs, &generic_addrcost_table, &generic_regmove_cost, &cortexa57_vector_cost, @@ -1364,7 +1364,7 @@ static const struct tune_params neoversev1_tunings = static const struct tune_params neoversen2_tunings = { - &cortexa57_extra_costs, + &cortexa76_extra_costs, &generic_addrcost_table, &generic_regmove_cost, &cortexa57_vector_cost, diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h index cf8186599018cc5e51cf44e4f2080a502d895e1d..1b9d53d07b54bddf1767121236b06d2b4581631c 100644 --- a/gcc/config/arm/aarch-cost-tables.h +++ b/gcc/config/arm/aarch-cost-tables.h @@ -331,6
+331,109 @@ const struct cpu_cost_table cortexa57_extra_costs = } }; +const struct cpu_cost_table cortexa76_extra_costs = +{ + /* ALU */ + { +0, /* arith. */ +0, /* logical. */ +0, /* shift. */ +0, /* shift_reg. */ +COSTS_N_INSNS (1), /* arith_shift. */ +COSTS_N_INSNS (1), /* arith_shift_reg. */ +0,/* log_shift. */ +COSTS_N_INSNS (1), /* log_shift_reg. */ +0, /* extend. */ +COSTS_N_INSNS (1), /* extend_arith. */ +COSTS_N_INSNS (1), /* bfi. */ +0, /* bfx. */ +0, /* clz. */ +0, /* rev. */ +0, /* non_exec. */ +true /* non_exec_costs_exec. */ + }, + { +/* MULT SImode */ +{ + COSTS_N_INSNS (1), /* simple. */ + COSTS_N_INSNS (2), /* flag_setting. */ + COSTS_N_INSNS (1), /* extend. */ + COSTS_N_INSNS (1), /* add. */ + COSTS_N_INSNS (1), /* extend_add. */ + COSTS_N_INSNS (6) /* idiv. */ +}, +/* MULT DImode */ +{ + COSTS_N_INSNS (3), /* simple. */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (1), /* extend. */ + COSTS_N_INSNS (3), /* add. */ + COSTS_N_INSNS (1), /* extend_add. */ + COSTS_N_INSNS (10) /* idiv. */ +} + }, + /* LD/ST */ + { +COSTS_N_INSNS (3), /* load. */ +COSTS_N_INSNS (3), /* load_sign_extend. */ +COSTS_N_INSNS (3), /* ldrd. */ +COSTS_N_INSNS (2), /* ldm_1st. */ +1, /* ldm_regs_per_insn_1st. */ +2, /* ldm_regs_per_insn_subsequent. */ +COSTS_N_INSNS (4), /* loadf. */ +COSTS_N_INSNS (4), /* loadd. */ +COSTS_N_INSNS (5), /* load_unaligned. */ +0, /* store. */ +0, /* strd. */ +0, /* stm_1st. */ +1, /* stm_regs_per_insn_1st. */ +2, /* stm_regs_per_insn_subsequent. */ +0, /* storef. */ +0, /* stored. */ +COSTS_N_INSNS (1), /* store_unaligned. */ +COSTS_N_INSNS (1), /* loadv. */ +COSTS_N_INSNS (1) /* storev. */ + }, + { +/* FP SFmode */ +{ + COSTS_N_INSNS (10), /* div. */ + COSTS_N_INSNS (2), /* mult. */ + COSTS_N_INSNS (3), /* mult_addsub. */ + COSTS_N_INSNS (3), /* fma. */ + COSTS_N_INSNS (1), /* addsub. */ + 0, /* fpconst. */ + 0, /* neg. */ + 0,