Hi,
Attached is a patch that adds star-mc1-specific cost functions and tables.
With these separate implementations, developers can make their own
adjustments to fine-tune star-mc1 performance without affecting
other CPU configurations.
Bootstrapped and tested on arm-none-eabi.
Is it OK for trunk?
Regards,
jasonwucj
From e9081bb6d7fc1521036dbceec59ba2eae532c04c Mon Sep 17 00:00:00 2001
From: Chung-Ju Wu <jasonw...@gmail.com>
Date: Thu, 26 May 2022 03:47:23 +0000
Subject: [PATCH 2/3] arm: Add individual star-mc1 cost tables and cost
functions
Signed-off-by: Chung-Ju Wu <jasonw...@gmail.com>
gcc/ChangeLog:
* config/arm/arm-cpus.in (star-mc1): Use star_mc1 costs.
* config/arm/arm.cc (arm_star_mc1_branch_cost): New function.
(star_mc1_extra_costs): New struct.
(arm_star_mc1_tune): New struct.
---
gcc/config/arm/arm-cpus.in | 2 +-
gcc/config/arm/arm.cc | 139 +++++++++++++++++++++++++++++++++++++
2 files changed, 140 insertions(+), 1 deletion(-)
diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in
index 5a63bc548e5..6a346e4a93d 100644
--- a/gcc/config/arm/arm-cpus.in
+++ b/gcc/config/arm/arm-cpus.in
@@ -1645,7 +1645,7 @@ begin cpu star-mc1
option nofp remove ALL_FP
option nodsp remove armv7em
isa quirk_no_asmcpu quirk_vlldm
- costs v7m
+ costs star_mc1
end cpu star-mc1
# V8 R-profile implementations.
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 70c2d50f0cc..c8f96f92a59 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -295,6 +295,7 @@ static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);
+static int arm_star_mc1_branch_cost (bool, bool);
static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
const vec_perm_indices &);
@@ -1847,6 +1848,113 @@ const struct cpu_cost_table v7m_extra_costs =
}
};
+const struct cpu_cost_table star_mc1_extra_costs =
+{ /* Per-instruction-class extra costs for star-mc1. */
+ /* ALU */
+ {
+ 0, /* arith. */
+ 0, /* logical. */
+ 0, /* shift. */
+ 0, /* shift_reg. */
+ 0, /* arith_shift. */
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
+ 0, /* log_shift. */
+ COSTS_N_INSNS (1), /* log_shift_reg. */
+ 0, /* extend. */
+ COSTS_N_INSNS (1), /* extend_arith. */
+ 0, /* bfi. */
+ 0, /* bfx. */
+ 0, /* clz. */
+ 0, /* rev. */
+ COSTS_N_INSNS (1), /* non_exec. */
+ false /* non_exec_costs_exec. */
+ },
+ { /* MULT, one sub-table per mode. */
+ /* MULT SImode */
+ {
+ COSTS_N_INSNS (1), /* simple. */
+ COSTS_N_INSNS (1), /* flag_setting. */
+ COSTS_N_INSNS (2), /* extend. */
+ COSTS_N_INSNS (1), /* add. */
+ COSTS_N_INSNS (3), /* extend_add. */
+ COSTS_N_INSNS (8) /* idiv. */
+ },
+ /* MULT DImode */
+ {
+ 0, /* simple (N/A). */
+ 0, /* flag_setting (N/A). */
+ COSTS_N_INSNS (2), /* extend. */
+ 0, /* add (N/A). */
+ COSTS_N_INSNS (3), /* extend_add. */
+ 0 /* idiv (N/A). */
+ }
+ },
+ /* LD/ST */
+ {
+ COSTS_N_INSNS (2), /* load. */
+ 0, /* load_sign_extend. */
+ COSTS_N_INSNS (3), /* ldrd. */
+ COSTS_N_INSNS (2), /* ldm_1st. */
+ 1, /* ldm_regs_per_insn_1st (a register count, not a cost). */
+ 1, /* ldm_regs_per_insn_subsequent (a register count, not a cost). */
+ COSTS_N_INSNS (2), /* loadf. */
+ COSTS_N_INSNS (3), /* loadd. */
+ COSTS_N_INSNS (1), /* load_unaligned. */
+ COSTS_N_INSNS (2), /* store. */
+ COSTS_N_INSNS (3), /* strd. */
+ COSTS_N_INSNS (2), /* stm_1st. */
+ 1, /* stm_regs_per_insn_1st (a register count, not a cost). */
+ 1, /* stm_regs_per_insn_subsequent (a register count, not a cost). */
+ COSTS_N_INSNS (2), /* storef. */
+ COSTS_N_INSNS (3), /* stored. */
+ COSTS_N_INSNS (1), /* store_unaligned. */
+ COSTS_N_INSNS (1), /* loadv. */
+ COSTS_N_INSNS (1) /* storev. */
+ },
+ { /* FP, one sub-table per mode. */
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (7), /* div. */
+ COSTS_N_INSNS (2), /* mult. */
+ COSTS_N_INSNS (5), /* mult_addsub. */
+ COSTS_N_INSNS (3), /* fma. */
+ COSTS_N_INSNS (1), /* addsub. */
+ 0, /* fpconst. */
+ 0, /* neg. */
+ 0, /* compare. */
+ 0, /* widen. */
+ 0, /* narrow. */
+ 0, /* toint. */
+ 0, /* fromint. */
+ 0 /* roundint. */
+ },
+ /* FP DFmode */
+ {
+ COSTS_N_INSNS (15), /* div. */
+ COSTS_N_INSNS (5), /* mult. */
+ COSTS_N_INSNS (7), /* mult_addsub. */
+ COSTS_N_INSNS (7), /* fma. */
+ COSTS_N_INSNS (3), /* addsub. */
+ 0, /* fpconst. */
+ 0, /* neg. */
+ 0, /* compare. */
+ 0, /* widen. */
+ 0, /* narrow. */
+ 0, /* toint. */
+ 0, /* fromint. */
+ 0 /* roundint. */
+ }
+ },
+ /* Vector */
+ {
+ COSTS_N_INSNS (1), /* alu. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (1), /* movi. */
+ COSTS_N_INSNS (2), /* dup. */
+ COSTS_N_INSNS (2) /* extract. */
+ }
+};
+
const struct addr_mode_cost_table generic_addr_mode_costs =
{
/* int. */
@@ -2370,6 +2478,30 @@ const struct tune_params arm_cortex_m7_tune =
tune_params::SCHED_AUTOPREF_OFF
};
+/* star-mc1 tuning: uses the star-mc1 cost table and branch cost function. */
+
+const struct tune_params arm_star_mc1_tune =
+{
+ &star_mc1_extra_costs, /* Insn extra costs. */
+ &generic_addr_mode_costs, /* Addressing mode costs. */
+ NULL, /* Sched adj cost. */
+ arm_star_mc1_branch_cost, /* Branch cost function. */
+ &arm_default_vec_cost, /* Default vector costs. */
+ 1, /* Constant limit. */
+ 2, /* Max cond insns. */
+ 8, /* Memset max inline. */
+ 1, /* Issue rate. */
+ ARM_PREFETCH_NOT_BENEFICIAL,
+ tune_params::PREF_CONST_POOL_TRUE,
+ tune_params::PREF_LDRD_FALSE,
+ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
+ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
+ tune_params::DISPARAGE_FLAGS_NEITHER,
+ tune_params::PREF_NEON_STRINGOPS_FALSE,
+ tune_params::FUSE_NOTHING,
+ tune_params::SCHED_AUTOPREF_OFF
+};
+
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
cortex-m23. */
@@ -12622,6 +12754,13 @@ arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}
+static int
+arm_star_mc1_branch_cost (bool speed_p, bool predictable_p)
+{ /* Cost is 1 when TARGET_32BIT and speed_p both hold, else the default. */
+ return (TARGET_32BIT && speed_p) ? 1
+ : arm_default_branch_cost (speed_p, predictable_p);
+}
+
static bool fp_consts_inited = false;
static REAL_VALUE_TYPE value_fp0;
--
2.31.1