On Wed, 17 Nov 2021 at 20:40, Palmer Dabbelt <pal...@rivosinc.com> wrote:
> [This is my first time trying my Rivos address on the lists, so sorry if > something goes off the rails.] > > On Wed, 17 Nov 2021 06:05:04 PST (-0800), gcc-patches@gcc.gnu.org wrote: > > Hi Philipp: > > > > Thanks for the patch, I like this approach, that can easily configure > > different capabilities for each core :) > > > > So there are only a few minor comments for this patch. > > > > On Mon, Nov 15, 2021 at 5:49 AM Philipp Tomsich > > <philipp.toms...@vrull.eu> wrote: > >> > >> From: Philipp Tomsich <philipp.toms...@theobroma-systems.com> > >> > >> The Ventana VT1 core supports quad-issue and instruction fusion. > >> This implemented TARGET_SCHED_MACRO_FUSION_P to keep fusible sequences > >> together and adds idiom matcheing for the supported fusion cases. > > There's a typo at "matcheing". > > >> > >> gcc/ChangeLog: > >> > >> * config/riscv/riscv.c (enum riscv_fusion_pairs): Add symbolic > >> constants to identify supported fusion patterns. > >> (struct riscv_tune_param): Add fusible_op field. > >> (riscv_macro_fusion_p): Implement. > >> (riscv_fusion_enabled_p): Implement. > >> (riscv_macro_fusion_pair_p): Implement and recoginze fusible > >> idioms for Ventana VT1. > >> (TARGET_SCHED_MACRO_FUSION_P): Point to riscv_macro_fusion_p. > >> (TARGET_SCHED_MACRO_FUSION_PAIR_P): Point to > riscv_macro_fusion_pair_p. > >> > >> Signed-off-by: Philipp Tomsich <philipp.toms...@vrull.eu> > > This doesn't match the From (though admittedly I'm pretty new to the SoB > stuff in GCC, so I'm not sure if that's even a rule here). > I noticed that I hadn't reset the authors and that patman had inserted a Signed-off-by: for that reason, right after I sent this out. Given that it's all me and there's both individual assignment paperwork and company disclaimers on file for all of the email-addresses, this should be fine. >> --- > >> > >> gcc/config/riscv/riscv.c | 196 +++++++++++++++++++++++++++++++++++++++ > >> 1 file changed, 196 insertions(+) > >> > >> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c > >> index 6b918db65e9..8eac52101a3 100644 > >> --- a/gcc/config/riscv/riscv.c > >> +++ b/gcc/config/riscv/riscv.c > >> @@ -211,6 +211,19 @@ struct riscv_integer_op { > >> The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI. */ > >> #define RISCV_MAX_INTEGER_OPS 8 > >> > >> +enum riscv_fusion_pairs > >> +{ > >> + RISCV_FUSE_NOTHING = 0, > >> + RISCV_FUSE_ZEXTW = (1 << 0), > >> + RISCV_FUSE_ZEXTH = (1 << 1), > >> + RISCV_FUSE_ZEXTWS = (1 << 2), > >> + RISCV_FUSE_LDINDEXED = (1 << 3), > > > > RISCV_FUSE_LDINDEXED -> RISCV_FUSE_LD_INDEXED > > > > Could you add some comment for above enums, like that: > > /* slli rx, rx, 32 + srli rx, rx, 32 */ > > RISCV_FUSE_ZEXTW > > > > So that we could know what kind of instruction will be funded for this > enum. > > > >> + RISCV_FUSE_LUI_ADDI = (1 << 4), > >> + RISCV_FUSE_AUIPC_ADDI = (1 << 5), > >> + RISCV_FUSE_LUI_LD = (1 << 6), > >> + RISCV_FUSE_AUIPC_LD = (1 << 7), > >> +}; > >> + > >> /* Costs of various operations on the different architectures. */ > >> > >> struct riscv_tune_param > >> @@ -224,6 +237,7 @@ struct riscv_tune_param > >> unsigned short branch_cost; > >> unsigned short memory_cost; > >> bool slow_unaligned_access; > >> + unsigned int fusible_ops; > >> }; > >> > >> /* Information about one micro-arch we know about. */ > >> @@ -289,6 +303,7 @@ static const struct riscv_tune_param > rocket_tune_info = { > >> 3, /* branch_cost */ > >> 5, /* memory_cost */ > >> true, /* > slow_unaligned_access */ > >> + RISCV_FUSE_NOTHING, /* fusible_ops */ > >> }; > > There's some tab/space issues here (and in the below ones). They align > when merged, but the new lines are spaces-only and the old ones have > internal spaces mixed with tabs (IIRC that's to the GCC style, if not we > should fix these to at least be consistent). > > >> > >> /* Costs to use when optimizing for Sifive 7 Series. */ > >> @@ -302,6 +317,7 @@ static const struct riscv_tune_param > sifive_7_tune_info = { > >> 4, /* branch_cost */ > >> 3, /* memory_cost */ > >> true, /* > slow_unaligned_access */ > >> + RISCV_FUSE_NOTHING, /* fusible_ops */ > >> }; > >> > >> /* Costs to use when optimizing for T-HEAD c906. */ > >> @@ -328,6 +344,7 @@ static const struct riscv_tune_param > optimize_size_tune_info = { > >> 1, /* branch_cost */ > >> 2, /* memory_cost */ > >> false, /* > slow_unaligned_access */ > >> + RISCV_FUSE_NOTHING, /* fusible_ops */ > >> }; > >> > >> /* Costs to use when optimizing for Ventana Micro VT1. */ > >> @@ -341,6 +358,10 @@ static const struct riscv_tune_param > ventana_vt1_tune_info = { > >> 4, /* branch_cost */ > >> 5, /* memory_cost */ > >> false, /* > slow_unaligned_access */ > >> + ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH | /* fusible_ops */ > >> + RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED | > >> + RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI | > >> + RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD ) > >> }; > >> > >> static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, > bool *); > >> @@ -4909,6 +4930,177 @@ riscv_issue_rate (void) > >> return tune_param->issue_rate; > >> } > >> > >> +/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target > supports > >> + instruction fusion of some sort. */ > >> + > >> +static bool > >> +riscv_macro_fusion_p (void) > >> +{ > >> + return tune_param->fusible_ops != RISCV_FUSE_NOTHING; > >> +} > >> + > >> +/* Return true iff the instruction fusion described by OP is enabled. > */ > >> + > >> +static bool > >> +riscv_fusion_enabled_p(enum riscv_fusion_pairs op) > > > > space between function name and parentheses. > > > > riscv_fusion_enabled_p (enum riscv_fusion_pairs op) > > > >> +{ > >> + return tune_param->fusible_ops & op; > >> +} > >> + > >> +/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV > and CURR > >> + should be kept together during scheduling. */ > >> + > >> +static bool > >> +riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) > >> +{ > >> + rtx prev_set = single_set (prev); > >> + rtx curr_set = single_set (curr); > >> + /* prev and curr are simple SET insns i.e. no flag setting or > branching. */ > >> + bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr); > >> + > >> + if (!riscv_macro_fusion_p ()) > >> + return false; > >> + > >> + if (simple_sets_p && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW) || > >> + riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))) > >> + { > >> + /* We are trying to match the following: > >> + prev (slli) == (set (reg:DI rD) > >> + (ashift:DI (reg:DI rS) (const_int 32))) > >> + curr (slri) == (set (reg:DI rD) > > This really jumps out as the sort of thing that should be in RTL, but > I'm not sure that's feasible. The short branch->cmov optimization we're > doing for the SiFive 7 series is sort of fusion and done in RTL, but the > mechanism is very different there so it wouldn't apply directly. There > are other ports doing it this way so I'm not opposed to merging this > as-is, but if there's a straight-forward way to port this to RTL then > it's probably be a lot saner to maintain in the long term. I can't come > up with anything sane, though. > > On the subject of maintainability: I don't see any tests for this. > Given that we're exceedingly likely to end up with other targets that > have front-end fusion and thus try to share this code, it'd be really > nice to have either some documentation of what fusions this core > performs or some tests that exercise the important ones. I can't find > any other references to "Ventana VT1" on the internet, but LMK if > there's something I'm missing. > > I'm not sure lacking tests should block merging this (as it's not like > we have much optimization-based testing for the RISC-V port), but > without at least some public documentation it's going to be hard to > produce those outside of Ventana -- essentially I'm worried about ending > up with the same black-box pipeline problem we have for the C906. > > I seem to remember having tests for the SiFive optimization, but I > couldn't find any when I looked. Maybe Jim or Kito remembers where to > find them, otherwise I'll throw some together. > > > > > slri -> srli > > > >> + (lshiftrt:DI (reg:DI rD) (const_int > <shift>))) > >> + with <shift> being either 32 for FUSE_ZEXTW, or > >> + `less than 32 for FUSE_ZEXTWS. */ > > > > `less <-- I guess the ` is kind of an accident? > > > >> + > >> + if (GET_CODE (SET_SRC (prev_set)) == ASHIFT > >> + && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT > >> + && REG_P (SET_DEST (prev_set)) > >> + && REG_P (SET_DEST (curr_set)) > >> + && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) > >> + && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST > (curr_set)) > >> + && CONST_INT_P (XEXP (SET_SRC (prev_set), 1)) > >> + && CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) > >> + && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32 > >> + && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32 > >> + && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) ) > > > > space between function name and parentheses. > > > >> + || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32 > >> + && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS)))) > > > > space between function name and parentheses. > > > >> + return true; > >> + } > >> + > >> + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH)) > >> + { > >> + /* We are trying to match the following: > >> + prev (slli) == (set (reg:DI rD) > >> + (ashift:DI (reg:DI rS) (const_int 48))) > >> + curr (slri) == (set (reg:DI rD) > >> + (lshiftrt:DI (reg:DI rD) (const_int > 48))) */ > > > > slri -> srli > > > >> + > >> + if (GET_CODE (SET_SRC (prev_set)) == ASHIFT > >> + && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT > >> + && REG_P (SET_DEST (prev_set)) > >> + && REG_P (SET_DEST (curr_set)) > >> + && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set)) > >> + && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST > (curr_set)) > >> + && CONST_INT_P (XEXP (SET_SRC (prev_set), 1)) > >> + && CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) > >> + && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48 > >> + && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48) > >> + return true; > >> + } > >> + > >> + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED)) > >> + { > >> + /* We are trying to match the following: > >> + prev (add) == (set (reg:DI rD) > >> + (plus:DI (reg:DI rS1) (reg:DI rS2)) > >> + curr (ld) == (set (reg:DI rD) > >> + (mem:DI (reg:DI rD))) */ > >> + > >> + if (MEM_P (SET_SRC (curr_set)) > >> + && REG_P (XEXP (SET_SRC (curr_set), 0)) > >> + && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST > (prev_set)) > >> + && GET_CODE (SET_SRC (prev_set)) == PLUS > >> + && REG_P (XEXP (SET_SRC (prev_set), 0)) > >> + && REG_P (XEXP (SET_SRC (prev_set), 1))) > >> + return true; > >> + > >> + /* We are trying to match the following: > >> + prev (add) == (set (reg:DI rD) > >> + (plus:DI (reg:DI rS1) (reg:DI rS2))) > >> + curr (lw) == (set (any_extend:DI (mem:SUBX (reg:DI rD)))) */ > >> + > >> + if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND > >> + || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND)) > >> + && MEM_P (XEXP (SET_SRC (curr_set), 0)) > >> + && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) > >> + && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO > (SET_DEST (prev_set)) > > > > This line is over 80 columns. > > > > > >> + && GET_CODE (SET_SRC (prev_set)) == PLUS > >> + && REG_P (XEXP (SET_SRC (prev_set), 0)) > >> + && REG_P (XEXP (SET_SRC (prev_set), 1))) > >> + return true; > >> + } > >> + > >> + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI)) > >> + { > >> + /* We are trying to match the following: > >> + prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20)) > >> + curr (addi) == (set (reg:DI rD) > >> + (plus:DI (reg:DI rD) (const_int IMM12))) > */ > >> + > >> + if (GET_CODE (SET_SRC (curr_set)) == PLUS > >> + && CONST_INT_P (XEXP (SET_SRC (curr_set), 1)) > >> + && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))) > >> + && CONST_INT_P (SET_SRC (prev_set)) > >> + && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))) > >> + return true; > >> + } > >> + > >> + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI)) > >> + { > >> + /* We are trying to match the following: > >> + prev (auipc) == (set (reg:DI rD) (unspec:DI [...] > UNSPEC_AUIPC)) > >> + prev (addi) == (set (reg:DI rD) > >> + (plus:DI (reg:DI rD) (const_int > IMM12))) */ > >> + > >> + if (GET_CODE (SET_SRC (prev_set)) == UNSPEC > >> + && XINT (prev_set, 1) == UNSPEC_AUIPC > >> + && GET_CODE (SET_SRC (curr_set)) == PLUS > >> + && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))) > >> + return true; > >> + } > >> + > >> + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD)) > >> + { > >> + /* We are trying to match the following: > >> + prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20)) > >> + curr (ld) == (set (reg:DI rD) > >> + (mem:DI (plus:DI (reg:DI rD) (const_int > IMM12)))) */ > >> + > >> + if (CONST_INT_P (SET_SRC (prev_set)) > >> + && LUI_OPERAND (INTVAL (SET_SRC (prev_set))) > >> + && MEM_P (SET_SRC (curr_set)) > >> + && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS) > >> + return true; > >> + } > >> + > >> + if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD)) > >> + { > >> + /* We are trying to match the following: > >> + prev (auipc) == (set (reg:DI rD) (unspec:DI [...] > UNSPEC_AUIPC)) > >> + curr (ld) == (set (reg:DI rD) > >> + (mem:DI (plus:DI (reg:DI rD) (const_int > IMM12)))) */ > >> + > >> + if (GET_CODE (SET_SRC (prev_set)) == UNSPEC > >> + && XINT (prev_set, 1) == UNSPEC_AUIPC > >> + && MEM_P (SET_SRC (curr_set)) > >> + && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS) > >> + return true; > >> + } > >> + > >> + return false; > >> +} > >> + > >> /* Auxiliary function to emit RISC-V ELF attribute. */ > >> static void > >> riscv_emit_attribute () > >> @@ -5676,6 +5868,10 @@ riscv_asan_shadow_offset (void) > >> > >> #undef TARGET_SCHED_ISSUE_RATE > >> #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate > >> +#undef TARGET_SCHED_MACRO_FUSION_P > >> +#define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p > >> +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P > >> +#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p > >> > >> #undef TARGET_FUNCTION_OK_FOR_SIBCALL > >> #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall > >> -- > >> 2.32.0 > >> >