Hi Jiawei, I downloaded your series of patches and found that in some cases it fails to generate Zcmp push and pop insns.
test.c: char my_getchar(); int test_s0() { int a = my_getchar(); int b = my_getchar(); return a+b; } On Thu Apr 6 06:21:17 GMT 2023 Jiawei jia...@iscas.ac.cn wrote: > >Add Zcmp extension instructions support. Generate push/pop >with the following steps: > > 1. preprocessing: > 1.1. if there is no push rtx, then just return. e.g. > (note 5 1 22 2 [bb 2] NOTE_INSN_BASIC_BLOCK) > (insn/f 22 5 23 2 (set (reg/f:SI 2 sp) > (plus:SI (reg/f:SI 2 sp) > (const_int -32 [0xffffffffffffffe0]))) > (nil)) > (note 23 22 2 2 NOTE_INSN_PROLOGUE_END) > 1.2. if push rtx exists, then we compute the number of > pushed s-registers, n_sreg. > > push rtx should be found before the NOTE_INSN_PROLOGUE_END tag > > [2 and 3 happen simultaneously] > > 2. find valid move pattern, mv sN, aN, where N < n_sreg, > and aN is not used the move pattern, and sN is not > defined before the move pattern (from prologue to the > position of move pattern). > > 3. analyze the use and reach of every instruction from prologue > to the position of move pattern. > if any sN is used, then we mark the corresponding argument list > candidate as invalid. > e.g. > push {ra,s0-s3}, {}, -32 > sw s0,44(sp) # s0 is used, then argument list is invalid > mv a0,a5 # a0 is defined, then argument list is invalid > ... > mv s0,a0 > mv s1,a1 > mv s2,a2 > > 4. if there is a valid argument list, then replace the pop > push parallel insn, and delete mv pattern. > if not, skip. > >All "zcmpe" means Zcmp with RVE extension. >The push/pop instruction implementation was mostly done by Sinan Lin. > >Co-Authored by: Sinan Lin <sinan....@linux.alibaba.com> >Co-Authored by: Simon Cook <simon.c...@embecosm.com> >Co-Authored by: Shihua Liao <shi...@iscas.ac.cn> > >gcc/ChangeLog: > > * config.gcc: New object. > * config/riscv/predicates.md (riscv_stack_push_operation): > New predicate. > (riscv_stack_pop_operation): Ditto. > (pop_return_value_constant): Ditto. > * config/riscv/riscv-passes.def (INSERT_PASS_AFTER): New pass. 
> * config/riscv/riscv-protos.h (riscv_output_popret_p): > New routine. > (riscv_valid_stack_push_pop_p): Ditto. > (riscv_check_regno): Ditto. > (make_pass_zcmp_popret): New pass. > * config/riscv/riscv.cc (struct riscv_frame_info): New variable. > (riscv_output_popret_p): New function. > (riscv_print_pop_size): Ditto. > (riscv_print_reglist): Ditto. > (riscv_print_operand): New case symbols. > (riscv_save_push_pop_count): New function. > (riscv_push_pop_base_sp_adjust): Ditto. > (riscv_use_push_pop): Ditto. > (riscv_compute_frame_info): Adjust frame value. > (riscv_emit_pop_insn): New function. > (riscv_check_regno): Ditto. > (riscv_valid_stack_push_pop_p): Ditto. > (riscv_emit_push_insn): Ditto. > (riscv_expand_prologue): Modify frame pattern. > (riscv_expand_epilogue): Ditto. > * config/riscv/riscv.h (RETURN_VALUE_REGNUM): > (RISCV_ZCE_PUSH_POP_MASK): New mask. > (RISCV_ZCMPE_PUSH_POP_MASK): Ditto. > * config/riscv/riscv.md: Add new reg number and include info. > * config/riscv/t-riscv: New object rules. > * config/riscv/riscv-zcmp-popret.cc: New file. > * config/riscv/zc.md: New file. 
>--- > gcc/config.gcc | 2 +- > gcc/config/riscv/predicates.md | 16 + > gcc/config/riscv/riscv-passes.def | 1 + > gcc/config/riscv/riscv-protos.h | 4 + > gcc/config/riscv/riscv-zcmp-popret.cc | 260 +++++++++++++++ > gcc/config/riscv/riscv.cc | 437 +++++++++++++++++++++++++- > gcc/config/riscv/riscv.h | 4 + > gcc/config/riscv/riscv.md | 3 + > gcc/config/riscv/t-riscv | 4 + > gcc/config/riscv/zc.md | 47 +++ > 10 files changed, 767 insertions(+), 11 deletions(-) > create mode 100644 gcc/config/riscv/riscv-zcmp-popret.cc > create mode 100644 gcc/config/riscv/zc.md > >diff --git a/gcc/config.gcc b/gcc/config.gcc >index 629d324b5ef..a991c5273f9 100644 >--- a/gcc/config.gcc >+++ b/gcc/config.gcc >@@ -529,7 +529,7 @@ pru-*-*) > ;; > riscv*) > cpu_type=riscv >- extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o >riscv-shorten-memrefs.o riscv-selftests.o riscv-v.o riscv-vsetvl.o" >+ extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o >riscv-shorten-memrefs.o riscv-selftests.o riscv-v.o riscv-vsetvl.o >riscv-zcmp-popret.o" > extra_objs="${extra_objs} riscv-vector-builtins.o >riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o" > extra_objs="${extra_objs} thead.o" > d_target_objs="riscv-d.o" >diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md >index 0d9d7701c7e..6bff6cd047a 100644 >--- a/gcc/config/riscv/predicates.md >+++ b/gcc/config/riscv/predicates.md >@@ -412,3 +412,19 @@ > (and (match_code "const_int") > (ior (match_operand 0 "not_uimm_extra_bit_operand") > (match_operand 0 "const_nottwobits_operand")))) >+ >+(define_special_predicate "riscv_stack_push_operation" >+ (match_code "parallel") >+{ >+ return riscv_valid_stack_push_pop_p (op, true); >+}) >+ >+(define_special_predicate "riscv_stack_pop_operation" >+ (match_code "parallel") >+{ >+ return riscv_valid_stack_push_pop_p (op, false); >+}) >+ >+(define_predicate "pop_return_value_constant" >+ (and (match_code "const_int") >+ (match_test "INTVAL (op) == 0"))) >diff --git 
a/gcc/config/riscv/riscv-passes.def >b/gcc/config/riscv/riscv-passes.def >index 4084122cf0a..25625b9af3e 100644 >--- a/gcc/config/riscv/riscv-passes.def >+++ b/gcc/config/riscv/riscv-passes.def >@@ -19,3 +19,4 @@ > > INSERT_PASS_AFTER (pass_rtl_store_motion, 1, pass_shorten_memrefs); > INSERT_PASS_BEFORE (pass_fast_rtl_dce, 1, pass_vsetvl); >+INSERT_PASS_AFTER (pass_cprop_hardreg, 1, pass_zcmp_popret); >diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h >index 4611447ddde..8f243cd5f44 100644 >--- a/gcc/config/riscv/riscv-protos.h >+++ b/gcc/config/riscv/riscv-protos.h >@@ -54,6 +54,7 @@ extern bool riscv_split_64bit_move_p (rtx, rtx); > extern void riscv_split_doubleword_move (rtx, rtx); > extern const char *riscv_output_move (rtx, rtx); > extern const char *riscv_output_return (); >+extern bool riscv_output_popret_p (rtx); > > #ifdef RTX_CODE > extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx); >@@ -79,6 +80,8 @@ extern void riscv_reinit (void); > extern poly_uint64 riscv_regmode_natural_size (machine_mode); > extern bool riscv_v_ext_vector_mode_p (machine_mode); > extern bool riscv_shamt_matches_mask_p (int, HOST_WIDE_INT); >+extern bool riscv_valid_stack_push_pop_p (rtx, bool); >+extern bool riscv_check_regno(rtx, unsigned); > > /* Routines implemented in riscv-c.cc. */ > void riscv_cpu_cpp_builtins (cpp_reader *); >@@ -99,6 +102,7 @@ extern bool riscv_hard_regno_rename_ok (unsigned, unsigned); > > rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt); > rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt); >+rtl_opt_pass * make_pass_zcmp_popret (gcc::context *ctxt); > > /* Information about one CPU we know about. 
*/ > struct riscv_cpu_info { >diff --git a/gcc/config/riscv/riscv-zcmp-popret.cc >b/gcc/config/riscv/riscv-zcmp-popret.cc >new file mode 100644 >index 00000000000..d7b40f6a3e2 >--- /dev/null >+++ b/gcc/config/riscv/riscv-zcmp-popret.cc >@@ -0,0 +1,260 @@ >+#include "config.h" >+#include "system.h" >+#include "coretypes.h" >+#include "tm.h" >+#include "rtl.h" >+#include "backend.h" >+#include "regs.h" >+#include "target.h" >+#include "memmodel.h" >+#include "emit-rtl.h" >+#include "df.h" >+#include "predict.h" >+#include "tree-pass.h" >+#include "tree.h" >+#include "tm_p.h" >+#include "optabs.h" >+#include "recog.h" >+#include "cfgrtl.h" >+ >+#define IN_TARGET_CODE 1 >+ >+namespace { >+ >+/* >+ 1. preprocessing: >+ 1.1. if there is no push rtx, then just return. e.g. >+ (note 5 1 22 2 [bb 2] NOTE_INSN_BASIC_BLOCK) >+ (insn/f 22 5 23 2 (set (reg/f:SI 2 sp) >+ (plus:SI (reg/f:SI 2 sp) >+ (const_int -32 [0xffffffffffffffe0]))) >+ (nil)) >+ (note 23 22 2 2 NOTE_INSN_PROLOGUE_END) >+ 1.2. if push rtx exists, then we compute the number of >+ pushed s-registers, n_sreg. >+ >+ push rtx should be find before NOTE_INSN_PROLOGUE_END tag >+ >+ [2 and 3 happend simultaneously] >+ 2. find valid move pattern, mv sN, aN, where N < n_sreg, >+ and aN is not used the move pattern, and sN is not >+ defined before the move pattern (from prologue to the >+ position of move pattern). >+ 3. analysis use and reach of every instruction from prologue >+ to the position of move pattern. >+ if any sN is used, then we mark the corresponding argument list >+ candidate as invalid. >+ e.g. >+ push {ra,s0-s3}, {}, -32 >+ sw s0,44(sp) # s0 is used, then argument list is invalid >+ mv a0,a5 # a0 is defined, then argument list is invalid >+ ... >+ mv s0,a0 >+ mv s1,a1 >+ mv s2,a2 >+ >+ 4. if there is a valid argument list, then replace the pop >+ push parallel insn, and delete mv pattern. >+ if not, skip. 
>+*/ >+ >+static void >+emit_zcmp_popret (rtx_insn *pop_rtx, >+ rtx_insn **candidates, >+ basic_block bb) >+{ >+ bool gen_popretz_p = candidates [0]; >+ bool gen_popret_p = candidates [2]; >+ >+ if (!(gen_popret_p || gen_popretz_p)) >+ return; >+ >+ gcc_assert ((gen_popret_p && !gen_popretz_p) >+ || (gen_popretz_p && gen_popret_p)); >+ >+ rtx pop_pat = PATTERN (pop_rtx); >+ unsigned pop_idx = 0, popret_idx = 0; >+ unsigned n_pop_par = XVECLEN (pop_pat, 0); >+ unsigned n_popret_par = n_pop_par >+ + (gen_popretz_p ? 2 : 0) >+ + (gen_popret_p ? 2 : 0); >+ >+ rtx popret_par = gen_rtx_PARALLEL (VOIDmode, >+ rtvec_alloc (n_popret_par)); >+ >+ /* return zero pattern */ >+ if (gen_popretz_p) >+ { >+ XVECEXP (popret_par, 0, 0) = PATTERN (candidates[0]); >+ XVECEXP (popret_par, 0, 1) = PATTERN (candidates[1]); >+ popret_idx += 2; >+ delete_insn (candidates[0]); >+ delete_insn (candidates[1]); >+ } >+ >+ /* copy pop paruence. */ >+ for (; pop_idx < n_pop_par; >+ pop_idx ++, popret_idx ++) >+ { >+ XVECEXP (popret_par, 0, popret_idx) = >+ XVECEXP (pop_pat, 0, pop_idx); >+ } >+ >+ /* ret pattern. 
*/ >+ rtx ret_pat = PATTERN (candidates[2]); >+ gcc_assert (GET_CODE (ret_pat) == PARALLEL); >+ >+ for (int i = 0; i < XVECLEN (ret_pat, 0); >+ i++, popret_idx++) >+ { >+ XVECEXP (popret_par, 0, popret_idx) = >+ XVECEXP (ret_pat, 0, i); >+ } >+ >+ rtx_insn *insn = emit_jump_insn_after ( >+ popret_par, >+ BB_END (bb)); >+ JUMP_LABEL (insn) = simple_return_rtx; >+ >+ REG_NOTES (insn) = REG_NOTES (pop_rtx); >+ RTX_FRAME_RELATED_P (insn) = 1; >+ >+ if (dump_file) >+ { >+ fprintf(dump_file, "new insn:\n"); >+ print_rtl (dump_file, insn); >+ } >+ >+ delete_insn (candidates [2]); >+ delete_insn (pop_rtx); >+} >+ >+static void >+zcmp_popret (void) >+{ >+ basic_block bb; >+ rtx_insn *insn = NULL, *pop_rtx = NULL; >+ rtx_insn *pop_candidates[3] = {NULL, }; >+ /* >+ find NOTE_INSN_EPILOGUE_BEG, but pop_rtx not found => return >+ find NOTE_INSN_EPILOGUE_BEG, and pop_rtx is found => looking for a0 >+ */ >+ >+ FOR_EACH_BB_REVERSE_FN (bb, cfun) >+ { >+ FOR_BB_INSNS_REVERSE (bb, insn) >+ { >+ if (!pop_rtx >+ && NOTE_P (insn) >+ && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG) >+ return; >+ >+ if (NOTE_P (insn) >+ && NOTE_KIND (insn) == NOTE_INSN_FUNCTION_BEG) >+ { >+ if (pop_rtx) >+ emit_zcmp_popret (pop_rtx, pop_candidates, bb); >+ return; >+ }; >+ >+ if (!(NONDEBUG_INSN_P (insn) >+ || CALL_P (insn))) >+ continue; >+ >+ rtx pop_pat = PATTERN (insn); >+ >+ if (GET_CODE (pop_pat) == PARALLEL >+ && riscv_valid_stack_push_pop_p (pop_pat, false)) >+ { >+ pop_rtx = insn; >+ continue; >+ } >+ >+ /* pattern for `ret`. */ >+ if (JUMP_P (insn) >+ && GET_CODE (pop_pat) == PARALLEL >+ && XVECLEN (pop_pat, 0) == 2 >+ && GET_CODE (XVECEXP (pop_pat, 0, 0)) == SIMPLE_RETURN >+ && GET_CODE (XVECEXP (pop_pat, 0, 1)) == USE) >+ { >+ rtx use_reg = XEXP (XVECEXP (pop_pat, 0, 1), 0); >+ if (REG_P (use_reg) >+ && REGNO (use_reg) == RETURN_ADDR_REGNUM) >+ { >+ pop_candidates [2] = insn; >+ continue; >+ } >+ } >+ >+ if (!pop_rtx) >+ continue; >+ >+ /* pattern for return value. 
*/ >+ if (!pop_candidates [0] >+ && GET_CODE (pop_pat) == USE) >+ { >+ rtx_insn *set_insn = PREV_INSN (insn); >+ rtx pat_set = PATTERN (set_insn); >+ >+ if (riscv_check_regno (XEXP (pop_pat, 0), >+ RETURN_VALUE_REGNUM) >+ && insn >+ && pat_set != NULL >+ && GET_CODE (pat_set) == SET >+ && riscv_check_regno (SET_DEST (pat_set), >+ RETURN_VALUE_REGNUM) >+ && CONST_INT_P (SET_SRC (pat_set)) >+ && INTVAL (SET_SRC (pat_set)) == 0) >+ { >+ pop_candidates [0] = set_insn; >+ pop_candidates [1] = insn; >+ break; >+ } >+ } >+ } >+ >+ if (pop_rtx) >+ { >+ emit_zcmp_popret (pop_rtx, pop_candidates, bb); >+ return; >+ } >+ } >+} >+ >+const pass_data pass_data_zcmp_popret = >+{ >+ RTL_PASS, /* type */ >+ "zcmp-popret", /* name */ >+ OPTGROUP_NONE, /* optinfo_flags */ >+ TV_NONE, /* tv_id */ >+ 0, /* properties_required */ >+ 0, /* properties_provided */ >+ 0, /* properties_destroyed */ >+ 0, /* todo_flags_start */ >+ 0, /* todo_flags_finish */ >+}; >+ >+class pass_zcmp_popret : public rtl_opt_pass >+{ >+public: >+ pass_zcmp_popret (gcc::context *ctxt) >+ : rtl_opt_pass (pass_data_zcmp_popret, ctxt) >+ {} >+ >+ /* opt_pass methods: */ >+ virtual bool gate (function *) >+ { return TARGET_ZCMP; } >+ virtual unsigned int execute (function *) >+ { >+ zcmp_popret (); >+ return 0; >+ } >+}; // class pass_zcmp_popret >+ >+} // anon namespace >+ >+rtl_opt_pass * >+make_pass_zcmp_popret (gcc::context *ctxt) >+{ >+ return new pass_zcmp_popret (ctxt); >+} >diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc >index 5f8cbfc15ed..17df2f3f8cf 100644 >--- a/gcc/config/riscv/riscv.cc >+++ b/gcc/config/riscv/riscv.cc >@@ -114,6 +114,9 @@ struct GTY(()) riscv_frame_info { > /* Likewise FPR X. */ > unsigned int fmask; > >+ /* How much the push/pop routines adjust sp (or 0 if unused). */ >+ unsigned push_pop_sp_adjust; >+ > /* How much the GPR save/restore routines adjust sp (or 0 if unused). 
*/ > unsigned save_libcall_adjustment; > >@@ -401,6 +404,20 @@ static const unsigned gpr_save_reg_order[] = { > S10_REGNUM, S11_REGNUM > }; > >+/* Order for the CLOBBERs/USEs of push/pop. */ >+static const unsigned push_save_reg_order[] = { >+ INVALID_REGNUM, RETURN_ADDR_REGNUM, S0_REGNUM, >+ S1_REGNUM, S2_REGNUM, S3_REGNUM, S4_REGNUM, >+ S5_REGNUM, S6_REGNUM, S7_REGNUM, S8_REGNUM, >+ S9_REGNUM, S10_REGNUM, S11_REGNUM >+}; >+ >+/* Order for the CLOBBERs/USEs of push/pop in rve. */ >+static const unsigned push_save_reg_order_zcmpe[] = { >+ INVALID_REGNUM, RETURN_ADDR_REGNUM, S0_REGNUM, >+ S1_REGNUM >+}; >+ > /* A table describing all the processors GCC knows about. */ > static const struct riscv_tune_info riscv_tune_info_table[] = { > #define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \ >@@ -2989,6 +3006,17 @@ riscv_output_return () > return "ret"; > } > >+bool >+riscv_output_popret_p (rtx op) >+{ >+ unsigned n_rtx = XVECLEN (op, 0); >+ rtx use = XVECEXP (op, 0, n_rtx - 1); >+ rtx ret = XVECEXP (op, 0, n_rtx - 2); >+ >+ return GET_CODE (ret) == SIMPLE_RETURN >+ && GET_CODE (use) == USE; >+} >+ > > > /* Return true if CMP1 is a suitable second operand for integer ordering > test CODE. See also the *sCC patterns in riscv.md. */ >@@ -4306,6 +4334,74 @@ riscv_memmodel_needs_amo_acquire (enum memmodel model) > } > } > >+/* Print Sp adjustment field of pop instruction. */ >+ >+static void >+riscv_print_pop_size (FILE *file, rtx op) >+{ >+ unsigned sp_adjust_idx = XVECLEN (op, 0) - 1; >+ rtx sp_adjust_rtx = XVECEXP (op, 0, sp_adjust_idx); >+ >+ /* Skip ret or pattern. */ >+ while (GET_CODE (sp_adjust_rtx) != SET) >+ sp_adjust_rtx = XVECEXP (op, 0, --sp_adjust_idx); >+ >+ rtx elt_plus = SET_SRC (sp_adjust_rtx); >+ fprintf (file, "%ld", INTVAL (XEXP (elt_plus, 1))); >+} >+ >+/* Print push/pop register list. 
*/ >+ >+static void >+riscv_print_reglist (FILE *file, rtx op) >+{ >+ /* we only deal with three formats: >+ push {ra} >+ push {ra, s0} >+ push {ra, s0-sN} >+ or >+ pop {ra} >+ pop {ra, s0} >+ pop {ra, s0-sN} >+ registers except ra has to be continuous s-register, >+ and it is supposed to be checked before. >+ register list patterns in push: >+ (set/f (mem/c:SI >+ (plus:SI (reg/f:SI 2 sp) >+ (const_int 28 [0x1c])) [2 S4 A32]) >+ (reg:SI 1 ra)) >+ register list patterns in pop: >+ (set/f (reg:DI 1 ra) >+ (mem/c:DI (plus:DI (reg/f:DI 2 sp) >+ (const_int 8 [0x8])) [2 S8 A64])) >+ */ >+ int total_count = XVECLEN (op, 0); >+ int n_regs = 0; >+ bool push_p = GET_CODE (XVECEXP (op, 0, 0)) == SET >+ && GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) == PLUS; >+ >+ for (int idx = 0; idx < total_count; ++idx) >+ { >+ rtx ele = XVECEXP (op, 0, idx); >+ if (GET_CODE (ele) != SET) >+ continue; >+ >+ bool restore_save_p = push_p ? >+ MEM_P (SET_DEST (ele)) : >+ MEM_P (SET_SRC (ele)); >+ >+ if (restore_save_p) >+ n_regs ++; >+ } >+ >+ if (n_regs > 2) >+ fprintf (file, "ra,s0-s%u", n_regs - 2); >+ else if (n_regs > 1) >+ fprintf (file, "ra,s0"); >+ else >+ fputs("ra", file); >+} >+ > /* Return true if a FENCE should be emitted to before a memory access to > implement the release portion of memory model MODEL. */ > >@@ -4517,6 +4613,14 @@ riscv_print_operand (FILE *file, rtx op, int letter) > fputs (GET_RTX_NAME (code), file); > break; > >+ case 'L': >+ riscv_print_reglist (file, op); >+ break; >+ >+ case 's': >+ riscv_print_pop_size (file, op); >+ break; >+ > case 'S': > { > rtx newop = GEN_INT (ctz_hwi (INTVAL (op))); >@@ -4777,6 +4881,66 @@ riscv_use_save_libcall (const struct riscv_frame_info >*frame) > return frame->save_libcall_adjustment != 0; > } > >+/* Determine how many instructions related to push/pop instructions. 
*/ >+ >+static unsigned >+riscv_save_push_pop_count (unsigned mask) >+{ >+ if (!BITSET_P (mask, GP_REG_FIRST + RETURN_ADDR_REGNUM)) >+ return 0; >+ for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--) >+ if (BITSET_P (mask, n) >+ && !call_used_regs [n]) >+ /* add ra saving and sp adjust. */ >+ return CALLEE_SAVED_REG_NUMBER (n) + 1 + 2; >+ abort (); >+} >+ >+/* Calculate the maximum sp adjustment of push/pop instruction. */ >+ >+static unsigned >+riscv_push_pop_base_sp_adjust (unsigned mask) >+{ >+ unsigned n_regs = riscv_save_push_pop_count (mask) - 1; >+ return (n_regs * UNITS_PER_WORD + 15) & (~0xf); >+} >+ >+/* Determine whether to call push/pop routines. */ >+ >+static bool >+riscv_use_push_pop (const struct riscv_frame_info *frame, const HOST_WIDE_INT >frame_size) >+{ >+ if (!TARGET_ZCMP) >+ return false; >+ >+ /* We do not handler variable argument cases currently. */ >+ if (cfun->machine->varargs_size != 0) >+ return false; >+ >+ HOST_WIDE_INT base_size = riscv_push_pop_base_sp_adjust (frame->mask); >+ /* >+ Pr 960215-1.c in rv64 ouputs >+ >+ addi sp,sp,-32 >+ sd ra,24(sp) >+ sd s0,16(sp) >+ sd s2,8(sp) >+ sd s3,0(sp) >+ it is a rare case that callee saved registers are not non-continous, >+ which breaks the old push implementation, and we just reject this case >+ like save-restore does now. >+ */ >+ if (base_size > frame_size) >+ return false; >+ >+ /* {ra,s0-s10} is invalid. */ >+ if (frame->mask & (1 << (S10_REGNUM - GP_REG_FIRST)) >+ && !(frame->mask & (1 << (S11_REGNUM - GP_REG_FIRST)))) >+ return false; >+ >+ return frame->mask & (1 << (RETURN_ADDR_REGNUM - GP_REG_FIRST)); >+} >+ > /* Determine which GPR save/restore routine to call. */ > > static unsigned >@@ -4934,6 +5098,8 @@ riscv_compute_frame_info (void) > /* Only use save/restore routines when the GPRs are atop the frame. 
*/ > if (known_ne (frame->hard_frame_pointer_offset, frame->total_size)) > frame->save_libcall_adjustment = 0; >+ >+ frame->push_pop_sp_adjust = 0; > } > > /* Make sure that we're not trying to eliminate to the wrong hard frame >@@ -5171,6 +5337,86 @@ riscv_for_each_saved_reg (poly_int64 sp_offset, >riscv_save_restore_fn fn, > } > } > >+static void >+riscv_emit_pop_insn (struct riscv_frame_info *frame, HOST_WIDE_INT offset, >HOST_WIDE_INT size) >+{ >+ unsigned int veclen = riscv_save_push_pop_count (frame->mask); >+ unsigned int n_reg = veclen - 1; >+ rtvec vec = rtvec_alloc (veclen); >+ HOST_WIDE_INT sp_adjust; >+ rtx dwarf = NULL_RTX; >+ >+ const unsigned *reg_order = (TARGET_ZCMP && TARGET_RVE) >+ ? push_save_reg_order_zcmpe >+ : push_save_reg_order; >+ >+ gcc_assert (n_reg >= 1 >+ && TARGET_ZCMP >+ && ((TARGET_RVE && (n_reg <= ARRAY_SIZE (push_save_reg_order_zcmpe))) >+ || (TARGET_ZCMP && (n_reg <= ARRAY_SIZE (push_save_reg_order))))); >+ >+ /* sp adjust pattern */ >+ int max_allow_sp_adjust = riscv_push_pop_base_sp_adjust (frame->mask) + 48; >+ int aligned_size = size; >+ >+ /* if sp adjustment is too large, we should split it first. */ >+ if (aligned_size > max_allow_sp_adjust) >+ { >+ rtx dwarf_pre_sp_adjust = NULL_RTX; >+ rtx pre_adjust_rtx = gen_add3_insn (stack_pointer_rtx, >+ stack_pointer_rtx, >+ GEN_INT (aligned_size - max_allow_sp_adjust)); >+ rtx insn = emit_insn (pre_adjust_rtx); >+ >+ rtx cfa_pre_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, >+ GEN_INT (aligned_size - max_allow_sp_adjust)); >+ dwarf_pre_sp_adjust = alloc_reg_note (REG_CFA_DEF_CFA, >+ cfa_pre_adjust_rtx, >+ dwarf_pre_sp_adjust); >+ >+ RTX_FRAME_RELATED_P (insn) = 1; >+ REG_NOTES (insn) = dwarf_pre_sp_adjust; >+ >+ sp_adjust = max_allow_sp_adjust; >+ } >+ else >+ sp_adjust = (aligned_size + 15) & (~0xf); >+ >+ /* register save sequence. 
*/ >+ for (unsigned i = 1; i < veclen; ++i) >+ { >+ offset -= UNITS_PER_WORD; >+ unsigned regno = reg_order[i]; >+ rtx reg = gen_rtx_REG (Pmode, regno); >+ rtx mem = gen_frame_mem (Pmode, plus_constant (Pmode, >+ stack_pointer_rtx, >+ offset)); >+ rtx set = gen_rtx_SET (reg, mem); >+ RTVEC_ELT (vec, i - 1) = set; >+ RTX_FRAME_RELATED_P (set) = 1; >+ dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); >+ } >+ >+ /* sp adjust pattern */ >+ rtx adjust_sp_rtx >+ = gen_rtx_SET (stack_pointer_rtx, >+ plus_constant (Pmode, >+ stack_pointer_rtx, >+ sp_adjust)); >+ RTVEC_ELT (vec, veclen - 1) = adjust_sp_rtx; >+ >+ rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, >+ const0_rtx); >+ dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); >+ >+ frame->gp_sp_offset -= (veclen - 1) * UNITS_PER_WORD; >+ frame->push_pop_sp_adjust = sp_adjust; >+ >+ rtx insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, vec)); >+ RTX_FRAME_RELATED_P (insn) = 1; >+ REG_NOTES (insn) = dwarf; >+} >+ > /* For stack frames that can't be allocated with a single ADDI instruction, > compute the best value to initially allocate. It must at a minimum > allocate enough space to spill the callee-saved registers. If TARGET_RVC, >@@ -5270,6 +5516,146 @@ riscv_emit_stack_tie (void) > emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx)); > } > >+bool >+riscv_check_regno(rtx pat, unsigned regno) >+{ >+ return REG_P (pat) >+ && REGNO (pat) == regno; >+} >+ >+/* Function to check whether the OP is a valid stack push/pop operation. >+ This part is borrowed from nds32 nds32_valid_stack_push_pop_p */ >+ >+bool >+riscv_valid_stack_push_pop_p (rtx op, bool push_p) >+{ >+ int index; >+ int total_count; >+ int sp_adjust_rtx_index; >+ rtx elt; >+ rtx elt_reg; >+ rtx elt_plus; >+ >+ if (!TARGET_ZCMP) >+ return false; >+ >+ total_count = XVECLEN (op, 0); >+ sp_adjust_rtx_index = push_p ? 
0 : total_count - 1; >+ >+ /* At least sp + one callee save/restore register rtx */ >+ if (total_count < 2) >+ return false; >+ >+ /* Perform some quick check for that every element should be 'set', >+ for pop, it might contain `ret` and `ret value` pattern. */ >+ for (index = 0; index < total_count; index++) >+ { >+ elt = XVECEXP (op, 0, index); >+ >+ /* skip pop return value rtx */ >+ if (!push_p && GET_CODE (elt) == SET >+ && riscv_check_regno (SET_DEST (elt), RETURN_VALUE_REGNUM) >+ && total_count >= 4 >+ && index + 1 < total_count >+ && GET_CODE (XVECEXP (op, 0, index + 1)) == USE) >+ { >+ rtx use_reg = XEXP (XVECEXP (op, 0, index + 1), 0); >+ >+ if (!riscv_check_regno (use_reg, RETURN_VALUE_REGNUM)) >+ return false; >+ >+ index += 1; >+ continue; >+ } >+ >+ /* skip ret rtx */ >+ if (!push_p && GET_CODE (elt) == SIMPLE_RETURN >+ && total_count >= 4 >+ && index + 1 < total_count >+ && GET_CODE (XVECEXP (op, 0, index + 1)) == USE) >+ { >+ rtx use_reg = XEXP (XVECEXP (op, 0, index + 1), 0); >+ >+ if (!riscv_check_regno (use_reg, RETURN_ADDR_REGNUM)) >+ return false; >+ >+ index += 1; >+ sp_adjust_rtx_index -= 2; >+ continue; >+ } >+ >+ if (GET_CODE (elt) != SET) >+ return false; >+ } >+ >+ elt = XVECEXP (op, 0, sp_adjust_rtx_index); >+ elt_reg = SET_DEST (elt); >+ elt_plus = SET_SRC (elt); >+ >+ /* Check this is (set (stack_reg) (plus stack_reg const)) pattern. */ >+ if (GET_CODE (elt_plus) != PLUS >+ || !riscv_check_regno (elt_reg, STACK_POINTER_REGNUM)) >+ return false; >+ >+ /* Pass all test, this is a valid rtx. */ >+ return true; >+} >+ >+/* Generate push/pop rtx */ >+ >+static void >+riscv_emit_push_insn (struct riscv_frame_info *frame, HOST_WIDE_INT size) >+{ >+ unsigned int veclen = riscv_save_push_pop_count (frame->mask); >+ unsigned int n_reg = veclen - 1; >+ rtvec vec = rtvec_alloc (veclen); >+ >+ const unsigned *reg_order = (TARGET_ZCMP && TARGET_RVE) >+ ? 
push_save_reg_order_zcmpe >+ : push_save_reg_order; >+ >+ int aligned_size = (size + 15) & (~0xf); >+ >+ gcc_assert (n_reg >= 1 >+ && TARGET_ZCMP >+ && ((TARGET_RVE && (n_reg <= ARRAY_SIZE (push_save_reg_order_zcmpe))) >+ || (TARGET_ZCMP && (n_reg <= ARRAY_SIZE (push_save_reg_order))))); >+ >+ /* sp adjust pattern */ >+ int max_allow_sp_adjust = riscv_push_pop_base_sp_adjust (frame->mask) + 48; >+ int sp_adjust = aligned_size > max_allow_sp_adjust ? >+ max_allow_sp_adjust >+ : aligned_size; >+ >+ /*TODO: move this part to frame computation function. */ >+ frame->gp_sp_offset = (veclen - 1) * UNITS_PER_WORD; >+ frame->push_pop_sp_adjust = sp_adjust; >+ >+ rtx adjust_sp_rtx >+ = gen_rtx_SET (stack_pointer_rtx, >+ plus_constant (Pmode, >+ stack_pointer_rtx, >+ -sp_adjust)); >+ RTVEC_ELT (vec, 0) = adjust_sp_rtx; >+ >+ /* Register save sequence. */ >+ for (unsigned i = 1; i < veclen; ++i) >+ { >+ sp_adjust -= UNITS_PER_WORD; >+ unsigned regno = reg_order[i]; >+ rtx reg = gen_rtx_REG (Pmode, regno); >+ rtx mem = gen_frame_mem (Pmode, plus_constant (Pmode, >+ stack_pointer_rtx, >+ sp_adjust)); >+ rtx set = gen_rtx_SET (mem, reg); >+ RTVEC_ELT (vec, i) = set; >+ RTX_FRAME_RELATED_P (set) = 1; >+ } >+ >+ rtx insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, vec)); >+ RTX_FRAME_RELATED_P (insn) = 1; >+} >+ > /* Expand the "prologue" pattern. 
*/ > > void >@@ -5278,6 +5664,7 @@ riscv_expand_prologue (void) > struct riscv_frame_info *frame = &cfun->machine->frame; > poly_int64 size = frame->total_size; > unsigned mask = frame->mask; >+ HOST_WIDE_INT step1 = riscv_first_stack_step (frame); > rtx insn; > > if (flag_stack_usage_info) >@@ -5300,19 +5687,32 @@ riscv_expand_prologue (void) > REG_NOTES (insn) = dwarf; > } > >+ if (size.is_constant ()) >+ step1 = MIN (size.to_constant(), step1); >+ if (riscv_use_push_pop (frame, step1)) >+ { >+ riscv_emit_push_insn (frame, step1); >+ >+ step1 = MAX (step1 - frame->push_pop_sp_adjust, 0); >+ size = MAX (size.to_constant() - frame->push_pop_sp_adjust, 0); >+ frame->mask &= ~ ((TARGET_ZCMP && TARGET_RVE) ? >+ RISCV_ZCMPE_PUSH_POP_MASK >+ : RISCV_ZCE_PUSH_POP_MASK); >+ } >+ > /* Save the registers. */ > if ((frame->mask | frame->fmask) != 0) > { >- HOST_WIDE_INT step1 = riscv_first_stack_step (frame); >- if (size.is_constant ()) >- step1 = MIN (size.to_constant(), step1); >- >- insn = gen_add3_insn (stack_pointer_rtx, >- stack_pointer_rtx, >- GEN_INT (-step1)); >- RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; >- size -= step1; >- riscv_for_each_saved_reg (size, riscv_save_reg, false, false); >+ if (step1 > 0) >+ { >+ insn = gen_add3_insn (stack_pointer_rtx, >+ stack_pointer_rtx, >+ GEN_INT (-step1)); >+ RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; >+ size -= step1; >+ } >+ riscv_for_each_saved_reg (size, riscv_save_reg, >+ false /* bool epilogue */, false /* bool maybe_eh_return */); > } > > frame->mask = mask; /* Undo the above fib. */ >@@ -5412,6 +5812,8 @@ riscv_expand_epilogue (int style) > rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); > rtx insn; > >+ bool use_zcmp_pop = !use_restore_libcall && !(crtl->calls_eh_return); >+ > /* We need to add memory barrier to prevent read from deallocated stack. 
*/ > bool need_barrier_p = known_ne (get_frame_size () > + cfun->machine->frame.arg_pointer_offset, >0); >@@ -5538,6 +5940,18 @@ riscv_expand_epilogue (int style) > if (use_restore_libcall) > frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ > >+ if (use_zcmp_pop && riscv_use_push_pop (frame, step2)) >+ { >+ /* Emit a barrier to prevent loads from a deallocated stack. */ >+ riscv_emit_stack_tie (); >+ need_barrier_p = false; >+ riscv_emit_pop_insn (frame, frame->total_size.to_constant(), step2); >+ frame->mask &= ~ ((TARGET_ZCMP && TARGET_RVE) ? >+ RISCV_ZCMPE_PUSH_POP_MASK >+ : RISCV_ZCE_PUSH_POP_MASK); >+ step2 = 0; >+ } >+ > /* Restore the registers. */ > riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg, > true, style == EXCEPTION_RETURN); >@@ -5552,6 +5966,9 @@ riscv_expand_epilogue (int style) > if (need_barrier_p) > riscv_emit_stack_tie (); > >+ if (use_zcmp_pop) >+ frame->mask = mask; >+ > /* Deallocate the final bit of the frame. */ > if (step2 > 0) > { >diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h >index d05b1d59853..6e6e3ee2c25 100644 >--- a/gcc/config/riscv/riscv.h >+++ b/gcc/config/riscv/riscv.h >@@ -383,6 +383,7 @@ ASM_MISA_SPEC > #define HARD_FRAME_POINTER_REGNUM 8 > #define STACK_POINTER_REGNUM 2 > #define THREAD_POINTER_REGNUM 4 >+#define RETURN_VALUE_REGNUM 10 > > /* These two registers don't really exist: they get eliminated to either > the stack or hard frame pointer. */ >@@ -1097,4 +1098,7 @@ extern void riscv_remove_unneeded_save_restore_calls >(void); > #define DWARF_REG_TO_UNWIND_COLUMN(REGNO) \ > ((REGNO == RISCV_DWARF_VLENB) ? (FIRST_PSEUDO_REGISTER + 1) : REGNO) > >+#define RISCV_ZCE_PUSH_POP_MASK 0x0ffc0302u >+#define RISCV_ZCMPE_PUSH_POP_MASK 0x302u >+ > #endif /* ! 
GCC_RISCV_H */ >diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md >index bc384d9aedf..b9f2a426e48 100644 >--- a/gcc/config/riscv/riscv.md >+++ b/gcc/config/riscv/riscv.md >@@ -108,12 +108,14 @@ > > (define_constants > [(RETURN_ADDR_REGNUM 1) >+ (SP_REGNUM 2) > (GP_REGNUM 3) > (TP_REGNUM 4) > (T0_REGNUM 5) > (T1_REGNUM 6) > (S0_REGNUM 8) > (S1_REGNUM 9) >+ (A0_REGNUM 10) > (S2_REGNUM 18) > (S3_REGNUM 19) > (S4_REGNUM 20) >@@ -3147,3 +3149,4 @@ > (include "sifive-7.md") > (include "thead.md") > (include "vector.md") >+(include "zc.md") >diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv >index 6e326fc7e02..9ef522306a5 100644 >--- a/gcc/config/riscv/t-riscv >+++ b/gcc/config/riscv/t-riscv >@@ -90,6 +90,10 @@ riscv-v.o: $(srcdir)/config/riscv/riscv-v.cc \ > $(COMPILE) $< > $(POSTCOMPILE) > >+riscv-zcmp-popret.o: $(srcdir)/config/riscv/riscv-zcmp-popret.cc >+ $(COMPILE) $< >+ $(POSTCOMPILE) >+ > thead.o: $(srcdir)/config/riscv/thead.cc \ > $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TARGET_H) backend.h $(RTL_H) \ > memmodel.h $(EMIT_RTL_H) poly-int.h output.h >diff --git a/gcc/config/riscv/zc.md b/gcc/config/riscv/zc.md >new file mode 100644 >index 00000000000..3ad34dacd49 >--- /dev/null >+++ b/gcc/config/riscv/zc.md >@@ -0,0 +1,47 @@ >+;; Machine description for ZCE extension. >+;; Copyright (C) 2021 Free Software Foundation, Inc. >+ >+;; This file is part of GCC. >+ >+;; GCC is free software; you can redistribute it and/or modify >+;; it under the terms of the GNU General Public License as published by >+;; the Free Software Foundation; either version 3, or (at your option) >+;; any later version. >+ >+;; GCC is distributed in the hope that it will be useful, >+;; but WITHOUT ANY WARRANTY; without even the implied warranty of >+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+;; GNU General Public License for more details. 
>+ >+;; You should have received a copy of the GNU General Public License >+;; along with GCC; see the file COPYING3. If not see >+;; <http://www.gnu.org/licenses/>. >+ >+(define_insn "*stack_push<mode>" >+ [(match_parallel 0 "riscv_stack_push_operation" >+ [(set (reg:X SP_REGNUM) (plus:X (reg:X SP_REGNUM) >+ (match_operand:X 1 "const_int_operand" "")))])] >+ "TARGET_ZCMP" >+ "cm.push\t{%L0},%1") >+ >+(define_insn "*stack_pop<mode>" >+ [(match_parallel 0 "riscv_stack_pop_operation" >+ [(set (match_operand:X 1 "register_operand" "") >+ (mem:X (plus:X (reg:X SP_REGNUM) >+ (match_operand:X 2 "const_int_operand" ""))))])] >+ "TARGET_ZCMP" >+ { >+ return riscv_output_popret_p (operands[0]) ? >+ "cm.popret\t{%L0},%s0" : >+ "cm.pop\t{%L0},%s0"; >+ }) >+ >+(define_insn "*stack_pop_with_return_value<mode>" >+ [(match_parallel 0 "riscv_stack_pop_operation" >+ [(set (reg:ANYI A0_REGNUM) >+ (match_operand:ANYI 1 "pop_return_value_constant" ""))])] >+ "TARGET_ZCMP" >+ { >+ gcc_assert (riscv_output_popret_p (operands[0])); >+ return "cm.popretz\t{%L0},%s0"; >+ }) >-- >2.25.1