From: Ju-Zhe Zhong <juzhe.zh...@rivai.ai> This patch supports RVV scalable register spilling. The prologue && epilogue handling picks up the prototype from Monk Chiang <monk.chi...@sifive.com>. Co-authored-by: Monk Chiang <monk.chi...@sifive.com>
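For reference, a minimal sketch of the kind of code this patch enables, distilled from the new spill-1.c test (it assumes the RVV intrinsics in riscv_vector.h and a build with e.g. -march=rv32gcv -mabi=ilp32 -O3). Clobbering every vector register forces v1 to be spilled to a stack slot whose offset is a runtime multiple of VLENB, which the new prologue/epilogue code materializes via csrr vlenb:

  #include <riscv_vector.h>

  void
  spill (int8_t *in, int8_t *out)
  {
    /* Fractional (LMUL = 1/8) load that must survive the clobber below.  */
    vint8mf8_t v1 = *(vint8mf8_t *) in;
    /* Exhaust all 32 vector registers so v1 has to be spilled.  */
    asm volatile ("#" ::
                  : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
                    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
                    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
                    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
    /* Reloaded from the scalable part of the frame.  */
    *(vint8mf8_t *) out = v1;
  }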
gcc/ChangeLog: * config/riscv/riscv-v.cc (emit_pred_move): Adjust for scalable register spilling. (legitimize_move): Ditto. * config/riscv/riscv.cc (riscv_v_adjust_scalable_frame): New function. (riscv_first_stack_step): Adjust for scalable register spilling. (riscv_expand_prologue): Ditto. (riscv_expand_epilogue): Ditto. (riscv_dwarf_poly_indeterminate_value): New function. (TARGET_DWARF_POLY_INDETERMINATE_VALUE): New target hook to support register spilling. * config/riscv/riscv.h (RISCV_DWARF_VLENB): New macro. (RISCV_PROLOGUE_TEMP2_REGNUM): Ditto. (RISCV_PROLOGUE_TEMP2): Ditto. * config/riscv/vector-iterators.md: New iterators. * config/riscv/vector.md (*mov<mode>): Fix it for register spilling. (*mov<mode>_whole): New pattern. (*mov<mode>_fract): New pattern. (@pred_mov<mode>): Fix it for register spilling. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/mov-9.c: Adjust test. * gcc.target/riscv/rvv/base/macro.h: New test. * gcc.target/riscv/rvv/base/spill-1.c: New test. * gcc.target/riscv/rvv/base/spill-10.c: New test. * gcc.target/riscv/rvv/base/spill-11.c: New test. * gcc.target/riscv/rvv/base/spill-12.c: New test. * gcc.target/riscv/rvv/base/spill-2.c: New test. * gcc.target/riscv/rvv/base/spill-3.c: New test. * gcc.target/riscv/rvv/base/spill-4.c: New test. * gcc.target/riscv/rvv/base/spill-5.c: New test. * gcc.target/riscv/rvv/base/spill-6.c: New test. * gcc.target/riscv/rvv/base/spill-7.c: New test. * gcc.target/riscv/rvv/base/spill-8.c: New test. * gcc.target/riscv/rvv/base/spill-9.c: New test. --- gcc/config/riscv/riscv-v.cc | 47 +-- gcc/config/riscv/riscv.cc | 147 ++++++- gcc/config/riscv/riscv.h | 3 + gcc/config/riscv/vector-iterators.md | 23 ++ gcc/config/riscv/vector.md | 136 +++++-- .../gcc.target/riscv/rvv/base/macro.h | 6 + .../gcc.target/riscv/rvv/base/mov-9.c | 8 +- .../gcc.target/riscv/rvv/base/spill-1.c | 385 ++++++++++++++++++ .../gcc.target/riscv/rvv/base/spill-10.c | 41 ++ .../gcc.target/riscv/rvv/base/spill-11.c | 60 +++ .../gcc.target/riscv/rvv/base/spill-12.c | 47 +++ .../gcc.target/riscv/rvv/base/spill-2.c | 320 +++++++++++++++ .../gcc.target/riscv/rvv/base/spill-3.c | 254 ++++++++++++ .../gcc.target/riscv/rvv/base/spill-4.c | 196 +++++++++ .../gcc.target/riscv/rvv/base/spill-5.c | 130 ++++++ .../gcc.target/riscv/rvv/base/spill-6.c | 101 +++++ .../gcc.target/riscv/rvv/base/spill-7.c | 114 ++++++ .../gcc.target/riscv/rvv/base/spill-8.c | 51 +++ .../gcc.target/riscv/rvv/base/spill-9.c | 42 ++ 19 files changed, 2021 insertions(+), 90 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/macro.h create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-11.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-12.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-8.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/spill-9.c diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 6615a5c7ffe..e0459e3f610 100644 --- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc @@ -106,28 +106,25 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval, /* Emit an RVV unmask && vl mov from SRC to DEST. */ static void -emit_pred_move (rtx dest, rtx src, rtx vl, machine_mode mask_mode) +emit_pred_move (rtx dest, rtx src, machine_mode mask_mode) { insn_expander<7> e; - machine_mode mode = GET_MODE (dest); - if (register_operand (src, mode) && register_operand (dest, mode)) - { - emit_move_insn (dest, src); - return; - } + rtx vl = gen_reg_rtx (Pmode); + unsigned int sew = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL + ? 8 + : GET_MODE_BITSIZE (GET_MODE_INNER (mode)); + + emit_insn (gen_vsetvl_no_side_effects ( + Pmode, vl, gen_rtx_REG (Pmode, 0), gen_int_mode (sew, Pmode), + gen_int_mode ((unsigned int) mode, Pmode), const1_rtx, const1_rtx)); e.add_output_operand (dest, mode); e.add_all_one_mask_operand (mask_mode); - /* For load operation, we create undef operand. - For store operation, we make it depend on the dest memory to - avoid potential bugs. */ - if (MEM_P (src)) - e.add_vundef_operand (mode); - else - e.add_input_operand (dest, mode); + e.add_vundef_operand (mode); e.add_input_operand (src, mode); + e.add_input_operand (vl, Pmode); e.add_policy_operand (TAIL_AGNOSTIC, MASK_AGNOSTIC); @@ -143,37 +140,25 @@ bool legitimize_move (rtx dest, rtx src, machine_mode mask_mode) { machine_mode mode = GET_MODE (dest); - /* For whole registers load/store or register-register move, - we don't need to specially handle them, just let them go - through "*mov<mode>" and then use the codegen directly. */ - if ((known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR) - && (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)) - || (register_operand (src, mode) && register_operand (dest, mode))) + if (known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR) + && GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL) { /* Need to force register if mem <- !reg. */ if (MEM_P (dest) && !REG_P (src)) src = force_reg (mode, src); + return false; } - - rtx vlmax = gen_reg_rtx (Pmode); - unsigned int sew = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL - ? 8 - : GET_MODE_BITSIZE (GET_MODE_INNER (mode)); - emit_insn (gen_vsetvl_no_side_effects ( - Pmode, vlmax, gen_rtx_REG (Pmode, 0), gen_int_mode (sew, Pmode), - gen_int_mode ((unsigned int) mode, Pmode), const1_rtx, const1_rtx)); - if (!register_operand (src, mode) && !register_operand (dest, mode)) { rtx tmp = gen_reg_rtx (mode); if (MEM_P (src)) - emit_pred_move (tmp, src, vlmax, mask_mode); + emit_pred_move (tmp, src, mask_mode); else emit_move_insn (tmp, src); src = tmp; } - emit_pred_move (dest, src, vlmax, mask_mode); + emit_pred_move (dest, src, mask_mode); return true; } diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 50ef38438a2..8615df473c1 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -1943,6 +1943,37 @@ riscv_legitimize_poly_move (machine_mode mode, rtx dest, rtx tmp, rtx src) } } +/* Adjust scalable frame of vector for prologue && epilogue. 
*/ + +static void +riscv_v_adjust_scalable_frame (rtx target, poly_int64 offset, bool epilogue) +{ + rtx tmp = RISCV_PROLOGUE_TEMP (Pmode); + rtx adjust_size = RISCV_PROLOGUE_TEMP2 (Pmode); + rtx insn, dwarf, adjust_frame_rtx; + + riscv_legitimize_poly_move (Pmode, adjust_size, tmp, + gen_int_mode (offset, Pmode)); + + if (epilogue) + insn = gen_add3_insn (target, target, adjust_size); + else + insn = gen_sub3_insn (target, target, adjust_size); + + insn = emit_insn (insn); + + RTX_FRAME_RELATED_P (insn) = 1; + + adjust_frame_rtx + = gen_rtx_SET (target, + plus_constant (Pmode, target, epilogue ? offset : -offset)); + + dwarf = alloc_reg_note (REG_FRAME_RELATED_EXPR, copy_rtx (adjust_frame_rtx), + NULL_RTX); + + REG_NOTES (insn) = dwarf; +} + /* If (set DEST SRC) is not a valid move instruction, emit an equivalent sequence that is valid. */ @@ -4824,21 +4855,29 @@ riscv_restore_reg (rtx reg, rtx mem) static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame) { - if (SMALL_OPERAND (frame->total_size.to_constant())) - return frame->total_size.to_constant(); + HOST_WIDE_INT frame_total_constant_size; + if (!frame->total_size.is_constant ()) + frame_total_constant_size + = riscv_stack_align (frame->total_size.coeffs[0]) + - riscv_stack_align (frame->total_size.coeffs[1]); + else + frame_total_constant_size = frame->total_size.to_constant (); + + if (SMALL_OPERAND (frame_total_constant_size)) + return frame_total_constant_size; HOST_WIDE_INT min_first_step = RISCV_STACK_ALIGN ((frame->total_size - frame->fp_sp_offset).to_constant()); HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8; - HOST_WIDE_INT min_second_step = frame->total_size.to_constant() - max_first_step; + HOST_WIDE_INT min_second_step = frame_total_constant_size - max_first_step; gcc_assert (min_first_step <= max_first_step); /* As an optimization, use the least-significant bits of the total frame size, so that the second adjustment step is just LUI + ADD. */ if (!SMALL_OPERAND (min_second_step) - && frame->total_size.to_constant() % IMM_REACH < IMM_REACH / 2 - && frame->total_size.to_constant() % IMM_REACH >= min_first_step) - return frame->total_size.to_constant() % IMM_REACH; + && frame_total_constant_size % IMM_REACH < IMM_REACH / 2 + && frame_total_constant_size % IMM_REACH >= min_first_step) + return frame_total_constant_size % IMM_REACH; if (TARGET_RVC) { @@ -4911,12 +4950,12 @@ void riscv_expand_prologue (void) { struct riscv_frame_info *frame = &cfun->machine->frame; - HOST_WIDE_INT size = frame->total_size.to_constant (); + poly_int64 size = frame->total_size; unsigned mask = frame->mask; rtx insn; if (flag_stack_usage_info) - current_function_static_stack_size = size; + current_function_static_stack_size = constant_lower_bound (size); if (cfun->machine->naked_p) return; @@ -4938,7 +4977,9 @@ riscv_expand_prologue (void) /* Save the registers. */ if ((frame->mask | frame->fmask) != 0) { - HOST_WIDE_INT step1 = MIN (size, riscv_first_stack_step (frame)); + HOST_WIDE_INT step1 = riscv_first_stack_step (frame); + if (size.is_constant ()) + step1 = MIN (size.to_constant(), step1); insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, @@ -4961,23 +5002,40 @@ riscv_expand_prologue (void) } /* Allocate the rest of the frame. */ - if (size > 0) + if (known_gt (size, 0)) { - if (SMALL_OPERAND (-size)) + /* Two step adjustment: + 1.scalable frame. 2.constant frame. */ + poly_int64 scalable_frame (0, 0); + if (!size.is_constant ()) + { + /* First for scalable frame. 
*/ + poly_int64 scalable_frame = size; + scalable_frame.coeffs[0] = size.coeffs[1]; + riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false); + size -= scalable_frame; + } + + /* Second step for constant frame. */ + HOST_WIDE_INT constant_frame = size.to_constant (); + if (constant_frame == 0) + return; + + if (SMALL_OPERAND (-constant_frame)) { insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, - GEN_INT (-size)); + GEN_INT (-constant_frame)); RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; } else { - riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-size)); + riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-constant_frame)); emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, RISCV_PROLOGUE_TEMP (Pmode))); /* Describe the effect of the previous instructions. */ - insn = plus_constant (Pmode, stack_pointer_rtx, -size); + insn = plus_constant (Pmode, stack_pointer_rtx, -constant_frame); insn = gen_rtx_SET (stack_pointer_rtx, insn); riscv_set_frame_expr (insn); } @@ -5020,7 +5078,7 @@ riscv_expand_epilogue (int style) Start off by assuming that no registers need to be restored. */ struct riscv_frame_info *frame = &cfun->machine->frame; unsigned mask = frame->mask; - HOST_WIDE_INT step1 = frame->total_size.to_constant (); + poly_int64 step1 = frame->total_size; HOST_WIDE_INT step2 = 0; bool use_restore_libcall = ((style == NORMAL_RETURN) && riscv_use_save_libcall (frame)); @@ -5056,11 +5114,27 @@ riscv_expand_epilogue (int style) riscv_emit_stack_tie (); need_barrier_p = false; - rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset.to_constant ()); - if (!SMALL_OPERAND (INTVAL (adjust))) + poly_int64 adjust_offset = -frame->hard_frame_pointer_offset; + rtx adjust = NULL_RTX; + + if (!adjust_offset.is_constant ()) { - riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust); - adjust = RISCV_PROLOGUE_TEMP (Pmode); + rtx tmp1 = RISCV_PROLOGUE_TEMP (Pmode); + rtx tmp2 = RISCV_PROLOGUE_TEMP2 (Pmode); + riscv_legitimize_poly_move (Pmode, tmp1, tmp2, + gen_int_mode (adjust_offset, Pmode)); + adjust = tmp1; + } + else + { + if (!SMALL_OPERAND (adjust_offset.to_constant ())) + { + riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), + GEN_INT (adjust_offset.to_constant ())); + adjust = RISCV_PROLOGUE_TEMP (Pmode); + } + else + adjust = GEN_INT (adjust_offset.to_constant ()); } insn = emit_insn ( @@ -5070,7 +5144,7 @@ riscv_expand_epilogue (int style) rtx dwarf = NULL_RTX; rtx cfa_adjust_value = gen_rtx_PLUS ( Pmode, hard_frame_pointer_rtx, - GEN_INT (-frame->hard_frame_pointer_offset.to_constant ())); + gen_int_mode (-frame->hard_frame_pointer_offset, Pmode)); rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value); dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf); RTX_FRAME_RELATED_P (insn) = 1; @@ -5092,10 +5166,20 @@ riscv_expand_epilogue (int style) /* Emit a barrier to prevent loads from a deallocated stack. */ riscv_emit_stack_tie (); need_barrier_p = false; + + /* Restore the scalable frame which is assigned in prologue. */ + if (!step1.is_constant ()) + { + poly_int64 scalable_frame = step1; + scalable_frame.coeffs[0] = step1.coeffs[1]; + riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, + true); + step1 -= scalable_frame; + } /* Get an rtx for STEP1 that we can add to BASE. 
*/ - rtx adjust = GEN_INT (step1); - if (!SMALL_OPERAND (step1)) + rtx adjust = GEN_INT (step1.to_constant ()); + if (!SMALL_OPERAND (step1.to_constant ())) { riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust); adjust = RISCV_PROLOGUE_TEMP (Pmode); } @@ -6463,6 +6547,22 @@ riscv_regmode_natural_size (machine_mode mode) return UNITS_PER_WORD; } +/* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */ + +static unsigned int +riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor, + int *offset) +{ + /* Polynomial invariant 1 == (VLENB / riscv_bytes_per_vector_chunk) - 1. + 1. TARGET_MIN_VLEN == 32, polynomial invariant 1 == (VLENB / 4) - 1. + 2. TARGET_MIN_VLEN > 32, polynomial invariant 1 == (VLENB / 8) - 1. + */ + gcc_assert (i == 1); + *factor = riscv_bytes_per_vector_chunk; + *offset = 1; + return RISCV_DWARF_VLENB; +} + /* Initialize the GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" @@ -6684,6 +6784,9 @@ riscv_regmode_natural_size (machine_mode mode) #undef TARGET_VECTOR_ALIGNMENT #define TARGET_VECTOR_ALIGNMENT riscv_vector_alignment +#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE +#define TARGET_DWARF_POLY_INDETERMINATE_VALUE riscv_dwarf_poly_indeterminate_value + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-riscv.h" diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 1385f0a16dc..2d0d170645c 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -392,6 +392,7 @@ ASM_MISA_SPEC /* Define Dwarf for RVV. */ #define RISCV_DWARF_VL (4096 + 0xc20) #define RISCV_DWARF_VTYPE (4096 + 0xc21) +#define RISCV_DWARF_VLENB (4096 + 0xc22) /* Register in which static-chain is passed to a function. */ #define STATIC_CHAIN_REGNUM (GP_TEMP_FIRST + 2) @@ -405,6 +406,8 @@ ASM_MISA_SPEC #define RISCV_PROLOGUE_TEMP_REGNUM (GP_TEMP_FIRST) #define RISCV_PROLOGUE_TEMP(MODE) gen_rtx_REG (MODE, RISCV_PROLOGUE_TEMP_REGNUM) +#define RISCV_PROLOGUE_TEMP2_REGNUM (GP_TEMP_FIRST + 1) +#define RISCV_PROLOGUE_TEMP2(MODE) gen_rtx_REG (MODE, RISCV_PROLOGUE_TEMP2_REGNUM) #define RISCV_CALL_ADDRESS_TEMP_REGNUM (GP_TEMP_FIRST + 1) #define RISCV_CALL_ADDRESS_TEMP(MODE) \ diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index 1255e33a6f8..bf3611f2eda 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -34,6 +34,29 @@ (VNx8DF "TARGET_VECTOR_ELEN_FP_64") ]) +(define_mode_iterator V_WHOLE [ + (VNx4QI "TARGET_MIN_VLEN == 32") VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") + (VNx2HI "TARGET_MIN_VLEN == 32") VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") + (VNx1SI "TARGET_MIN_VLEN == 32") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") + VNx1DI VNx2DI VNx4DI (VNx8DI "TARGET_MIN_VLEN > 32") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 32") + (VNx2SF "TARGET_VECTOR_ELEN_FP_32") + (VNx4SF "TARGET_VECTOR_ELEN_FP_32") + (VNx8SF "TARGET_VECTOR_ELEN_FP_32") + (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (VNx1DF "TARGET_VECTOR_ELEN_FP_64") + (VNx2DF "TARGET_VECTOR_ELEN_FP_64") + (VNx4DF "TARGET_VECTOR_ELEN_FP_64") + (VNx8DF "TARGET_VECTOR_ELEN_FP_64") +]) + +(define_mode_iterator V_FRACT [ + VNx1QI VNx2QI (VNx4QI "TARGET_MIN_VLEN > 32") + VNx1HI (VNx2HI "TARGET_MIN_VLEN > 32") + (VNx1SI "TARGET_MIN_VLEN > 32") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") +]) + (define_mode_iterator VB [ VNx1BI VNx2BI VNx4BI VNx8BI VNx16BI VNx32BI (VNx64BI "TARGET_MIN_VLEN >
32") diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 19bb27560f8..8eb4ca63448 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -53,6 +53,36 @@ (match_operand:V 1 "vector_move_operand"))] "TARGET_VECTOR" { + /* For whole register move, we transform the pattern into the format + that excludes the clobber of scratch register. + + We include clobber of a scalar scratch register which is going to be + used for emit of vsetvl instruction after reload_completed since we + need vsetvl instruction to set VL/VTYPE global status for fractional + vector load/store. + + For example: + [(set (match_operand:VNx1QI v24) + (match_operand:VNx1QI (mem: a4))) + (clobber (scratch:SI a5))] + ====>> vsetvl a5,zero,e8,mf8 + ====>> vle8.v v24,(a4) + + Philosophy: + + - Clobber a scalar scratch register for each mov<mode>. + + - Classify the machine_mode mode = <MODE>mode into 2 class: + Whole register move and fractional register move. + + - Transform and remove scratch clobber register for whole + register move so that we can avoid occupying the scalar + registers. + + - We can not leave it to TARGET_SECONDARY_RELOAD since it happens + before spilling. The clobber scratch is used by spilling fractional + registers in IRA/LRA so it's too early. */ + if (riscv_vector::legitimize_move (operands[0], operands[1], <VM>mode)) DONE; }) @@ -61,12 +91,34 @@ ;; Also applicable for all register moves. ;; Fractional vector modes load/store are not allowed to match this pattern. ;; Mask modes load/store are not allowed to match this pattern. -(define_insn "*mov<mode>" - [(set (match_operand:V 0 "reg_or_mem_operand" "=vr,m,vr") - (match_operand:V 1 "reg_or_mem_operand" "m,vr,vr"))] - "TARGET_VECTOR && ((register_operand (operands[0], <MODE>mode) - && register_operand (operands[1], <MODE>mode)) - || known_ge (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR))" +;; We seperate "*mov<mode>" into "*mov<mode>_whole" and "*mov<mode>_fract" because +;; we don't want to include fractional load/store in "*mov<mode>" which will +;; create unexpected patterns in LRA. +;; For example: +;; ira rtl: +;; (insn 20 19 9 2 (set (reg/v:VNx2QI 97 v1 [ v1 ]) +;; (reg:VNx2QI 134 [ _1 ])) "rvv.c":9:22 571 {*movvnx2qi_fract} +;; (nil)) +;; When the value of pseudo register 134 of the insn above is discovered already +;; spilled in the memory during LRA. +;; LRA will reload this pattern into a memory load instruction pattern. +;; Because VNx2QI is a fractional vector, we want LRA reload this pattern into +;; (insn 20 19 9 2 (parallel [ +;; (set (reg:VNx2QI 98 v2 [orig:134 _1 ] [134]) +;; (mem/c:VNx2QI (reg:SI 13 a3 [155]) [1 %sfp+[-2, -2] S[2, 2] A8])) +;; (clobber (reg:SI 14 a4 [149]))]) +;; So that we could be able to emit vsetvl instruction using clobber sratch a4. +;; To let LRA generate the expected pattern, we should exclude fractional vector +;; load/store in "*mov<mode>_whole". Otherwise, it will reload this pattern into: +;; (insn 20 19 9 2 (set (reg:VNx2QI 98 v2 [orig:134 _1 ] [134]) +;; (mem/c:VNx2QI (reg:SI 13 a3 [155]) [1 %sfp+[-2, -2] S[2, 2] A8]))) +;; which is not the pattern we want. +;; According the facts above, we make "*mov<mode>_whole" includes load/store/move for whole +;; vector modes according to '-march' and "*mov<mode>_fract" only include fractional vector modes. 
+(define_insn "*mov<mode>_whole" + [(set (match_operand:V_WHOLE 0 "reg_or_mem_operand" "=vr, m,vr") + (match_operand:V_WHOLE 1 "reg_or_mem_operand" " m,vr,vr"))] + "TARGET_VECTOR" "@ vl%m1re<sew>.v\t%0,%1 vs%m1r.v\t%1,%0 @@ -74,18 +126,26 @@ [(set_attr "type" "vldr,vstr,vmov") (set_attr "mode" "<MODE>")]) +(define_insn "*mov<mode>_fract" + [(set (match_operand:V_FRACT 0 "register_operand" "=vr") + (match_operand:V_FRACT 1 "register_operand" " vr"))] + "TARGET_VECTOR" + "vmv1r.v\t%0,%1" + [(set_attr "type" "vmov") + (set_attr "mode" "<MODE>")]) + (define_expand "mov<mode>" [(set (match_operand:VB 0 "reg_or_mem_operand") (match_operand:VB 1 "vector_move_operand"))] "TARGET_VECTOR" { if (riscv_vector::legitimize_move (operands[0], operands[1], <MODE>mode)) - DONE; + DONE; }) (define_insn "*mov<mode>" [(set (match_operand:VB 0 "register_operand" "=vr") - (match_operand:VB 1 "register_operand" "vr"))] + (match_operand:VB 1 "register_operand" " vr"))] "TARGET_VECTOR" "vmv1r.v\t%0,%1" [(set_attr "type" "vmov") @@ -290,18 +350,18 @@ ;; (const_int:QI N)]), -15 <= N < 16. ;; 2. (const_vector:VNx1SF repeat [ ;; (const_double:SF 0.0 [0x0.0p+0])]). -(define_insn "@pred_mov<mode>" +(define_insn_and_split "@pred_mov<mode>" [(set (match_operand:V 0 "nonimmediate_operand" "=vd, vr, m, vr, vr") - (if_then_else:V - (unspec:<VM> - [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1, vmWc1, vmWc1, Wc1") - (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK") - (match_operand 5 "const_int_operand" " i, i, i, i, i") - (match_operand 6 "const_int_operand" " i, i, i, i, i") - (reg:SI VL_REGNUM) - (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (match_operand:V 3 "vector_move_operand" " m, m, vr, vr, viWc0") - (match_operand:V 2 "vector_merge_operand" " 0, vu, 0, vu0, vu0")))] + (if_then_else:V + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1, vmWc1, Wc1, Wc1") + (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK") + (match_operand 5 "const_int_operand" " i, i, i, i, i") + (match_operand 6 "const_int_operand" " i, i, i, i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (match_operand:V 3 "vector_move_operand" " m, m, vr, vr, viWc0") + (match_operand:V 2 "vector_merge_operand" " 0, vu, vu0, vu0, vu0")))] "TARGET_VECTOR" "@ vle<sew>.v\t%0,%3%p1 @@ -309,31 +369,41 @@ vse<sew>.v\t%3,%0%p1 vmv.v.v\t%0,%3 vmv.v.i\t%0,v%3" + "&& register_operand (operands[0], <MODE>mode) + && register_operand (operands[3], <MODE>mode) + && satisfies_constraint_vu (operands[2])" + [(set (match_dup 0) (match_dup 3))] + "" [(set_attr "type" "vlde,vlde,vste,vimov,vimov") (set_attr "mode" "<MODE>")]) ;; vlm.v/vsm.v/vmclr.m/vmset.m. ;; constraint alternative 0 match vlm.v. -;; constraint alternative 2 match vsm.v. +;; constraint alternative 1 match vsm.v. ;; constraint alternative 3 match vmclr.m. ;; constraint alternative 4 match vmset.m. 
-(define_insn "@pred_mov<mode>" - [(set (match_operand:VB 0 "nonimmediate_operand" "=vr, m, vr, vr") - (if_then_else:VB - (unspec:VB - [(match_operand:VB 1 "vector_mask_operand" "Wc1, Wc1, Wc1, Wc1") - (match_operand 4 "vector_length_operand" " rK, rK, rK, rK") - (match_operand 5 "const_int_operand" " i, i, i, i") - (match_operand 6 "const_int_operand" " i, i, i, i") - (reg:SI VL_REGNUM) - (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (match_operand:VB 3 "vector_move_operand" " m, vr, Wc0, Wc1") - (match_operand:VB 2 "vector_merge_operand" " vu, 0, vu, vu")))] +(define_insn_and_split "@pred_mov<mode>" + [(set (match_operand:VB 0 "nonimmediate_operand" "=vr, m, vr, vr, vr") + (if_then_else:VB + (unspec:VB + [(match_operand:VB 1 "vector_mask_operand" "Wc1, Wc1, Wc1, Wc1, Wc1") + (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK") + (match_operand 5 "const_int_operand" " i, i, i, i, i") + (match_operand 6 "const_int_operand" " i, i, i, i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (match_operand:VB 3 "vector_move_operand" " m, vr, vr, Wc0, Wc1") + (match_operand:VB 2 "vector_merge_operand" " vu, vu0, vu, vu, vu")))] "TARGET_VECTOR" "@ vlm.v\t%0,%3 vsm.v\t%3,%0 + # vmclr.m\t%0 vmset.m\t%0" - [(set_attr "type" "vldm,vstm,vmalu,vmalu") + "&& register_operand (operands[0], <MODE>mode) + && register_operand (operands[3], <MODE>mode)" + [(set (match_dup 0) (match_dup 3))] + "" + [(set_attr "type" "vldm,vstm,vimov,vmalu,vmalu") (set_attr "mode" "<MODE>")]) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/macro.h b/gcc/testsuite/gcc.target/riscv/rvv/base/macro.h new file mode 100644 index 00000000000..a032ac38f5a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/macro.h @@ -0,0 +1,6 @@ +#define exhaust_vector_regs() \ + asm volatile("#" :: \ + : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \ + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", \ + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", \ + "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c index 7ed10bc5833..ae672824685 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mov-9.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3" } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -fno-schedule-insns -fno-schedule-insns2 " } */ /* { dg-final { check-function-bodies "**" "" } } */ #include <riscv_vector.h> @@ -7,12 +7,12 @@ /* Test tieable of RVV types with same LMUL. 
*/ /* ** mov1: -** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1 ** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf2,\s*t[au],\s*m[au] -** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),2 ** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) ** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1 ** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),2 ** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) ** ret */ @@ -28,10 +28,10 @@ void mov1 (int8_t *in, int8_t *out, int M) /* ** mov2: -** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1 ** vsetvli\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*zero,\s*e8,\s*mf4,\s*t[au],\s*m[au] ** vle8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) ** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) +** addi\t(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),1 ** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\((?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])\) ** ret */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c new file mode 100644 index 00000000000..b1220c48f1b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c @@ -0,0 +1,385 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "riscv_vector.h" +#include "macro.h" + +/* +** spill_1: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** ... +** csrr\ta2,vlenb +** srli\ta2,a2,3 +** slli\ta3,a2,3 +** sub\ta3,a3,a2 +** add\ta3,a3,sp +** vse8.v\tv24,0\(a3\) +** ... +** csrr\ta2,vlenb +** srli\ta2,a2,3 +** slli\ta3,a2,3 +** sub\ta3,a3,a2 +** add\ta3,a3,sp +** vle8.v\tv24,0\(a3\) +** vse8.v\tv24,0\(a1\) +** csrr\tt0,vlenb +** add\tsp,sp,t0 +** ... +** jr\tra +*/ +void +spill_1 (int8_t *in, int8_t *out) +{ + vint8mf8_t v1 = *(vint8mf8_t*)in; + exhaust_vector_regs (); + *(vint8mf8_t*)out = v1; +} + +/* +** spill_2: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** vsetvli\ta5,zero,e8,mf4,ta,ma +** vle8.v\tv24,0\(a0\) +** csrr\ta2,vlenb +** srli\ta2,a2,2 +** slli\ta3,a2,2 +** sub\ta3,a3,a2 +** add\ta3,a3,sp +** vse8.v\tv24,0\(a3\) +** ... +** csrr\ta2,vlenb +** srli\ta2,a2,2 +** slli\ta3,a2,2 +** sub\ta3,a3,a2 +** add\ta3,a3,sp +** vle8.v\tv24,0\(a3\) +** vse8.v\tv24,0\(a1\) +** csrr\tt0,vlenb +** add\tsp,sp,t0 +** ... +** jr\tra +*/ +void +spill_2 (int8_t *in, int8_t *out) +{ + vint8mf4_t v1 = *(vint8mf4_t*)in; + exhaust_vector_regs (); + *(vint8mf4_t*)out = v1; +} + +/* +** spill_3: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** vsetvli\ta5,zero,e8,mf2,ta,ma +** vle8.v\tv24,0\(a0\) +** csrr\ta3,vlenb +** srli\ta3,a3,1 +** add\ta3,a3,sp +** vse8.v\tv24,0\(a3\) +** ... +** csrr\ta3,vlenb +** srli\ta3,a3,1 +** add\ta3,a3,sp +** vle8.v\tv24,0\(a3\) +** vse8.v\tv24,0\(a1\) +** csrr\tt0,vlenb +** add\tsp,sp,t0 +** ... 
+** jr\tra +*/ +void +spill_3 (int8_t *in, int8_t *out) +{ + vint8mf2_t v1 = *(vint8mf2_t*)in; + exhaust_vector_regs (); + *(vint8mf2_t*)out = v1; +} + +/* +** spill_4: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** ... +** vs1r.v\tv24,0\(sp\) +** ... +** vl1re8.v\tv2,0\(sp\) +** vs1r.v\tv2,0\(a1\) +** ... +** jr\tra +*/ +void +spill_4 (int8_t *in, int8_t *out) +{ + register vint8m1_t v1 asm("v1") = *(vint8m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + exhaust_vector_regs (); + register vint8m1_t v2 asm("v2") = v1; + *(vint8m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** spill_5: +** csrr\tt0,vlenb +** slli\tt1,t0,1 +** sub\tsp,sp,t1 +** ... +** vs2r.v\tv24,0\(sp\) +** ... +** vl2re8.v\tv4,0\(sp\) +** vs2r.v\tv4,0\(a1\) +** ... +** jr\tra +*/ +void +spill_5 (int8_t *in, int8_t *out) +{ + register vint8m2_t v2 asm("v2") = *(vint8m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + exhaust_vector_regs (); + register vint8m2_t v4 asm("v4") = v2; + *(vint8m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** spill_6: +** csrr\tt0,vlenb +** slli\tt1,t0,2 +** sub\tsp,sp,t1 +** ... +** vs4r.v\tv24,0\(sp\) +** ... +** vl4re8.v\tv8,0\(sp\) +** vs4r.v\tv8,0\(a1\) +** ... +** jr\tra +*/ +void +spill_6 (int8_t *in, int8_t *out) +{ + register vint8m4_t v4 asm("v4") = *(vint8m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + exhaust_vector_regs (); + register vint8m4_t v8 asm("v8") = v4; + *(vint8m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** spill_7: +** csrr\tt0,vlenb +** slli\tt1,t0,3 +** sub\tsp,sp,t1 +** ... +** vs8r.v\tv24,0\(sp\) +** ... +** vl8re8.v\tv16,0\(sp\) +** vs8r.v\tv16,0\(a1\) +** ... +** jr\tra +*/ +void +spill_7 (int8_t *in, int8_t *out) +{ + register vint8m8_t v8 asm("v8") = *(vint8m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + exhaust_vector_regs (); + register vint8m8_t v16 asm("v16") = v8; + *(vint8m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** spill_8: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** vsetvli\ta5,zero,e8,mf8,ta,ma +** vle8.v\tv24,0\(a0\) +** csrr\ta2,vlenb +** srli\ta2,a2,3 +** slli\ta3,a2,3 +** sub\ta3,a3,a2 +** add\ta3,a3,sp +** vse8.v\tv24,0\(a3\) +** ... +** csrr\ta2,vlenb +** srli\ta2,a2,3 +** slli\ta3,a2,3 +** sub\ta3,a3,a2 +** add\ta3,a3,sp +** vle8.v\tv24,0\(a3\) +** vse8.v\tv24,0\(a1\) +** csrr\tt0,vlenb +** add\tsp,sp,t0 +** ... +** jr\tra +*/ +void +spill_8 (uint8_t *in, uint8_t *out) +{ + vuint8mf8_t v1 = *(vuint8mf8_t*)in; + exhaust_vector_regs (); + *(vuint8mf8_t*)out = v1; +} + +/* +** spill_9: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** vsetvli\ta5,zero,e8,mf4,ta,ma +** vle8.v\tv24,0\(a0\) +** csrr\ta2,vlenb +** srli\ta2,a2,2 +** slli\ta3,a2,2 +** sub\ta3,a3,a2 +** add\ta3,a3,sp +** vse8.v\tv24,0\(a3\) +** ... +** csrr\ta2,vlenb +** srli\ta2,a2,2 +** slli\ta3,a2,2 +** sub\ta3,a3,a2 +** add\ta3,a3,sp +** vle8.v\tv24,0\(a3\) +** vse8.v\tv24,0\(a1\) +** csrr\tt0,vlenb +** add\tsp,sp,t0 +** ... +** jr\tra +*/ +void +spill_9 (uint8_t *in, uint8_t *out) +{ + vuint8mf4_t v1 = *(vuint8mf4_t*)in; + exhaust_vector_regs (); + *(vuint8mf4_t*)out = v1; +} + +/* +** spill_10: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** vsetvli\ta5,zero,e8,mf2,ta,ma +** vle8.v\tv24,0\(a0\) +** csrr\ta3,vlenb +** srli\ta3,a3,1 +** add\ta3,a3,sp +** vse8.v\tv24,0\(a3\) +** ... +** csrr\ta3,vlenb +** srli\ta3,a3,1 +** add\ta3,a3,sp +** vle8.v\tv24,0\(a3\) +** vse8.v\tv24,0\(a1\) +** csrr\tt0,vlenb +** add\tsp,sp,t0 +** ... 
+** jr\tra +*/ +void +spill_10 (uint8_t *in, uint8_t *out) +{ + vuint8mf2_t v1 = *(vuint8mf2_t*)in; + exhaust_vector_regs (); + *(vuint8mf2_t*)out = v1; +} + +/* +** spill_11: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** ... +** vs1r.v\tv24,0\(sp\) +** ... +** vl1re8.v\tv2,0\(sp\) +** vs1r.v\tv2,0\(a1\) +** ... +** jr\tra +*/ +void +spill_11 (uint8_t *in, uint8_t *out) +{ + register vuint8m1_t v1 asm("v1") = *(vuint8m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + exhaust_vector_regs (); + register vuint8m1_t v2 asm("v2") = v1; + *(vuint8m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** spill_12: +** csrr\tt0,vlenb +** slli\tt1,t0,1 +** sub\tsp,sp,t1 +** ... +** vs2r.v\tv24,0\(sp\) +** ... +** vl2re8.v\tv4,0\(sp\) +** vs2r.v\tv4,0\(a1\) +** ... +** jr\tra +*/ +void +spill_12 (uint8_t *in, uint8_t *out) +{ + register vuint8m2_t v2 asm("v2") = *(vuint8m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + exhaust_vector_regs (); + register vuint8m2_t v4 asm("v4") = v2; + *(vuint8m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** spill_13: +** csrr\tt0,vlenb +** slli\tt1,t0,2 +** sub\tsp,sp,t1 +** ... +** vs4r.v\tv24,0\(sp\) +** ... +** vl4re8.v\tv8,0\(sp\) +** vs4r.v\tv8,0\(a1\) +** ... +** jr\tra +*/ +void +spill_13 (uint8_t *in, uint8_t *out) +{ + register vuint8m4_t v4 asm("v4") = *(vuint8m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + exhaust_vector_regs (); + register vuint8m4_t v8 asm("v8") = v4; + *(vuint8m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** spill_14: +** csrr\tt0,vlenb +** slli\tt1,t0,3 +** sub\tsp,sp,t1 +** ... +** vs8r.v\tv24,0\(sp\) +** ... +** vl8re8.v\tv16,0\(sp\) +** vs8r.v\tv16,0\(a1\) +** ... +** jr\tra +*/ +void +spill_14 (uint8_t *in, uint8_t *out) +{ + register vuint8m8_t v8 asm("v8") = *(vuint8m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + exhaust_vector_regs (); + register vuint8m8_t v16 asm("v16") = v8; + *(vuint8m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c new file mode 100644 index 00000000000..d37857e24ab --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-10.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -fno-schedule-insns -fno-schedule-insns2 -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "riscv_vector.h" + +void f (char*); + +/* +** stach_check_alloca_1: +** addi\tsp,sp,-32 +** sw\tra,4\(sp\) +** sw\ts0,0\(sp\) +** addi\ts0,sp,8 +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** ... +** addi\ta2,a2,15 +** andi\ta2,a2,-8 +** sub\tsp,sp,a2 +** ... +** lw\tra,4\(sp\) +** lw\ts0,0\(sp\) +** addi\tsp,sp,32 +** jr\tra +*/ +void stach_check_alloca_1 (vuint8m1_t data, uint8_t *base, int y, ...) 
+{ + vuint8m8_t v0, v8, v16, v24; + asm volatile ("nop" + : "=vr" (v0), "=vr" (v8), "=vr" (v16), "=vr" (v24) + : + :); + asm volatile ("nop" + : + : "vr" (v0), "vr" (v8), "vr" (v16), "vr" (v24) + :); + *(vuint8m1_t *)base = data; + char* pStr = (char*)__builtin_alloca(y); + f(pStr); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-11.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-11.c new file mode 100644 index 00000000000..c2f68b86d90 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-11.c @@ -0,0 +1,60 @@ +/* { dg-do compile } */ +/* { dg-options "-msave-restore -march=rv32gcv -mabi=ilp32 -msave-restore -fno-schedule-insns -fno-schedule-insns2 -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ +#include "riscv_vector.h" + +void fn2 (float a1, float a2, float a3, float a4, + float a5, float a6, float a7, float a8); +void fn3 (char*); + +/* +** stack_save_restore_2: +** call\tt0,__riscv_save_2 +** csrr\tt0,vlenb +** slli\tt1,t0,1 +** sub\tsp,sp,t1 +** li\tt0,-8192 +** addi\tt0,t0,192 +** add\tsp,sp,t0 +** ... +** csrr\tt0,vlenb +** slli\tt1,t0,1 +** add\tsp,sp,t1 +** li\tt0,8192 +** addi\tt0,t0,-208 +** add\tsp,sp,t0 +** addi\tsp,sp,16 +** tail\t__riscv_restore_2 +*/ +int stack_save_restore_2 (float a1, float a2, float a3, float a4, + float a5, float a6, float a7, float a8, + vuint8m1_t data, uint8_t *base) +{ + char d[8000]; + float f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13; + asm volatile ("nop" + : "=f" (f1), "=f" (f2), "=f" (f3), "=f" (f4), "=f" (f5), "=f" (f6), + "=f" (f7), "=f" (f8), "=f" (f9), "=f" (f10), "=f" (f11), + "=f" (f12), "=f" (f13) + : + :); + asm volatile ("nop" + : + : "f" (f1), "f" (f2), "f" (f3), "f" (f4), "f" (f5), "f" (f6), + "f" (f7), "f" (f8), "f" (f9), "f" (f10), "f" (f11), + "f" (f12), "f" (f13) + :); + vuint8m8_t v0, v8, v16, v24; + asm volatile ("nop" + : "=vr" (v0), "=vr" (v8), "=vr" (v16), "=vr" (v24) + : + :); + asm volatile ("nop" + : + : "vr" (v0), "vr" (v8), "vr" (v16), "vr" (v24) + :); + *(vuint8m1_t *)base = data; + fn2 (a1, a2, a3, a4, a5, a6, a7, a8); + fn3(d); + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-12.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-12.c new file mode 100644 index 00000000000..de6e0604a3c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-12.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32 -msave-restore -fno-schedule-insns -fno-schedule-insns2 -O3" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + + +void fn2 (float a1, float a2, float a3, float a4, + float a5, float a6, float a7, float a8); +void fn3 (char*); + + +/* +** stack_save_restore_1: +** call\tt0,__riscv_save_0 +** li\tt0,-8192 +** addi\tt0,t0,192 +** add\tsp,sp,t0 +** ... +** li\ta0,-8192 +** addi\ta0,a0,192 +** li\ta5,8192 +** addi\ta5,a5,-192 +** add\ta5,a5,a0 +** add\ta0,a5,sp +** ... 
+** tail\t__riscv_restore_0 +*/ +int stack_save_restore_1 (float a1, float a2, float a3, float a4, + float a5, float a6, float a7, float a8) +{ + char d[8000]; + float f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13; + asm volatile ("nop" + : "=f" (f1), "=f" (f2), "=f" (f3), "=f" (f4), "=f" (f5), "=f" (f6), + "=f" (f7), "=f" (f8), "=f" (f9), "=f" (f10), "=f" (f11), + "=f" (f12), "=f" (f13) + : + :); + asm volatile ("nop" + : + : "f" (f1), "f" (f2), "f" (f3), "f" (f4), "f" (f5), "f" (f6), + "f" (f7), "f" (f8), "f" (f9), "f" (f10), "f" (f11), + "f" (f12), "f" (f13) + :); + fn2 (a1, a2, a3, a4, a5, a6, a7, a8); + fn3(d); + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c new file mode 100644 index 00000000000..ca1904b830d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c @@ -0,0 +1,320 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "riscv_vector.h" +#include "macro.h" + +/* +** spill_2: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** vsetvli\ta5,zero,e16,mf4,ta,ma +** vle16.v\tv24,0\(a0\) +** csrr\ta2,vlenb +** srli\ta2,a2,2 +** slli\ta3,a2,2 +** sub\ta3,a3,a2 +** add\ta3,a3,sp +** vse16.v\tv24,0\(a3\) +** ... +** csrr\ta2,vlenb +** srli\ta2,a2,2 +** slli\ta3,a2,2 +** sub\ta3,a3,a2 +** add\ta3,a3,sp +** vle16.v\tv24,0\(a3\) +** vse16.v\tv24,0\(a1\) +** csrr\tt0,vlenb +** add\tsp,sp,t0 +** ... +** jr\tra +*/ +void +spill_2 (int16_t *in, int16_t *out) +{ + vint16mf4_t v1 = *(vint16mf4_t*)in; + exhaust_vector_regs (); + *(vint16mf4_t*)out = v1; +} + +/* +** spill_3: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** vsetvli\ta5,zero,e16,mf2,ta,ma +** vle16.v\tv24,0\(a0\) +** csrr\ta3,vlenb +** srli\ta3,a3,1 +** add\ta3,a3,sp +** vse16.v\tv24,0\(a3\) +** ... +** csrr\ta3,vlenb +** srli\ta3,a3,1 +** add\ta3,a3,sp +** vle16.v\tv24,0\(a3\) +** vse16.v\tv24,0\(a1\) +** csrr\tt0,vlenb +** add\tsp,sp,t0 +** ... +** jr\tra +*/ +void +spill_3 (int16_t *in, int16_t *out) +{ + vint16mf2_t v1 = *(vint16mf2_t*)in; + exhaust_vector_regs (); + *(vint16mf2_t*)out = v1; +} + +/* +** spill_4: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** ... +** vs1r.v\tv24,0\(sp\) +** ... +** vl1re16.v\tv2,0\(sp\) +** vs1r.v\tv2,0\(a1\) +** ... +** jr\tra +*/ +void +spill_4 (int16_t *in, int16_t *out) +{ + register vint16m1_t v1 asm("v1") = *(vint16m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + exhaust_vector_regs (); + register vint16m1_t v2 asm("v2") = v1; + *(vint16m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** spill_5: +** csrr\tt0,vlenb +** slli\tt1,t0,1 +** sub\tsp,sp,t1 +** ... +** vs2r.v\tv24,0\(sp\) +** ... +** vl2re16.v\tv4,0\(sp\) +** vs2r.v\tv4,0\(a1\) +** ... +** jr\tra +*/ +void +spill_5 (int16_t *in, int16_t *out) +{ + register vint16m2_t v2 asm("v2") = *(vint16m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + exhaust_vector_regs (); + register vint16m2_t v4 asm("v4") = v2; + *(vint16m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** spill_6: +** csrr\tt0,vlenb +** slli\tt1,t0,2 +** sub\tsp,sp,t1 +** ... +** vs4r.v\tv24,0\(sp\) +** ... +** vl4re16.v\tv8,0\(sp\) +** vs4r.v\tv8,0\(a1\) +** ... 
+** jr\tra +*/ +void +spill_6 (int16_t *in, int16_t *out) +{ + register vint16m4_t v4 asm("v4") = *(vint16m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + exhaust_vector_regs (); + register vint16m4_t v8 asm("v8") = v4; + *(vint16m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** spill_7: +** csrr\tt0,vlenb +** slli\tt1,t0,3 +** sub\tsp,sp,t1 +** ... +** vs8r.v\tv24,0\(sp\) +** ... +** vl8re16.v\tv16,0\(sp\) +** vs8r.v\tv16,0\(a1\) +** ... +** jr\tra +*/ +void +spill_7 (int16_t *in, int16_t *out) +{ + register vint16m8_t v8 asm("v8") = *(vint16m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + exhaust_vector_regs (); + register vint16m8_t v16 asm("v16") = v8; + *(vint16m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** spill_9: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** vsetvli\ta5,zero,e16,mf4,ta,ma +** vle16.v\tv24,0\(a0\) +** csrr\ta2,vlenb +** srli\ta2,a2,2 +** slli\ta3,a2,2 +** sub\ta3,a3,a2 +** add\ta3,a3,sp +** vse16.v\tv24,0\(a3\) +** ... +** csrr\ta2,vlenb +** srli\ta2,a2,2 +** slli\ta3,a2,2 +** sub\ta3,a3,a2 +** add\ta3,a3,sp +** vle16.v\tv24,0\(a3\) +** vse16.v\tv24,0\(a1\) +** csrr\tt0,vlenb +** add\tsp,sp,t0 +** ... +** jr\tra +*/ +void +spill_9 (uint16_t *in, uint16_t *out) +{ + vuint16mf4_t v1 = *(vuint16mf4_t*)in; + exhaust_vector_regs (); + *(vuint16mf4_t*)out = v1; +} + +/* +** spill_10: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** vsetvli\ta5,zero,e16,mf2,ta,ma +** vle16.v\tv24,0\(a0\) +** csrr\ta3,vlenb +** srli\ta3,a3,1 +** add\ta3,a3,sp +** vse16.v\tv24,0\(a3\) +** ... +** csrr\ta3,vlenb +** srli\ta3,a3,1 +** add\ta3,a3,sp +** vle16.v\tv24,0\(a3\) +** vse16.v\tv24,0\(a1\) +** csrr\tt0,vlenb +** add\tsp,sp,t0 +** ... +** jr\tra +*/ +void +spill_10 (uint16_t *in, uint16_t *out) +{ + vuint16mf2_t v1 = *(vuint16mf2_t*)in; + exhaust_vector_regs (); + *(vuint16mf2_t*)out = v1; +} + +/* +** spill_11: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** ... +** vs1r.v\tv24,0\(sp\) +** ... +** vl1re16.v\tv2,0\(sp\) +** vs1r.v\tv2,0\(a1\) +** ... +** jr\tra +*/ +void +spill_11 (uint16_t *in, uint16_t *out) +{ + register vuint16m1_t v1 asm("v1") = *(vuint16m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + exhaust_vector_regs (); + register vuint16m1_t v2 asm("v2") = v1; + *(vuint16m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** spill_12: +** csrr\tt0,vlenb +** slli\tt1,t0,1 +** sub\tsp,sp,t1 +** ... +** vs2r.v\tv24,0\(sp\) +** ... +** vl2re16.v\tv4,0\(sp\) +** vs2r.v\tv4,0\(a1\) +** ... +** jr\tra +*/ +void +spill_12 (uint16_t *in, uint16_t *out) +{ + register vuint16m2_t v2 asm("v2") = *(vuint16m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + exhaust_vector_regs (); + register vuint16m2_t v4 asm("v4") = v2; + *(vuint16m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** spill_13: +** csrr\tt0,vlenb +** slli\tt1,t0,2 +** sub\tsp,sp,t1 +** ... +** vs4r.v\tv24,0\(sp\) +** ... +** vl4re16.v\tv8,0\(sp\) +** vs4r.v\tv8,0\(a1\) +** ... +** jr\tra +*/ +void +spill_13 (uint16_t *in, uint16_t *out) +{ + register vuint16m4_t v4 asm("v4") = *(vuint16m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + exhaust_vector_regs (); + register vuint16m4_t v8 asm("v8") = v4; + *(vuint16m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** spill_14: +** csrr\tt0,vlenb +** slli\tt1,t0,3 +** sub\tsp,sp,t1 +** ... +** vs8r.v\tv24,0\(sp\) +** ... +** vl8re16.v\tv16,0\(sp\) +** vs8r.v\tv16,0\(a1\) +** ... 
+** jr\tra +*/ +void +spill_14 (uint16_t *in, uint16_t *out) +{ + register vuint16m8_t v8 asm("v8") = *(vuint16m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + exhaust_vector_regs (); + register vuint16m8_t v16 asm("v16") = v8; + *(vuint16m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c new file mode 100644 index 00000000000..2039ca34516 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c @@ -0,0 +1,254 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "riscv_vector.h" +#include "macro.h" + +/* +** spill_3: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** vsetvli\ta5,zero,e32,mf2,ta,ma +** vle32.v\tv24,0\(a0\) +** csrr\ta3,vlenb +** srli\ta3,a3,1 +** add\ta3,a3,sp +** vse32.v\tv24,0\(a3\) +** ... +** csrr\ta3,vlenb +** srli\ta3,a3,1 +** add\ta3,a3,sp +** vle32.v\tv24,0\(a3\) +** vse32.v\tv24,0\(a1\) +** csrr\tt0,vlenb +** add\tsp,sp,t0 +** ... +** jr\tra +*/ +void +spill_3 (int32_t *in, int32_t *out) +{ + vint32mf2_t v1 = *(vint32mf2_t*)in; + exhaust_vector_regs (); + *(vint32mf2_t*)out = v1; +} + +/* +** spill_4: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** ... +** vs1r.v\tv24,0\(sp\) +** ... +** vl1re32.v\tv2,0\(sp\) +** vs1r.v\tv2,0\(a1\) +** ... +** jr\tra +*/ +void +spill_4 (int32_t *in, int32_t *out) +{ + register vint32m1_t v1 asm("v1") = *(vint32m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + exhaust_vector_regs (); + register vint32m1_t v2 asm("v2") = v1; + *(vint32m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** spill_5: +** csrr\tt0,vlenb +** slli\tt1,t0,1 +** sub\tsp,sp,t1 +** ... +** vs2r.v\tv24,0\(sp\) +** ... +** vl2re32.v\tv4,0\(sp\) +** vs2r.v\tv4,0\(a1\) +** ... +** jr\tra +*/ +void +spill_5 (int32_t *in, int32_t *out) +{ + register vint32m2_t v2 asm("v2") = *(vint32m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + exhaust_vector_regs (); + register vint32m2_t v4 asm("v4") = v2; + *(vint32m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** spill_6: +** csrr\tt0,vlenb +** slli\tt1,t0,2 +** sub\tsp,sp,t1 +** ... +** vs4r.v\tv24,0\(sp\) +** ... +** vl4re32.v\tv8,0\(sp\) +** vs4r.v\tv8,0\(a1\) +** ... +** jr\tra +*/ +void +spill_6 (int32_t *in, int32_t *out) +{ + register vint32m4_t v4 asm("v4") = *(vint32m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + exhaust_vector_regs (); + register vint32m4_t v8 asm("v8") = v4; + *(vint32m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** spill_7: +** csrr\tt0,vlenb +** slli\tt1,t0,3 +** sub\tsp,sp,t1 +** ... +** vs8r.v\tv24,0\(sp\) +** ... +** vl8re32.v\tv16,0\(sp\) +** vs8r.v\tv16,0\(a1\) +** ... +** jr\tra +*/ +void +spill_7 (int32_t *in, int32_t *out) +{ + register vint32m8_t v8 asm("v8") = *(vint32m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + exhaust_vector_regs (); + register vint32m8_t v16 asm("v16") = v8; + *(vint32m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** spill_10: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** vsetvli\ta5,zero,e32,mf2,ta,ma +** vle32.v\tv24,0\(a0\) +** csrr\ta3,vlenb +** srli\ta3,a3,1 +** add\ta3,a3,sp +** vse32.v\tv24,0\(a3\) +** ... +** csrr\ta3,vlenb +** srli\ta3,a3,1 +** add\ta3,a3,sp +** vle32.v\tv24,0\(a3\) +** vse32.v\tv24,0\(a1\) +** csrr\tt0,vlenb +** add\tsp,sp,t0 +** ... 
+** jr\tra +*/ +void +spill_10 (uint32_t *in, uint32_t *out) +{ + vuint32mf2_t v1 = *(vuint32mf2_t*)in; + exhaust_vector_regs (); + *(vuint32mf2_t*)out = v1; +} + +/* +** spill_11: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** ... +** vs1r.v\tv24,0\(sp\) +** ... +** vl1re32.v\tv2,0\(sp\) +** vs1r.v\tv2,0\(a1\) +** ... +** jr\tra +*/ +void +spill_11 (uint32_t *in, uint32_t *out) +{ + register vuint32m1_t v1 asm("v1") = *(vuint32m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + exhaust_vector_regs (); + register vuint32m1_t v2 asm("v2") = v1; + *(vuint32m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** spill_12: +** csrr\tt0,vlenb +** slli\tt1,t0,1 +** sub\tsp,sp,t1 +** ... +** vs2r.v\tv24,0\(sp\) +** ... +** vl2re32.v\tv4,0\(sp\) +** vs2r.v\tv4,0\(a1\) +** ... +** jr\tra +*/ +void +spill_12 (uint32_t *in, uint32_t *out) +{ + register vuint32m2_t v2 asm("v2") = *(vuint32m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + exhaust_vector_regs (); + register vuint32m2_t v4 asm("v4") = v2; + *(vuint32m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** spill_13: +** csrr\tt0,vlenb +** slli\tt1,t0,2 +** sub\tsp,sp,t1 +** ... +** vs4r.v\tv24,0\(sp\) +** ... +** vl4re32.v\tv8,0\(sp\) +** vs4r.v\tv8,0\(a1\) +** ... +** jr\tra +*/ +void +spill_13 (uint32_t *in, uint32_t *out) +{ + register vuint32m4_t v4 asm("v4") = *(vuint32m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + exhaust_vector_regs (); + register vuint32m4_t v8 asm("v8") = v4; + *(vuint32m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** spill_14: +** csrr\tt0,vlenb +** slli\tt1,t0,3 +** sub\tsp,sp,t1 +** ... +** vs8r.v\tv24,0\(sp\) +** ... +** vl8re32.v\tv16,0\(sp\) +** vs8r.v\tv16,0\(a1\) +** ... +** jr\tra +*/ +void +spill_14 (uint32_t *in, uint32_t *out) +{ + register vuint32m8_t v8 asm("v8") = *(vuint32m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + exhaust_vector_regs (); + register vuint32m8_t v16 asm("v16") = v8; + *(vuint32m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c new file mode 100644 index 00000000000..83c80b0b045 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c @@ -0,0 +1,196 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "riscv_vector.h" +#include "macro.h" + +/* +** spill_4: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** ... +** vs1r.v\tv24,0\(sp\) +** ... +** vl1re64.v\tv2,0\(sp\) +** vs1r.v\tv2,0\(a1\) +** ... +** jr\tra +*/ +void +spill_4 (int64_t *in, int64_t *out) +{ + register vint64m1_t v1 asm("v1") = *(vint64m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + exhaust_vector_regs (); + register vint64m1_t v2 asm("v2") = v1; + *(vint64m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** spill_5: +** csrr\tt0,vlenb +** slli\tt1,t0,1 +** sub\tsp,sp,t1 +** ... +** vs2r.v\tv24,0\(sp\) +** ... +** vl2re64.v\tv4,0\(sp\) +** vs2r.v\tv4,0\(a1\) +** ... +** jr\tra +*/ +void +spill_5 (int64_t *in, int64_t *out) +{ + register vint64m2_t v2 asm("v2") = *(vint64m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + exhaust_vector_regs (); + register vint64m2_t v4 asm("v4") = v2; + *(vint64m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** spill_6: +** csrr\tt0,vlenb +** slli\tt1,t0,2 +** sub\tsp,sp,t1 +** ... +** vs4r.v\tv24,0\(sp\) +** ... 
+** vl4re64.v\tv8,0\(sp\) +** vs4r.v\tv8,0\(a1\) +** ... +** jr\tra +*/ +void +spill_6 (int64_t *in, int64_t *out) +{ + register vint64m4_t v4 asm("v4") = *(vint64m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + exhaust_vector_regs (); + register vint64m4_t v8 asm("v8") = v4; + *(vint64m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** spill_7: +** csrr\tt0,vlenb +** slli\tt1,t0,3 +** sub\tsp,sp,t1 +** ... +** vs8r.v\tv24,0\(sp\) +** ... +** vl8re64.v\tv16,0\(sp\) +** vs8r.v\tv16,0\(a1\) +** ... +** jr\tra +*/ +void +spill_7 (int64_t *in, int64_t *out) +{ + register vint64m8_t v8 asm("v8") = *(vint64m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + exhaust_vector_regs (); + register vint64m8_t v16 asm("v16") = v8; + *(vint64m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} + +/* +** spill_11: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** ... +** vs1r.v\tv24,0\(sp\) +** ... +** vl1re64.v\tv2,0\(sp\) +** vs1r.v\tv2,0\(a1\) +** ... +** jr\tra +*/ +void +spill_11 (uint64_t *in, uint64_t *out) +{ + register vuint64m1_t v1 asm("v1") = *(vuint64m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + exhaust_vector_regs (); + register vuint64m1_t v2 asm("v2") = v1; + *(vuint64m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** spill_12: +** csrr\tt0,vlenb +** slli\tt1,t0,1 +** sub\tsp,sp,t1 +** ... +** vs2r.v\tv24,0\(sp\) +** ... +** vl2re64.v\tv4,0\(sp\) +** vs2r.v\tv4,0\(a1\) +** ... +** jr\tra +*/ +void +spill_12 (uint64_t *in, uint64_t *out) +{ + register vuint64m2_t v2 asm("v2") = *(vuint64m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + exhaust_vector_regs (); + register vuint64m2_t v4 asm("v4") = v2; + *(vuint64m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** spill_13: +** csrr\tt0,vlenb +** slli\tt1,t0,2 +** sub\tsp,sp,t1 +** ... +** vs4r.v\tv24,0\(sp\) +** ... +** vl4re64.v\tv8,0\(sp\) +** vs4r.v\tv8,0\(a1\) +** ... +** jr\tra +*/ +void +spill_13 (uint64_t *in, uint64_t *out) +{ + register vuint64m4_t v4 asm("v4") = *(vuint64m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + exhaust_vector_regs (); + register vuint64m4_t v8 asm("v8") = v4; + *(vuint64m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** spill_14: +** csrr\tt0,vlenb +** slli\tt1,t0,3 +** sub\tsp,sp,t1 +** ... +** vs8r.v\tv24,0\(sp\) +** ... +** vl8re64.v\tv16,0\(sp\) +** vs8r.v\tv16,0\(a1\) +** ... +** jr\tra +*/ +void +spill_14 (uint64_t *in, uint64_t *out) +{ + register vuint64m8_t v8 asm("v8") = *(vuint64m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + exhaust_vector_regs (); + register vuint64m8_t v16 asm("v16") = v8; + *(vuint64m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c new file mode 100644 index 00000000000..3c228a00c48 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c @@ -0,0 +1,130 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "riscv_vector.h" +#include "macro.h" + +/* +** spill_3: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** vsetvli\ta5,zero,e32,mf2,ta,ma +** vle32.v\tv24,0\(a0\) +** csrr\ta3,vlenb +** srli\ta3,a3,1 +** add\ta3,a3,sp +** vse32.v\tv24,0\(a3\) +** ... +** csrr\ta3,vlenb +** srli\ta3,a3,1 +** add\ta3,a3,sp +** vle32.v\tv24,0\(a3\) +** vse32.v\tv24,0\(a1\) +** csrr\tt0,vlenb +** add\tsp,sp,t0 +** ... 
+** jr\tra +*/ +void +spill_3 (float *in, float *out) +{ + vfloat32mf2_t v1 = *(vfloat32mf2_t*)in; + exhaust_vector_regs (); + *(vfloat32mf2_t*)out = v1; +} + +/* +** spill_4: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** ... +** vs1r.v\tv24,0\(sp\) +** ... +** vl1re32.v\tv2,0\(sp\) +** vs1r.v\tv2,0\(a1\) +** ... +** jr\tra +*/ +void +spill_4 (float *in, float *out) +{ + register vfloat32m1_t v1 asm("v1") = *(vfloat32m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + exhaust_vector_regs (); + register vfloat32m1_t v2 asm("v2") = v1; + *(vfloat32m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** spill_5: +** csrr\tt0,vlenb +** slli\tt1,t0,1 +** sub\tsp,sp,t1 +** ... +** vs2r.v\tv24,0\(sp\) +** ... +** vl2re32.v\tv4,0\(sp\) +** vs2r.v\tv4,0\(a1\) +** ... +** jr\tra +*/ +void +spill_5 (float *in, float *out) +{ + register vfloat32m2_t v2 asm("v2") = *(vfloat32m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + exhaust_vector_regs (); + register vfloat32m2_t v4 asm("v4") = v2; + *(vfloat32m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** spill_6: +** csrr\tt0,vlenb +** slli\tt1,t0,2 +** sub\tsp,sp,t1 +** ... +** vs4r.v\tv24,0\(sp\) +** ... +** vl4re32.v\tv8,0\(sp\) +** vs4r.v\tv8,0\(a1\) +** ... +** jr\tra +*/ +void +spill_6 (float *in, float *out) +{ + register vfloat32m4_t v4 asm("v4") = *(vfloat32m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + exhaust_vector_regs (); + register vfloat32m4_t v8 asm("v8") = v4; + *(vfloat32m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** spill_7: +** csrr\tt0,vlenb +** slli\tt1,t0,3 +** sub\tsp,sp,t1 +** ... +** vs8r.v\tv24,0\(sp\) +** ... +** vl8re32.v\tv16,0\(sp\) +** vs8r.v\tv16,0\(a1\) +** ... +** jr\tra +*/ +void +spill_7 (float *in, float *out) +{ + register vfloat32m8_t v8 asm("v8") = *(vfloat32m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + exhaust_vector_regs (); + register vfloat32m8_t v16 asm("v16") = v8; + *(vfloat32m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c new file mode 100644 index 00000000000..340029da88b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c @@ -0,0 +1,101 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "riscv_vector.h" +#include "macro.h" + +/* +** spill_4: +** csrr\tt0,vlenb +** sub\tsp,sp,t0 +** ... +** vs1r.v\tv24,0\(sp\) +** ... +** vl1re64.v\tv2,0\(sp\) +** vs1r.v\tv2,0\(a1\) +** ... +** jr\tra +*/ +void +spill_4 (double *in, double *out) +{ + register vfloat64m1_t v1 asm("v1") = *(vfloat64m1_t*)in; + asm volatile ("# %0"::"vr"(v1)); + exhaust_vector_regs (); + register vfloat64m1_t v2 asm("v2") = v1; + *(vfloat64m1_t*)out = v2; + asm volatile ("# %0"::"vr"(v2)); +} + +/* +** spill_5: +** csrr\tt0,vlenb +** slli\tt1,t0,1 +** sub\tsp,sp,t1 +** ... +** vs2r.v\tv24,0\(sp\) +** ... +** vl2re64.v\tv4,0\(sp\) +** vs2r.v\tv4,0\(a1\) +** ... +** jr\tra +*/ +void +spill_5 (double *in, double *out) +{ + register vfloat64m2_t v2 asm("v2") = *(vfloat64m2_t*)in; + asm volatile ("# %0"::"vr"(v2)); + exhaust_vector_regs (); + register vfloat64m2_t v4 asm("v4") = v2; + *(vfloat64m2_t*)out = v4; + asm volatile ("# %0"::"vr"(v4)); +} + +/* +** spill_6: +** csrr\tt0,vlenb +** slli\tt1,t0,2 +** sub\tsp,sp,t1 +** ... +** vs4r.v\tv24,0\(sp\) +** ... +** vl4re64.v\tv8,0\(sp\) +** vs4r.v\tv8,0\(a1\) +** ... 
+** jr\tra +*/ +void +spill_6 (double *in, double *out) +{ + register vfloat64m4_t v4 asm("v4") = *(vfloat64m4_t*)in; + asm volatile ("# %0"::"vr"(v4)); + exhaust_vector_regs (); + register vfloat64m4_t v8 asm("v8") = v4; + *(vfloat64m4_t*)out = v8; + asm volatile ("# %0"::"vr"(v8)); +} + +/* +** spill_7: +** csrr\tt0,vlenb +** slli\tt1,t0,3 +** sub\tsp,sp,t1 +** ... +** vs8r.v\tv24,0\(sp\) +** ... +** vl8re64.v\tv16,0\(sp\) +** vs8r.v\tv16,0\(a1\) +** ... +** jr\tra +*/ +void +spill_7 (double *in, double *out) +{ + register vfloat64m8_t v8 asm("v8") = *(vfloat64m8_t*)in; + asm volatile ("# %0"::"vr"(v8)); + exhaust_vector_regs (); + register vfloat64m8_t v16 asm("v16") = v8; + *(vfloat64m8_t*)out = v16; + asm volatile ("# %0"::"vr"(v16)); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c new file mode 100644 index 00000000000..cf1eea2fa3f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c @@ -0,0 +1,114 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "riscv_vector.h" +#include "macro.h" + +/* +** spill: +** csrr\tt0,vlenb +** slli\tt1,t0,4 +** sub\tsp,sp,t1 +** vsetvli\ta3,zero,e8,mf8,ta,ma +** vle8.v\tv24,0\(a0\) +** csrr\ta5,vlenb +** srli\ta5,a5,3 +** add\ta5,a5,sp +** vse8.v\tv24,0\(a5\) +** addi\ta5,a0,1 +** vsetvli\ta4,zero,e8,mf4,ta,ma +** vle8.v\tv24,0\(a5\) +** csrr\ta5,vlenb +** srli\ta5,a5,2 +** add\ta5,a5,sp +** vse8.v\tv24,0\(a5\) +** addi\ta2,a0,2 +** vsetvli\ta5,zero,e8,mf2,ta,ma +** vle8.v\tv24,0\(a2\) +** csrr\ta2,vlenb +** srli\ta2,a2,1 +** add\ta2,a2,sp +** vse8.v\tv24,0\(a2\) +** addi\ta2,a0,3 +** vl1re8.v\tv24,0\(a2\) +** csrr\ta2,vlenb +** add\ta2,a2,sp +** vs1r.v\tv24,0\(a2\) +** addi\ta2,a0,4 +** vl2re8.v\tv24,0\(a2\) +** csrr\tt3,vlenb +** slli\ta2,t3,1 +** add\ta2,a2,sp +** vs2r.v\tv24,0\(a2\) +** addi\ta2,a0,5 +** vl4re8.v\tv24,0\(a2\) +** mv\ta2,t3 +** slli\tt3,t3,2 +** add\tt3,t3,sp +** vs4r.v\tv24,0\(t3\) +** addi\ta0,a0,6 +** vl8re8.v\tv24,0\(a0\) +** slli\ta0,a2,3 +** add\ta0,a0,sp +** vs8r.v\tv24,0\(a0\) +** ... +** srli\ta0,a2,3 +** add\ta0,a0,sp +** ... +** vle8.v\tv27,0\(a0\) +** vse8.v\tv27,0\(a1\) +** addi\ta3,a1,1 +** srli\ta0,a2,2 +** add\ta0,a0,sp +** ... +** vle8.v\tv27,0\(a0\) +** vse8.v\tv27,0\(a3\) +** addi\ta4,a1,2 +** srli\ta3,a2,1 +** add\ta3,a3,sp +** ... +** vle8.v\tv27,0\(a3\) +** vse8.v\tv27,0\(a4\) +** addi\ta5,a1,3 +** add\ta4,a2,sp +** vl1re8.v\tv25,0\(a4\) +** vs1r.v\tv25,0\(a5\) +** addi\ta5,a1,4 +** slli\ta4,a2,1 +** add\ta4,a4,sp +** vl2re8.v\tv26,0\(a4\) +** vs2r.v\tv26,0\(a5\) +** addi\ta5,a1,5 +** vl4re8.v\tv28,0\(t3\) +** vs4r.v\tv28,0\(a5\) +** addi\ta1,a1,6 +** slli\ta5,a2,3 +** add\ta5,a5,sp +** vl8re8.v\tv24,0\(a5\) +** vs8r.v\tv24,0\(a1\) +** csrr\tt0,vlenb +** slli\tt1,t0,4 +** add\tsp,sp,t1 +** ... 
+** jr\tra
+*/
+void
+spill (int8_t *in, int8_t *out)
+{
+  vint8mf8_t v0 = *(vint8mf8_t*)in;
+  vint8mf4_t v1 = *(vint8mf4_t*)(in + 1);
+  vint8mf2_t v2 = *(vint8mf2_t*)(in + 2);
+  vint8m1_t v3 = *(vint8m1_t*)(in + 3);
+  vint8m2_t v4 = *(vint8m2_t*)(in + 4);
+  vint8m4_t v8 = *(vint8m4_t*)(in + 5);
+  vint8m8_t v16 = *(vint8m8_t*)(in + 6);
+  exhaust_vector_regs ();
+  *(vint8mf8_t*)out = v0;
+  *(vint8mf4_t*)(out + 1) = v1;
+  *(vint8mf2_t*)(out + 2) = v2;
+  *(vint8m1_t*)(out + 3) = v3;
+  *(vint8m2_t*)(out + 4) = v4;
+  *(vint8m4_t*)(out + 5) = v8;
+  *(vint8m8_t*)(out + 6) = v16;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-8.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-8.c
new file mode 100644
index 00000000000..ddc36e888eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-8.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+void f2 (char*);
+void f3 (char*, ...);
+
+/*
+** stack_check_alloca_1:
+** addi\tsp,sp,-48
+** sw\tra,12\(sp\)
+** sw\ts0,8\(sp\)
+** addi\ts0,sp,16
+** ...
+** addi\ta0,a0,23
+** andi\ta0,a0,-16
+** sub\tsp,sp,a0
+** ...
+** addi\tsp,s0,-16
+** lw\tra,12\(sp\)
+** lw\ts0,8\(sp\)
+** addi\tsp,sp,48
+** jr\tra
+*/
+void stack_check_alloca_1 (int y, ...)
+{
+  char* pStr = (char*)__builtin_alloca(y);
+  f2(pStr);
+}
+
+/*
+** stack_check_alloca_2:
+** addi\tsp,sp,-48
+** sw\tra,44\(sp\)
+** sw\ts0,40\(sp\)
+** addi\ts0,sp,48
+** addi\ta0,a0,23
+** andi\ta0,a0,-16
+** sub\tsp,sp,a0
+** ...
+** addi\tsp,s0,-48
+** lw\tra,44\(sp\)
+** lw\ts0,40\(sp\)
+** addi\tsp,sp,48
+** jr\tra
+*/
+void stack_check_alloca_2 (int y)
+{
+  char* pStr = (char*)__builtin_alloca(y);
+  f3(pStr, pStr, pStr, pStr, pStr, pStr, pStr, pStr, 2, pStr, pStr, pStr, 1);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-9.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-9.c
new file mode 100644
index 00000000000..7111113d393
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-9.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -O3" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "riscv_vector.h"
+
+void f (char*);
+
+/*
+** stack_check_alloca_1:
+** addi\tsp,sp,-48
+** sw\tra,12\(sp\)
+** sw\ts0,8\(sp\)
+** addi\ts0,sp,16
+** csrr\tt0,vlenb
+** slli\tt1,t0,1
+** sub\tsp,sp,t1
+** ...
+** addi\ta2,a2,23
+** andi\ta2,a2,-16
+** sub\tsp,sp,a2
+** ...
+** lw\tra,12\(sp\)
+** lw\ts0,8\(sp\)
+** addi\tsp,sp,48
+** jr\tra
+*/
+void stack_check_alloca_1 (vuint8m1_t data, uint8_t *base, int y, ...)
+{
+  vuint8m8_t v0, v8, v16, v24;
+  asm volatile ("nop"
+                : "=vr" (v0), "=vr" (v8), "=vr" (v16), "=vr" (v24)
+                :
+                :);
+  asm volatile ("nop"
+                :
+                : "vr" (v0), "vr" (v8), "vr" (v16), "vr" (v24)
+                :);
+  *(vuint8m1_t *)base = data;
+  char* pStr = (char*)__builtin_alloca(y);
+  f(pStr);
+}
-- 
2.36.1
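
For readers of these tests, the whole series boils down to one pattern: keep
a vector value live across a point where every vector register is clobbered,
so the register allocator has no choice but to spill it to a vlenb-scaled
stack slot. A minimal standalone sketch follows (illustrative only, not part
of the patch; the function name is made up, and the explicit clobber list
stands in for the exhaust_vector_regs () helper from macro.h):

#include <stdint.h>
#include "riscv_vector.h"

void
spill_m2_sketch (int32_t *in, int32_t *out)
{
  /* LMUL=2 value that has to survive the clobber below.  */
  vint32m2_t v = *(vint32m2_t *) in;
  /* Clobber all 32 vector registers, comparable in effect to the
     exhaust_vector_regs () calls used in the tests above.  */
  asm volatile ("" : : :
                "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
                "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
                "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
                "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
  *(vint32m2_t *) out = v;
}

Built with -march=rv32gcv -mabi=ilp32 -O3, the expected prologue is the
sequence the m2 tests check for: csrr t0,vlenb; slli t1,t0,1; sub sp,sp,t1,
i.e. a frame of vlenb * LMUL bytes, with the value moved by the
whole-register instructions vs2r.v and vl2re32.v. Switching the type to
m1/m4/m8 drops the slli or changes its shift amount to 2/3, while the
fractional types (mf2 and below) instead address the slot at a vlenb >> n
offset with vle/vse under an explicit vsetvli, as in spill_3 above.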