Marcin, your implementation looks very good to me. Thanks!
But please be aware that we deprecated the support of g5 and g6 and intend to remove that code from the back-end with the next GCC version. So I would prefer if you could remove all the !TARGET_CPU_ZARCH stuff from the implementation and just error out if split-stack is enabled with -march g5/g6. It currently makes the implementation more complicated and would have to be removed anyway in the future. Thanks! https://gcc.gnu.org/ml/gcc-patches/2015-12/msg01854.html Bye, -Andreas- On 01/02/2016 08:16 PM, Marcin Kościelnicki wrote: > libgcc/ChangeLog: > > * config.host: Use t-stack and t-stack-s390 for s390*-*-linux. > * config/s390/morestack.S: New file. > * config/s390/t-stack-s390: New file. > * generic-morestack.c (__splitstack_find): Add s390-specific code. > > gcc/ChangeLog: > > * common/config/s390/s390-common.c (s390_supports_split_stack): > New function. > (TARGET_SUPPORTS_SPLIT_STACK): New macro. > * config/s390/s390-protos.h: Add s390_expand_split_stack_prologue. > * config/s390/s390.c (struct machine_function): New field > split_stack_varargs_pointer. > (s390_split_branches): Don't split split-stack pseudo-insns, rewire > split-stack prologue conditional jump instead of splitting it. > (s390_chunkify_start): Don't reload const pool register on split-stack > prologue conditional jumps. > (s390_register_info): Mark r12 as clobbered if it'll be used as temp > in s390_emit_prologue. > (s390_emit_prologue): Use r12 as temp if r1 is taken by split-stack > vararg pointer. > (morestack_ref): New global. > (SPLIT_STACK_AVAILABLE): New macro. > (s390_expand_split_stack_prologue): New function. > (s390_expand_split_stack_call_esa): New function. > (s390_expand_split_stack_call_zarch): New function. > (s390_live_on_entry): New function. > (s390_va_start): Use split-stack vararg pointer if appropriate. > (s390_reorg): Lower the split-stack pseudo-insns. > (s390_asm_file_end): Emit the split-stack note sections. > (TARGET_EXTRA_LIVE_ON_ENTRY): New macro. > * config/s390/s390.md: (UNSPEC_STACK_CHECK): New unspec. > (UNSPECV_SPLIT_STACK_CALL_ZARCH): New unspec. > (UNSPECV_SPLIT_STACK_CALL_ESA): New unspec. > (UNSPECV_SPLIT_STACK_SIBCALL): New unspec. > (UNSPECV_SPLIT_STACK_MARKER): New unspec. > (split_stack_prologue): New expand. > (split_stack_call_esa): New insn. > (split_stack_call_zarch_*): New insn. > (split_stack_cond_call_zarch_*): New insn. > (split_stack_space_check): New expand. > (split_stack_sibcall_basr): New insn. > (split_stack_sibcall_*): New insn. > (split_stack_cond_sibcall_*): New insn. > (split_stack_marker): New insn. > --- > gcc/ChangeLog | 41 ++ > gcc/common/config/s390/s390-common.c | 14 + > gcc/config/s390/s390-protos.h | 1 + > gcc/config/s390/s390.c | 538 +++++++++++++++++++++++++- > gcc/config/s390/s390.md | 133 +++++++ > libgcc/ChangeLog | 7 + > libgcc/config.host | 4 +- > libgcc/config/s390/morestack.S | 718 > +++++++++++++++++++++++++++++++++++ > libgcc/config/s390/t-stack-s390 | 2 + > libgcc/generic-morestack.c | 4 + > 10 files changed, 1454 insertions(+), 8 deletions(-) > create mode 100644 libgcc/config/s390/morestack.S > create mode 100644 libgcc/config/s390/t-stack-s390 > > diff --git a/gcc/ChangeLog b/gcc/ChangeLog > index 4c7046f..a4f4dff 100644 > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -1,5 +1,46 @@ > 2016-01-02 Marcin Kościelnicki <koria...@0x04.net> > > + * common/config/s390/s390-common.c (s390_supports_split_stack): > + New function. > + (TARGET_SUPPORTS_SPLIT_STACK): New macro. 
> + * config/s390/s390-protos.h: Add s390_expand_split_stack_prologue. > + * config/s390/s390.c (struct machine_function): New field > + split_stack_varargs_pointer. > + (s390_split_branches): Don't split split-stack pseudo-insns, rewire > + split-stack prologue conditional jump instead of splitting it. > + (s390_chunkify_start): Don't reload const pool register on split-stack > + prologue conditional jumps. > + (s390_register_info): Mark r12 as clobbered if it'll be used as temp > + in s390_emit_prologue. > + (s390_emit_prologue): Use r12 as temp if r1 is taken by split-stack > + vararg pointer. > + (morestack_ref): New global. > + (SPLIT_STACK_AVAILABLE): New macro. > + (s390_expand_split_stack_prologue): New function. > + (s390_expand_split_stack_call_esa): New function. > + (s390_expand_split_stack_call_zarch): New function. > + (s390_live_on_entry): New function. > + (s390_va_start): Use split-stack vararg pointer if appropriate. > + (s390_reorg): Lower the split-stack pseudo-insns. > + (s390_asm_file_end): Emit the split-stack note sections. > + (TARGET_EXTRA_LIVE_ON_ENTRY): New macro. > + * config/s390/s390.md: (UNSPEC_STACK_CHECK): New unspec. > + (UNSPECV_SPLIT_STACK_CALL_ZARCH): New unspec. > + (UNSPECV_SPLIT_STACK_CALL_ESA): New unspec. > + (UNSPECV_SPLIT_STACK_SIBCALL): New unspec. > + (UNSPECV_SPLIT_STACK_MARKER): New unspec. > + (split_stack_prologue): New expand. > + (split_stack_call_esa): New insn. > + (split_stack_call_zarch_*): New insn. > + (split_stack_cond_call_zarch_*): New insn. > + (split_stack_space_check): New expand. > + (split_stack_sibcall_basr): New insn. > + (split_stack_sibcall_*): New insn. > + (split_stack_cond_sibcall_*): New insn. > + (split_stack_marker): New insn. > + > +2016-01-02 Marcin Kościelnicki <koria...@0x04.net> > + > * cfgrtl.c (rtl_tidy_fallthru_edge): Bail for unconditional jumps > with side effects. > > diff --git a/gcc/common/config/s390/s390-common.c > b/gcc/common/config/s390/s390-common.c > index 4cf0df7..0c468bf 100644 > --- a/gcc/common/config/s390/s390-common.c > +++ b/gcc/common/config/s390/s390-common.c > @@ -105,6 +105,17 @@ s390_handle_option (struct gcc_options *opts > ATTRIBUTE_UNUSED, > } > } > > +/* -fsplit-stack uses a field in the TCB, available with glibc-2.23. > + We don't verify it, since earlier versions just have padding at > + its place, which works just as well. 
*/ > + > +static bool > +s390_supports_split_stack (bool report ATTRIBUTE_UNUSED, > + struct gcc_options *opts ATTRIBUTE_UNUSED) > +{ > + return true; > +} > + > #undef TARGET_DEFAULT_TARGET_FLAGS > #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT) > > @@ -117,4 +128,7 @@ s390_handle_option (struct gcc_options *opts > ATTRIBUTE_UNUSED, > #undef TARGET_OPTION_INIT_STRUCT > #define TARGET_OPTION_INIT_STRUCT s390_option_init_struct > > +#undef TARGET_SUPPORTS_SPLIT_STACK > +#define TARGET_SUPPORTS_SPLIT_STACK s390_supports_split_stack > + > struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; > diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h > index 962abb1..936e267 100644 > --- a/gcc/config/s390/s390-protos.h > +++ b/gcc/config/s390/s390-protos.h > @@ -42,6 +42,7 @@ extern bool s390_handle_option (struct gcc_options *opts > ATTRIBUTE_UNUSED, > extern HOST_WIDE_INT s390_initial_elimination_offset (int, int); > extern void s390_emit_prologue (void); > extern void s390_emit_epilogue (bool); > +extern void s390_expand_split_stack_prologue (void); > extern bool s390_can_use_simple_return_insn (void); > extern bool s390_can_use_return_insn (void); > extern void s390_function_profiler (FILE *, int); > diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c > index 9dc8d1e..0255eec 100644 > --- a/gcc/config/s390/s390.c > +++ b/gcc/config/s390/s390.c > @@ -426,6 +426,13 @@ struct GTY(()) machine_function > /* True if the current function may contain a tbegin clobbering > FPRs. */ > bool tbegin_p; > + > + /* For -fsplit-stack support: A stack local which holds a pointer to > + the stack arguments for a function with a variable number of > + arguments. This is set at the start of the function and is used > + to initialize the overflow_arg_area field of the va_list > + structure. */ > + rtx split_stack_varargs_pointer; > }; > > /* Few accessor macros for struct cfun->machine->s390_frame_layout. */ > @@ -7669,7 +7676,17 @@ s390_split_branches (void) > > pat = PATTERN (insn); > if (GET_CODE (pat) == PARALLEL) > - pat = XVECEXP (pat, 0, 0); > + { > + /* Split stack call pseudo-jump doesn't need splitting. */ > + if (GET_CODE (XVECEXP (pat, 0, 1)) == SET > + && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 1)) == UNSPEC_VOLATILE > + && (XINT (XEXP (XVECEXP (pat, 0, 1), 1), 1) > + == UNSPECV_SPLIT_STACK_CALL_ESA > + || XINT (XEXP (XVECEXP (pat, 0, 1), 1), 1) > + == UNSPECV_SPLIT_STACK_CALL_ZARCH)) > + continue; > + pat = XVECEXP (pat, 0, 0); > + } > if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx) > continue; > > @@ -7692,6 +7709,49 @@ s390_split_branches (void) > if (get_attr_length (insn) <= 4) > continue; > > + if (prologue_epilogue_contains (insn)) > + { > + /* A jump in prologue/epilogue must come from the split-stack > + prologue. It cannot be split - there are no scratch regs > + available at that point. Rewire it instead. 
*/ > + > + rtx_insn *code_label = (rtx_insn *)XEXP (*label, 0); > + gcc_assert (LABEL_P (code_label)); > + rtx_insn *note = NEXT_INSN (code_label); > + gcc_assert (NOTE_P (note)); > + rtx_insn *jump_ss = NEXT_INSN (note); > + gcc_assert (JUMP_P (jump_ss)); > + rtx_insn *barrier = NEXT_INSN (jump_ss); > + gcc_assert (BARRIER_P (barrier)); > + gcc_assert (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE); > + gcc_assert (GET_CODE (XEXP (SET_SRC (pat), 0)) == LT); > + > + /* step 1 - insert new label after */ > + rtx new_label = gen_label_rtx (); > + emit_label_after (new_label, insn); > + > + /* step 2 - reorder */ > + reorder_insns_nobb (code_label, barrier, insn); > + > + /* step 3 - retarget jump */ > + rtx new_target = gen_rtx_LABEL_REF (VOIDmode, new_label); > + ret = validate_change (insn, label, new_target, 0); > + gcc_assert (ret); > + LABEL_NUSES (new_label)++; > + LABEL_NUSES (code_label)--; > + JUMP_LABEL (insn) = new_label; > + > + /* step 4 - invert jump cc */ > + rtx *pcond = &XEXP (SET_SRC (pat), 0); > + rtx new_cond = gen_rtx_fmt_ee (GE, VOIDmode, > + XEXP (*pcond, 0), > + XEXP (*pcond, 1)); > + ret = validate_change (insn, pcond, new_cond, 0); > + gcc_assert (ret); > + > + continue; > + } > + > /* We are going to use the return register as scratch register, > make sure it will be saved/restored by the prologue/epilogue. */ > cfun_frame_layout.save_return_addr_p = 1; > @@ -8736,7 +8796,7 @@ s390_chunkify_start (void) > } > /* If we have a direct jump (conditional or unconditional), > check all potential targets. */ > - else if (JUMP_P (insn)) > + else if (JUMP_P (insn) && !prologue_epilogue_contains (insn)) > { > rtx pat = PATTERN (insn); > > @@ -9316,9 +9376,13 @@ s390_register_info () > cfun_frame_layout.high_fprs++; > } > > - if (flag_pic) > - clobbered_regs[PIC_OFFSET_TABLE_REGNUM] > - |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM); > + /* Register 12 is used for GOT address, but also as temp in prologue > + for split-stack stdarg functions (unless r14 is available). */ > + clobbered_regs[12] > + |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) > + || (flag_split_stack && cfun->stdarg > + && (crtl->is_leaf || TARGET_TPF_PROFILING > + || has_hard_reg_initial_val (Pmode, RETURN_REGNUM)))); > > clobbered_regs[BASE_REGNUM] > |= (cfun->machine->base_reg > @@ -10446,6 +10510,8 @@ s390_emit_prologue (void) > && !crtl->is_leaf > && !TARGET_TPF_PROFILING) > temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM); > + else if (flag_split_stack && cfun->stdarg) > + temp_reg = gen_rtx_REG (Pmode, 12); > else > temp_reg = gen_rtx_REG (Pmode, 1); > > @@ -10939,6 +11005,386 @@ s300_set_up_by_prologue (hard_reg_set_container > *regs) > SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg)); > } > > +/* -fsplit-stack support. */ > + > +/* A SYMBOL_REF for __morestack. */ > +static GTY(()) rtx morestack_ref; > + > +/* When using -fsplit-stack, the allocation routines set a field in > + the TCB to the bottom of the stack plus this much space, measured > + in bytes. */ > + > +#define SPLIT_STACK_AVAILABLE 1024 > + > +/* Emit -fsplit-stack prologue, which goes before the regular function > + prologue. */ > + > +void > +s390_expand_split_stack_prologue (void) > +{ > + rtx r1, guard, cc; > + rtx_insn *insn; > + /* Offset from thread pointer to __private_ss. */ > + int psso = TARGET_64BIT ? 0x38 : 0x20; > + /* Pointer size in bytes. */ > + /* Frame size and argument size - the two parameters to __morestack. 
*/ > + HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size; > + /* Align argument size to 8 bytes - simplifies __morestack code. */ > + HOST_WIDE_INT args_size = crtl->args.size >= 0 > + ? ((crtl->args.size + 7) & ~7) > + : 0; > + /* Label to jump to when no __morestack call is necessary. */ > + rtx_code_label *enough = NULL; > + /* Label to be called by __morestack. */ > + rtx_code_label *call_done = NULL; > + /* 1 if __morestack called conditionally, 0 if always. */ > + int conditional = 0; > + > + gcc_assert (flag_split_stack && reload_completed); > + > + r1 = gen_rtx_REG (Pmode, 1); > + > + /* If no stack frame will be allocated, don't do anything. */ > + if (!frame_size) > + { > + /* But emit a marker that will let linker and indirect function > + calls recognise this function as split-stack aware. */ > + emit_insn(gen_split_stack_marker()); > + if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) > + { > + /* If va_start is used, just use r15. */ > + emit_move_insn (r1, > + gen_rtx_PLUS (Pmode, stack_pointer_rtx, > + GEN_INT (STACK_POINTER_OFFSET))); > + } > + return; > + } > + > + if (morestack_ref == NULL_RTX) > + { > + morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); > + SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL > + | SYMBOL_FLAG_FUNCTION); > + } > + > + if (frame_size <= 0x7fff || (TARGET_EXTIMM && frame_size <= 0xffffffffu)) > + { > + /* If frame_size will fit in an add instruction, do a stack space > + check, and only call __morestack if there's not enough space. */ > + conditional = 1; > + > + /* Get thread pointer. r1 is the only register we can always destroy > - r0 > + could contain a static chain (and cannot be used to address memory > + anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. > */ > + emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM)); > + /* Aim at __private_ss. */ > + guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso)); > + > + /* If less that 1kiB used, skip addition and compare directly with > + __private_ss. */ > + if (frame_size > SPLIT_STACK_AVAILABLE) > + { > + emit_move_insn (r1, guard); > + if (TARGET_64BIT) > + emit_insn (gen_adddi3 (r1, r1, GEN_INT(frame_size))); > + else > + emit_insn (gen_addsi3 (r1, r1, GEN_INT(frame_size))); > + guard = r1; > + } > + > + if (TARGET_CPU_ZARCH) > + { > + rtx tmp; > + > + /* Compare the (maybe adjusted) guard with the stack pointer. */ > + cc = s390_emit_compare (LT, stack_pointer_rtx, guard); > + > + call_done = gen_label_rtx (); > + > + if (TARGET_64BIT) > + tmp = gen_split_stack_cond_call_zarch_di (call_done, > + morestack_ref, > + GEN_INT (frame_size), > + GEN_INT (args_size), > + cc); > + else > + tmp = gen_split_stack_cond_call_zarch_si (call_done, > + morestack_ref, > + GEN_INT (frame_size), > + GEN_INT (args_size), > + cc); > + > + > + insn = emit_jump_insn (tmp); > + JUMP_LABEL (insn) = call_done; > + > + /* Mark the jump as very unlikely to be taken. */ > + add_int_reg_note (insn, REG_BR_PROB, REG_BR_PROB_BASE / 100); > + } > + else > + { > + /* Compare the (maybe adjusted) guard with the stack pointer. */ > + cc = s390_emit_compare (GE, stack_pointer_rtx, guard); > + > + enough = gen_label_rtx (); > + insn = s390_emit_jump (enough, cc); > + JUMP_LABEL (insn) = enough; > + > + /* Mark the jump as very likely to be taken. 
*/ > + add_int_reg_note (insn, REG_BR_PROB, > + REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100); > + } > + } > + > + if (call_done == NULL) > + { > + rtx tmp; > + call_done = gen_label_rtx (); > + > + /* Now, we need to call __morestack. It has very special calling > + conventions: it preserves param/return/static chain registers for > + calling main function body, and looks for its own parameters > + at %r1 (after aligning it up to a 4 byte bounduary for 31-bit > mode). */ > + if (TARGET_64BIT) > + tmp = gen_split_stack_call_zarch_di (call_done, > + morestack_ref, > + GEN_INT (frame_size), > + GEN_INT (args_size)); > + else if (TARGET_CPU_ZARCH) > + tmp = gen_split_stack_call_zarch_si (call_done, > + morestack_ref, > + GEN_INT (frame_size), > + GEN_INT (args_size)); > + else > + tmp = gen_split_stack_call_esa (call_done, > + morestack_ref, > + GEN_INT (frame_size), > + GEN_INT (args_size)); > + insn = emit_jump_insn (tmp); > + JUMP_LABEL (insn) = call_done; > + emit_barrier (); > + } > + > + /* __morestack will call us here. */ > + > + if (enough != NULL) > + { > + emit_label (enough); > + LABEL_NUSES (enough) = 1; > + } > + > + if (conditional && cfun->machine->split_stack_varargs_pointer != NULL_RTX) > + { > + /* If va_start is used, and __morestack was not called, just use r15. > */ > + emit_move_insn (r1, > + gen_rtx_PLUS (Pmode, stack_pointer_rtx, > + GEN_INT (STACK_POINTER_OFFSET))); > + } > + > + emit_label (call_done); > + LABEL_NUSES (call_done) = 1; > +} > + > +/* Generates split-stack call sequence for esa mode, along with its parameter > + block. */ > + > +static void > +s390_expand_split_stack_call_esa (rtx_insn *orig_insn, > + rtx call_done, > + rtx function, > + rtx frame_size, > + rtx args_size) > +{ > + int psize = GET_MODE_SIZE (Pmode); > + /* Labels for literal base, literal __morestack, param base. */ > + rtx litbase = gen_label_rtx(); > + rtx litms = gen_label_rtx(); > + rtx parmbase = gen_label_rtx(); > + rtx r1 = gen_rtx_REG (Pmode, 1); > + rtx_insn *insn = orig_insn; > + rtx tmp, tmp2; > + > + /* No brasl, we have to make do using basr and a literal pool. */ > + > + /* %r1 = litbase. */ > + insn = emit_insn_after (gen_main_base_31_small (r1, litbase), insn); > + insn = emit_label_after (litbase, insn); > + > + /* a %r1, .Llitms-.Llitbase(%r1) */ > + tmp = gen_rtx_LABEL_REF (Pmode, litbase); > + tmp2 = gen_rtx_LABEL_REF (Pmode, litms); > + tmp = gen_rtx_UNSPEC (Pmode, > + gen_rtvec (2, tmp2, tmp), > + UNSPEC_POOL_OFFSET); > + tmp = gen_rtx_CONST (Pmode, tmp); > + tmp = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, r1, tmp)); > + insn = emit_insn_after (gen_addsi3 (r1, r1, tmp), insn); > + add_reg_note (insn, REG_LABEL_OPERAND, litbase); > + add_reg_note (insn, REG_LABEL_OPERAND, litms); > + LABEL_NUSES (litbase)++; > + LABEL_NUSES (litms)++; > + > + /* basr %r1, %r1 */ > + tmp = gen_split_stack_sibcall_basr (r1, call_done); > + insn = emit_jump_insn_after (tmp, insn); > + JUMP_LABEL (insn) = call_done; > + LABEL_NUSES (call_done)++; > + > + /* __morestack will mangle its return register to get our parameters. */ > + > + /* Now, we'll emit parameters to __morestack. First, align to pointer size > + (this mirrors the alignment done in __morestack - don't touch it). 
*/ > + insn = emit_insn_after (gen_pool_align (GEN_INT (psize)), insn); > + > + insn = emit_label_after (parmbase, insn); > + > + tmp = gen_rtx_UNSPEC_VOLATILE (Pmode, > + gen_rtvec (1, frame_size), > + UNSPECV_POOL_ENTRY); > + insn = emit_insn_after (tmp, insn); > + > + /* Second parameter is size of the arguments passed on stack that > + __morestack has to copy to the new stack (does not include varargs). */ > + tmp = gen_rtx_UNSPEC_VOLATILE (Pmode, > + gen_rtvec (1, args_size), > + UNSPECV_POOL_ENTRY); > + insn = emit_insn_after (tmp, insn); > + > + /* Third parameter is offset between start of the parameter block > + and function body to be called by __morestack. */ > + tmp = gen_rtx_LABEL_REF (Pmode, parmbase); > + tmp2 = gen_rtx_LABEL_REF (Pmode, call_done); > + tmp = gen_rtx_CONST (Pmode, > + gen_rtx_MINUS (Pmode, tmp2, tmp)); > + tmp = gen_rtx_UNSPEC_VOLATILE (Pmode, > + gen_rtvec (1, tmp), > + UNSPECV_POOL_ENTRY); > + insn = emit_insn_after (tmp, insn); > + add_reg_note (insn, REG_LABEL_OPERAND, call_done); > + LABEL_NUSES (call_done)++; > + add_reg_note (insn, REG_LABEL_OPERAND, parmbase); > + LABEL_NUSES (parmbase)++; > + > + /* We take advantage of the already-existing literal pool here to stuff > + the __morestack address for use in the call above. */ > + > + insn = emit_label_after (litms, insn); > + > + /* We actually emit __morestack - litbase to support PIC. Since it > + works just as well for non-PIC, we use it in all cases. */ > + > + tmp = gen_rtx_LABEL_REF (Pmode, litbase); > + tmp = gen_rtx_CONST (Pmode, > + gen_rtx_MINUS (Pmode, function, tmp)); > + tmp = gen_rtx_UNSPEC_VOLATILE (Pmode, > + gen_rtvec (1, tmp), > + UNSPECV_POOL_ENTRY); > + insn = emit_insn_after (tmp, insn); > + add_reg_note (insn, REG_LABEL_OPERAND, litbase); > + LABEL_NUSES (litbase)++; > + > + delete_insn (orig_insn); > +} > + > +/* Generates split-stack call sequence for zarch mode, along with its > parameter > + block. */ > + > +static void > +s390_expand_split_stack_call_zarch (rtx_insn *orig_insn, > + rtx call_done, > + rtx function, > + rtx frame_size, > + rtx args_size, > + rtx cond) > +{ > + int psize = GET_MODE_SIZE (Pmode); > + rtx_insn *insn = orig_insn; > + rtx parmbase = gen_label_rtx(); > + rtx r1 = gen_rtx_REG (Pmode, 1); > + rtx tmp, tmp2; > + > + /* %r1 = litbase. */ > + insn = emit_insn_after (gen_main_base_64 (r1, parmbase), insn); > + add_reg_note (insn, REG_LABEL_OPERAND, parmbase); > + LABEL_NUSES (parmbase)++; > + > + /* jg<cond> __morestack. */ > + if (cond == NULL) > + { > + if (TARGET_64BIT) > + tmp = gen_split_stack_sibcall_di (function, call_done); > + else > + tmp = gen_split_stack_sibcall_si (function, call_done); > + insn = emit_jump_insn_after (tmp, insn); > + } > + else > + { > + if (!s390_comparison (cond, VOIDmode)) > + internal_error ("bad split_stack_call_zarch cond"); > + if (TARGET_64BIT) > + tmp = gen_split_stack_cond_sibcall_di (function, cond, call_done); > + else > + tmp = gen_split_stack_cond_sibcall_si (function, cond, call_done); > + insn = emit_jump_insn_after (tmp, insn); > + } > + JUMP_LABEL (insn) = call_done; > + LABEL_NUSES (call_done)++; > + > + /* Go to .rodata. */ > + insn = emit_insn_after (gen_pool_section_start (), insn); > + > + /* Now, we'll emit parameters to __morestack. First, align to pointer size > + (this mirrors the alignment done in __morestack - don't touch it). 
*/ > + insn = emit_insn_after (gen_pool_align (GEN_INT (psize)), insn); > + > + insn = emit_label_after (parmbase, insn); > + > + tmp = gen_rtx_UNSPEC_VOLATILE (Pmode, > + gen_rtvec (1, frame_size), > + UNSPECV_POOL_ENTRY); > + insn = emit_insn_after (tmp, insn); > + > + /* Second parameter is size of the arguments passed on stack that > + __morestack has to copy to the new stack (does not include varargs). */ > + tmp = gen_rtx_UNSPEC_VOLATILE (Pmode, > + gen_rtvec (1, args_size), > + UNSPECV_POOL_ENTRY); > + insn = emit_insn_after (tmp, insn); > + > + /* Third parameter is offset between start of the parameter block > + and function body to be called by __morestack. */ > + tmp = gen_rtx_LABEL_REF (Pmode, parmbase); > + tmp2 = gen_rtx_LABEL_REF (Pmode, call_done); > + tmp = gen_rtx_CONST (Pmode, > + gen_rtx_MINUS (Pmode, tmp2, tmp)); > + tmp = gen_rtx_UNSPEC_VOLATILE (Pmode, > + gen_rtvec (1, tmp), > + UNSPECV_POOL_ENTRY); > + insn = emit_insn_after (tmp, insn); > + add_reg_note (insn, REG_LABEL_OPERAND, call_done); > + LABEL_NUSES (call_done)++; > + add_reg_note (insn, REG_LABEL_OPERAND, parmbase); > + LABEL_NUSES (parmbase)++; > + > + /* Return from .rodata. */ > + insn = emit_insn_after (gen_pool_section_end (), insn); > + > + delete_insn (orig_insn); > +} > + > +/* We may have to tell the dataflow pass that the split stack prologue > + is initializing a register. */ > + > +static void > +s390_live_on_entry (bitmap regs) > +{ > + if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) > + { > + gcc_assert (flag_split_stack); > + bitmap_set_bit (regs, 1); > + } > +} > + > /* Return true if the function can use simple_return to return outside > of a shrink-wrapped region. At present shrink-wrapping is supported > in all cases. */ > @@ -11541,6 +11987,27 @@ s390_va_start (tree valist, rtx nextarg > ATTRIBUTE_UNUSED) > expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); > } > > + if (flag_split_stack > + && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl)) > + == NULL) > + && cfun->machine->split_stack_varargs_pointer == NULL_RTX) > + { > + rtx reg; > + rtx_insn *seq; > + > + reg = gen_reg_rtx (Pmode); > + cfun->machine->split_stack_varargs_pointer = reg; > + > + start_sequence (); > + emit_move_insn (reg, gen_rtx_REG (Pmode, 1)); > + seq = get_insns (); > + end_sequence (); > + > + push_topmost_sequence (); > + emit_insn_after (seq, entry_of_function ()); > + pop_topmost_sequence (); > + } > + > /* Find the overflow area. > FIXME: This currently is too pessimistic when the vector ABI is > enabled. In that case we *always* set up the overflow area > @@ -11549,7 +12016,10 @@ s390_va_start (tree valist, rtx nextarg > ATTRIBUTE_UNUSED) > || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG > || TARGET_VX_ABI) > { > - t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx); > + if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) > + t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer); > + else > + t = make_tree (TREE_TYPE (ovf), > cfun->machine->split_stack_varargs_pointer); > > off = INTVAL (crtl->args.arg_offset_rtx); > off = off < 0 ? 0 : off; > @@ -13158,6 +13628,56 @@ s390_reorg (void) > } > } > > + if (flag_split_stack) > + { > + rtx_insn *insn; > + > + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) > + { > + /* Look for the split-stack fake jump instructions. 
*/ > + if (!JUMP_P(insn)) > + continue; > + if (GET_CODE (PATTERN (insn)) != PARALLEL > + || XVECLEN (PATTERN (insn), 0) != 2) > + continue; > + rtx set = XVECEXP (PATTERN (insn), 0, 1); > + if (GET_CODE (set) != SET) > + continue; > + rtx unspec = XEXP(set, 1); > + if (GET_CODE (unspec) != UNSPEC_VOLATILE) > + continue; > + if (XINT (unspec, 1) != UNSPECV_SPLIT_STACK_CALL_ESA > + && XINT (unspec, 1) != UNSPECV_SPLIT_STACK_CALL_ZARCH) > + continue; > + rtx set_pc = XVECEXP (PATTERN (insn), 0, 0); > + rtx function = XVECEXP (unspec, 0, 0); > + rtx frame_size = XVECEXP (unspec, 0, 1); > + rtx args_size = XVECEXP (unspec, 0, 2); > + rtx pc_src = XEXP (set_pc, 1); > + rtx call_done, cond = NULL_RTX; > + if (GET_CODE (pc_src) == IF_THEN_ELSE) > + { > + cond = XEXP (pc_src, 0); > + call_done = XEXP (XEXP (pc_src, 1), 0); > + } > + else > + call_done = XEXP (pc_src, 0); > + if (XINT (unspec, 1) == UNSPECV_SPLIT_STACK_CALL_ESA) > + s390_expand_split_stack_call_esa (insn, > + call_done, > + function, > + frame_size, > + args_size); > + else > + s390_expand_split_stack_call_zarch (insn, > + call_done, > + function, > + frame_size, > + args_size, > + cond); > + } > + } > + > /* Try to optimize prologue and epilogue further. */ > s390_optimize_prologue (); > > @@ -14469,6 +14989,9 @@ s390_asm_file_end (void) > s390_vector_abi); > #endif > file_end_indicate_exec_stack (); > + > + if (flag_split_stack) > + file_end_indicate_split_stack (); > } > > /* Return true if TYPE is a vector bool type. */ > @@ -14724,6 +15247,9 @@ s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, > const_tree type1, const_tree ty > #undef TARGET_SET_UP_BY_PROLOGUE > #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue > > +#undef TARGET_EXTRA_LIVE_ON_ENTRY > +#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry > + > #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P > #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ > s390_use_by_pieces_infrastructure_p > diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md > index 0ebefd6..15c6eed 100644 > --- a/gcc/config/s390/s390.md > +++ b/gcc/config/s390/s390.md > @@ -114,6 +114,9 @@ > UNSPEC_SP_SET > UNSPEC_SP_TEST > > + ; Split stack support > + UNSPEC_STACK_CHECK > + > ; Test Data Class (TDC) > UNSPEC_TDC_INSN > > @@ -276,6 +279,12 @@ > ; Set and get floating point control register > UNSPECV_SFPC > UNSPECV_EFPC > + > + ; Split stack support > + UNSPECV_SPLIT_STACK_CALL_ZARCH > + UNSPECV_SPLIT_STACK_CALL_ESA > + UNSPECV_SPLIT_STACK_SIBCALL > + UNSPECV_SPLIT_STACK_MARKER > ]) > > ;; > @@ -10909,3 +10918,127 @@ > "TARGET_Z13" > "lcbb\t%0,%1,%b2" > [(set_attr "op_type" "VRX")]) > + > +; Handle -fsplit-stack. 
> + > +(define_expand "split_stack_prologue" > + [(const_int 0)] > + "" > +{ > + s390_expand_split_stack_prologue (); > + DONE; > +}) > + > +(define_insn "split_stack_call_esa" > + [(set (pc) (label_ref (match_operand 0 "" ""))) > + (set (reg:SI 1) (unspec_volatile [(match_operand 1 "bras_sym_operand" "X") > + (match_operand 2 "consttable_operand" > "X") > + (match_operand 3 "consttable_operand" > "X")] > + UNSPECV_SPLIT_STACK_CALL_ESA))] > + "!TARGET_CPU_ZARCH" > +{ > + gcc_unreachable (); > +} > + [(set_attr "length" "32")]) > + > +(define_insn "split_stack_call_zarch_<mode>" > + [(set (pc) (label_ref (match_operand 0 "" ""))) > + (set (reg:P 1) (unspec_volatile [(match_operand 1 "bras_sym_operand" "X") > + (match_operand 2 "consttable_operand" > "X") > + (match_operand 3 "consttable_operand" > "X")] > + UNSPECV_SPLIT_STACK_CALL_ZARCH))] > + "TARGET_CPU_ZARCH" > +{ > + gcc_unreachable (); > +} > + [(set_attr "length" "12")]) > + > +(define_insn "split_stack_cond_call_zarch_<mode>" > + [(set (pc) > + (if_then_else > + (match_operand 4 "" "") > + (label_ref (match_operand 0 "" "")) > + (pc))) > + (set (reg:P 1) (unspec_volatile [(match_operand 1 "bras_sym_operand" "X") > + (match_operand 2 "consttable_operand" > "X") > + (match_operand 3 "consttable_operand" > "X")] > + UNSPECV_SPLIT_STACK_CALL_ZARCH))] > + "TARGET_CPU_ZARCH" > +{ > + gcc_unreachable (); > +} > + [(set_attr "length" "12")]) > + > +;; If there are operand 0 bytes available on the stack, jump to > +;; operand 1. > + > +(define_expand "split_stack_space_check" > + [(set (pc) (if_then_else > + (ltu (minus (reg 15) > + (match_operand 0 "register_operand")) > + (unspec [(const_int 0)] UNSPEC_STACK_CHECK)) > + (label_ref (match_operand 1)) > + (pc)))] > + "" > +{ > + /* Offset from thread pointer to __private_ss. */ > + int psso = TARGET_64BIT ? 0x38 : 0x20; > + rtx tp = s390_get_thread_pointer (); > + rtx guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, tp, psso)); > + rtx reg = gen_reg_rtx (Pmode); > + rtx cc; > + if (TARGET_64BIT) > + emit_insn (gen_subdi3 (reg, stack_pointer_rtx, operands[0])); > + else > + emit_insn (gen_subsi3 (reg, stack_pointer_rtx, operands[0])); > + cc = s390_emit_compare (GT, reg, guard); > + s390_emit_jump (operands[1], cc); > + > + DONE; > +}) > + > +;; A basr for use in split stack prologue. > + > +(define_insn "split_stack_sibcall_basr" > + [(set (pc) (label_ref (match_operand 1 "" ""))) > + (set (reg:SI 1) (unspec_volatile [(match_operand 0 "register_operand" > "a")] > + UNSPECV_SPLIT_STACK_SIBCALL))] > + "!TARGET_CPU_ZARCH" > + "basr\t%%r1, %0" > + [(set_attr "op_type" "RR") > + (set_attr "type" "jsr")]) > + > +;; A jg with minimal fuss for use in split stack prologue. > + > +(define_insn "split_stack_sibcall_<mode>" > + [(set (pc) (label_ref (match_operand 1 "" ""))) > + (set (reg:P 1) (unspec_volatile [(match_operand 0 "bras_sym_operand" "X")] > + UNSPECV_SPLIT_STACK_SIBCALL))] > + "TARGET_CPU_ZARCH" > + "jg\t%0" > + [(set_attr "op_type" "RIL") > + (set_attr "type" "branch")]) > + > +;; Also a conditional one. > + > +(define_insn "split_stack_cond_sibcall_<mode>" > + [(set (pc) > + (if_then_else > + (match_operand 1 "" "") > + (label_ref (match_operand 2 "" "")) > + (pc))) > + (set (reg:P 1) (unspec_volatile [(match_operand 0 "bras_sym_operand" "X")] > + UNSPECV_SPLIT_STACK_SIBCALL))] > + "TARGET_CPU_ZARCH" > + "jg%C1\t%0" > + [(set_attr "op_type" "RIL") > + (set_attr "type" "branch")]) > + > +;; An unusual nop instruction used to mark functions with no stack frames > +;; as split-stack aware. 
> + > +(define_insn "split_stack_marker" > + [(unspec_volatile [(const_int 0)] UNSPECV_SPLIT_STACK_MARKER)] > + "" > + "nopr\t%%r15" > + [(set_attr "op_type" "RR")]) > diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog > index f66646c..ff60571 100644 > --- a/libgcc/ChangeLog > +++ b/libgcc/ChangeLog > @@ -1,3 +1,10 @@ > +2016-01-02 Marcin Kościelnicki <koria...@0x04.net> > + > + * config.host: Use t-stack and t-stack-s390 for s390*-*-linux. > + * config/s390/morestack.S: New file. > + * config/s390/t-stack-s390: New file. > + * generic-morestack.c (__splitstack_find): Add s390-specific code. > + > 2015-12-18 Andris Pavenis <andris.pave...@iki.fi> > > * config.host: Add *-*-msdosdjgpp to lists of i[34567]86-*-* > diff --git a/libgcc/config.host b/libgcc/config.host > index 0a3b879..ce6d259 100644 > --- a/libgcc/config.host > +++ b/libgcc/config.host > @@ -1105,11 +1105,11 @@ rx-*-elf) > tm_file="$tm_file rx/rx-abi.h rx/rx-lib.h" > ;; > s390-*-linux*) > - tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux > s390/32/t-floattodi" > + tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux > s390/32/t-floattodi t-stack s390/t-stack-s390" > md_unwind_header=s390/linux-unwind.h > ;; > s390x-*-linux*) > - tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux" > + tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux t-stack > s390/t-stack-s390" > if test "${host_address}" = 32; then > tmake_file="${tmake_file} s390/32/t-floattodi" > fi > diff --git a/libgcc/config/s390/morestack.S b/libgcc/config/s390/morestack.S > new file mode 100644 > index 0000000..8e26c66 > --- /dev/null > +++ b/libgcc/config/s390/morestack.S > @@ -0,0 +1,718 @@ > +# s390 support for -fsplit-stack. > +# Copyright (C) 2015 Free Software Foundation, Inc. > +# Contributed by Marcin Kościelnicki <koria...@0x04.net>. > + > +# This file is part of GCC. > + > +# GCC is free software; you can redistribute it and/or modify it under > +# the terms of the GNU General Public License as published by the Free > +# Software Foundation; either version 3, or (at your option) any later > +# version. > + > +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY > +# WARRANTY; without even the implied warranty of MERCHANTABILITY or > +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > +# for more details. > + > +# Under Section 7 of GPL version 3, you are granted additional > +# permissions described in the GCC Runtime Library Exception, version > +# 3.1, as published by the Free Software Foundation. > + > +# You should have received a copy of the GNU General Public License and > +# a copy of the GCC Runtime Library Exception along with this program; > +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > +# <http://www.gnu.org/licenses/>. > + > +# Excess space needed to call ld.so resolver for lazy plt > +# resolution. Go uses sigaltstack so this doesn't need to > +# also cover signal frame size. > +#define BACKOFF 0x1000 > + > +# The __morestack function. > + > + .global __morestack > + .hidden __morestack > + > + .type __morestack,@function > + > +__morestack: > +.LFB1: > + .cfi_startproc > + > + > +#ifndef __s390x__ > + > + > +# The 31-bit __morestack function. > + > + # We use a cleanup to restore the stack guard if an exception > + # is thrown through this code. 
> +#ifndef __PIC__ > + .cfi_personality 0,__gcc_personality_v0 > + .cfi_lsda 0,.LLSDA1 > +#else > + .cfi_personality 0x9b,DW.ref.__gcc_personality_v0 > + .cfi_lsda 0x1b,.LLSDA1 > +#endif > + > + stm %r2, %r15, 0x8(%r15) # Save %r2-%r15. > + .cfi_offset %r6, -0x48 > + .cfi_offset %r7, -0x44 > + .cfi_offset %r8, -0x40 > + .cfi_offset %r9, -0x3c > + .cfi_offset %r10, -0x38 > + .cfi_offset %r11, -0x34 > + .cfi_offset %r12, -0x30 > + .cfi_offset %r13, -0x2c > + .cfi_offset %r14, -0x28 > + .cfi_offset %r15, -0x24 > + lr %r11, %r15 # Make frame pointer for vararg. > + .cfi_def_cfa_register %r11 > + ahi %r15, -0x60 # 0x60 for standard frame. > + st %r11, 0(%r15) # Save back chain. > + lr %r8, %r0 # Save %r0 (static chain). > + > + basr %r13, 0 # .Lmsl to %r13 > +.Lmsl: > + > + # %r1 may point directly to the parameter area (zarch), or right after > + # the basr instruction that called us (esa). In the first case, > + # the pointer is already aligned. In the second case, we may need to > + # align it up to 4 bytes to get to the parameters. > + la %r10, 3(%r1) > + lhi %r7, -4 > + nr %r10, %r7 # %r10 = (%r1 + 3) & ~3 > + > + l %r7, 0(%r10) # Required frame size to %r7 > + ear %r1, %a0 # Extract thread pointer. > + l %r1, 0x20(%r1) # Get stack bounduary > + ar %r1, %r7 # Stack bounduary + frame size > + a %r1, 4(%r10) # + stack param size > + clr %r1, %r15 # Compare with current stack pointer > + jle .Lnoalloc # guard > sp - frame-size: need alloc > + > + l %r1, .Lmslbs-.Lmsl(%r13) # __morestack_block_signals > +#ifdef __PIC__ > + bas %r14, 0(%r1, %r13) > +#else > + basr %r14, %r1 > +#endif > + > + # We abuse one of caller's fpr save slots (which we don't use for fprs) > + # as a local variable. Not needed here, but done to be consistent with > + # the below use. > + ahi %r7, BACKOFF # Bump requested size a bit. > + st %r7, 0x40(%r11) # Stuff frame size on stack. > + la %r2, 0x40(%r11) # Pass its address as parameter. > + la %r3, 0x60(%r11) # Caller's stack parameters. > + l %r4, 4(%r10) # Size of stack paremeters. > + > + l %r1, .Lmslgms-.Lmsl(%r13) # __generic_morestack > +#ifdef __PIC__ > + bas %r14, 0(%r1, %r13) > +#else > + basr %r14, %r1 > +#endif > + > + lr %r15, %r2 # Switch to the new stack. > + ahi %r15, -0x60 # Make a stack frame on it. > + st %r11, 0(%r15) # Save back chain. > + > + s %r2, 0x40(%r11) # The end of stack space. > + ahi %r2, BACKOFF # Back off a bit. > + ear %r1, %a0 # Extract thread pointer. > +.LEHB0: > + st %r2, 0x20(%r1) # Save the new stack boundary. > + > + l %r1, .Lmslubs-.Lmsl(%r13) # __morestack_unblock_signals > +#ifdef __PIC__ > + bas %r14, 0(%r1, %r13) > +#else > + basr %r14, %r1 > +#endif > + > + lr %r0, %r8 # Static chain. > + lm %r2, %r6, 0x8(%r11) # Paremeter registers. > + > + # Third parameter is address of function meat - address of parameter > + # block. > + a %r10, 0x8(%r10) > + > + # Leave vararg pointer in %r1, in case function uses it > + la %r1, 0x60(%r11) > + > + # State of registers: > + # %r0: Static chain from entry. > + # %r1: Vararg pointer. > + # %r2-%r6: Parameters from entry. > + # %r7-%r10: Indeterminate. > + # %r11: Frame pointer (%r15 from entry). > + # %r12: Indeterminate. > + # %r13: Literal pool address. > + # %r14: Return address. > + # %r15: Stack pointer. > + basr %r14, %r10 # Call our caller. > + > + stm %r2, %r3, 0x8(%r11) # Save return registers. 
> + > + l %r1, .Lmslbs-.Lmsl(%r13) # __morestack_block_signals > +#ifdef __PIC__ > + bas %r14, 0(%r1, %r13) > +#else > + basr %r14, %r1 > +#endif > + > + # We need a stack slot now, but have no good way to get it - the frame > + # on new stack had to be exactly 0x60 bytes, or stack parameters would > + # be passed wrong. Abuse fpr save area in caller's frame (we don't > + # save actual fprs). > + la %r2, 0x40(%r11) > + l %r1, .Lmslgrs-.Lmsl(%r13) # __generic_releasestack > +#ifdef __PIC__ > + bas %r14, 0(%r1, %r13) > +#else > + basr %r14, %r1 > +#endif > + > + s %r2, 0x40(%r11) # Subtract available space. > + ahi %r2, BACKOFF # Back off a bit. > + ear %r1, %a0 # Extract thread pointer. > +.LEHE0: > + st %r2, 0x20(%r1) # Save the new stack boundary. > + > + # We need to restore the old stack pointer before unblocking signals. > + # We also need 0x60 bytes for a stack frame. Since we had a stack > + # frame at this place before the stack switch, there's no need to > + # write the back chain again. > + lr %r15, %r11 > + ahi %r15, -0x60 > + > + l %r1, .Lmslubs-.Lmsl(%r13) # __morestack_unblock_signals > +#ifdef __PIC__ > + bas %r14, 0(%r1, %r13) > +#else > + basr %r14, %r1 > +#endif > + > + lm %r2, %r15, 0x8(%r11) # Restore all registers. > + .cfi_remember_state > + .cfi_restore %r15 > + .cfi_restore %r14 > + .cfi_restore %r13 > + .cfi_restore %r12 > + .cfi_restore %r11 > + .cfi_restore %r10 > + .cfi_restore %r9 > + .cfi_restore %r8 > + .cfi_restore %r7 > + .cfi_restore %r6 > + .cfi_def_cfa_register %r15 > + br %r14 # Return to caller's caller. > + > +# Executed if no new stack allocation is needed. > + > +.Lnoalloc: > + .cfi_restore_state > + # We may need to copy stack parameters. > + l %r9, 0x4(%r10) # Load stack parameter size. > + ltr %r9, %r9 # And check if it's 0. > + je .Lnostackparm # Skip the copy if not needed. > + sr %r15, %r9 # Make space on the stack. > + la %r8, 0x60(%r15) # Destination. > + la %r12, 0x60(%r11) # Source. > + lr %r13, %r9 # Source size. > +.Lcopy: > + mvcle %r8, %r12, 0 # Copy. > + jo .Lcopy > + > +.Lnostackparm: > + # Third parameter is address of function meat - address of parameter > + # block. > + a %r10, 0x8(%r10) > + > + # Leave vararg pointer in %r1, in case function uses it > + la %r1, 0x60(%r11) > + > + # OK, no stack allocation needed. We still follow the protocol and > + # call our caller - it doesn't cost much and makes sure vararg works. > + # No need to set any registers here - %r0 and %r2-%r6 weren't modified. > + basr %r14, %r10 # Call our caller. > + > + lm %r6, %r15, 0x18(%r11) # Restore all callee-saved registers. > + .cfi_remember_state > + .cfi_restore %r15 > + .cfi_restore %r14 > + .cfi_restore %r13 > + .cfi_restore %r12 > + .cfi_restore %r11 > + .cfi_restore %r10 > + .cfi_restore %r9 > + .cfi_restore %r8 > + .cfi_restore %r7 > + .cfi_restore %r6 > + .cfi_def_cfa_register %r15 > + br %r14 # Return to caller's caller. > + > +# This is the cleanup code called by the stack unwinder when unwinding > +# through the code between .LEHB0 and .LEHE0 above. > + > +.L1: > + .cfi_restore_state > + lr %r2, %r11 # Stack pointer after resume. > + l %r1, .Lmslgfs-.Lmsl(%r13) # __generic_findstack > +#ifdef __PIC__ > + bas %r14, 0(%r1, %r13) > +#else > + basr %r14, %r1 > +#endif > + lr %r3, %r11 # Get the stack pointer. > + sr %r3, %r2 # Subtract available space. > + ahi %r3, BACKOFF # Back off a bit. > + ear %r1, %a0 # Extract thread pointer. > + st %r3, 0x20(%r1) # Save the new stack boundary. > + > + lr %r2, %r6 # Exception header. 
> +#ifdef __PIC__ > + l %r12, .Lmslgot-.Lmsl(%r13) > + ar %r12, %r13 > + l %r1, .Lmslunw-.Lmsl(%r13) > + bas %r14, 0(%r1, %r12) > +#else > + l %r1, .Lmslunw-.Lmsl(%r13) > + basr %r14, %r1 > +#endif > + > +# Literal pool. > + > +.align 4 > +#ifdef __PIC__ > +.Lmslbs: > + .long __morestack_block_signals-.Lmsl > +.Lmslubs: > + .long __morestack_unblock_signals-.Lmsl > +.Lmslgms: > + .long __generic_morestack-.Lmsl > +.Lmslgrs: > + .long __generic_releasestack-.Lmsl > +.Lmslgfs: > + .long __generic_findstack-.Lmsl > +.Lmslunw: > + .long _Unwind_Resume@PLTOFF > +.Lmslgot: > + .long _GLOBAL_OFFSET_TABLE_-.Lmsl > +#else > +.Lmslbs: > + .long __morestack_block_signals > +.Lmslubs: > + .long __morestack_unblock_signals > +.Lmslgms: > + .long __generic_morestack > +.Lmslgrs: > + .long __generic_releasestack > +.Lmslgfs: > + .long __generic_findstack > +.Lmslunw: > + .long _Unwind_Resume > +#endif > + > +#else /* defined(__s390x__) */ > + > + > +# The 64-bit __morestack function. > + > + # We use a cleanup to restore the stack guard if an exception > + # is thrown through this code. > +#ifndef __PIC__ > + .cfi_personality 0x3,__gcc_personality_v0 > + .cfi_lsda 0x3,.LLSDA1 > +#else > + .cfi_personality 0x9b,DW.ref.__gcc_personality_v0 > + .cfi_lsda 0x1b,.LLSDA1 > +#endif > + > + stmg %r2, %r15, 0x10(%r15) # Save %r2-%r15. > + .cfi_offset %r6, -0x70 > + .cfi_offset %r7, -0x68 > + .cfi_offset %r8, -0x60 > + .cfi_offset %r9, -0x58 > + .cfi_offset %r10, -0x50 > + .cfi_offset %r11, -0x48 > + .cfi_offset %r12, -0x40 > + .cfi_offset %r13, -0x38 > + .cfi_offset %r14, -0x30 > + .cfi_offset %r15, -0x28 > + lgr %r11, %r15 # Make frame pointer for vararg. > + .cfi_def_cfa_register %r11 > + aghi %r15, -0xa0 # 0xa0 for standard frame. > + stg %r11, 0(%r15) # Save back chain. > + lgr %r8, %r0 # Save %r0 (static chain). > + lgr %r10, %r1 # Save %r1 (address of parameter block). > + > + lg %r7, 0(%r10) # Required frame size to %r7 > + ear %r1, %a0 > + sllg %r1, %r1, 32 > + ear %r1, %a1 # Extract thread pointer. > + lg %r1, 0x38(%r1) # Get stack bounduary > + agr %r1, %r7 # Stack bounduary + frame size > + ag %r1, 8(%r10) # + stack param size > + clgr %r1, %r15 # Compare with current stack pointer > + jle .Lnoalloc # guard > sp - frame-size: need alloc > + > + brasl %r14, __morestack_block_signals > + > + # We abuse one of caller's fpr save slots (which we don't use for fprs) > + # as a local variable. Not needed here, but done to be consistent with > + # the below use. > + aghi %r7, BACKOFF # Bump requested size a bit. > + stg %r7, 0x80(%r11) # Stuff frame size on stack. > + la %r2, 0x80(%r11) # Pass its address as parameter. > + la %r3, 0xa0(%r11) # Caller's stack parameters. > + lg %r4, 8(%r10) # Size of stack paremeters. > + brasl %r14, __generic_morestack > + > + lgr %r15, %r2 # Switch to the new stack. > + aghi %r15, -0xa0 # Make a stack frame on it. > + stg %r11, 0(%r15) # Save back chain. > + > + sg %r2, 0x80(%r11) # The end of stack space. > + aghi %r2, BACKOFF # Back off a bit. > + ear %r1, %a0 > + sllg %r1, %r1, 32 > + ear %r1, %a1 # Extract thread pointer. > +.LEHB0: > + stg %r2, 0x38(%r1) # Save the new stack boundary. > + > + brasl %r14, __morestack_unblock_signals > + > + lgr %r0, %r8 # Static chain. > + lmg %r2, %r6, 0x10(%r11) # Paremeter registers. > + > + # Third parameter is address of function meat - address of parameter > + # block. 
> + ag %r10, 0x10(%r10) > + > + # Leave vararg pointer in %r1, in case function uses it > + la %r1, 0xa0(%r11) > + > + # State of registers: > + # %r0: Static chain from entry. > + # %r1: Vararg pointer. > + # %r2-%r6: Parameters from entry. > + # %r7-%r10: Indeterminate. > + # %r11: Frame pointer (%r15 from entry). > + # %r12-%r13: Indeterminate. > + # %r14: Return address. > + # %r15: Stack pointer. > + basr %r14, %r10 # Call our caller. > + > + stg %r2, 0x10(%r11) # Save return register. > + > + brasl %r14, __morestack_block_signals > + > + # We need a stack slot now, but have no good way to get it - the frame > + # on new stack had to be exactly 0xa0 bytes, or stack parameters would > + # be passed wrong. Abuse fpr save area in caller's frame (we don't > + # save actual fprs). > + la %r2, 0x80(%r11) > + brasl %r14, __generic_releasestack > + > + sg %r2, 0x80(%r11) # Subtract available space. > + aghi %r2, BACKOFF # Back off a bit. > + ear %r1, %a0 > + sllg %r1, %r1, 32 > + ear %r1, %a1 # Extract thread pointer. > +.LEHE0: > + stg %r2, 0x38(%r1) # Save the new stack boundary. > + > + # We need to restore the old stack pointer before unblocking signals. > + # We also need 0xa0 bytes for a stack frame. Since we had a stack > + # frame at this place before the stack switch, there's no need to > + # write the back chain again. > + lgr %r15, %r11 > + aghi %r15, -0xa0 > + > + brasl %r14, __morestack_unblock_signals > + > + lmg %r2, %r15, 0x10(%r11) # Restore all registers. > + .cfi_remember_state > + .cfi_restore %r15 > + .cfi_restore %r14 > + .cfi_restore %r13 > + .cfi_restore %r12 > + .cfi_restore %r11 > + .cfi_restore %r10 > + .cfi_restore %r9 > + .cfi_restore %r8 > + .cfi_restore %r7 > + .cfi_restore %r6 > + .cfi_def_cfa_register %r15 > + br %r14 # Return to caller's caller. > + > +# Executed if no new stack allocation is needed. > + > +.Lnoalloc: > + .cfi_restore_state > + # We may need to copy stack parameters. > + lg %r9, 0x8(%r10) # Load stack parameter size. > + ltgr %r9, %r9 # Check if it's 0. > + je .Lnostackparm # Skip the copy if not needed. > + sgr %r15, %r9 # Make space on the stack. > + la %r8, 0xa0(%r15) # Destination. > + la %r12, 0xa0(%r11) # Source. > + lgr %r13, %r9 # Source size. > +.Lcopy: > + mvcle %r8, %r12, 0 # Copy. > + jo .Lcopy > + > +.Lnostackparm: > + # Third parameter is address of function meat - address of parameter > + # block. > + ag %r10, 0x10(%r10) > + > + # Leave vararg pointer in %r1, in case function uses it > + la %r1, 0xa0(%r11) > + > + # OK, no stack allocation needed. We still follow the protocol and > + # call our caller - it doesn't cost much and makes sure vararg works. > + # No need to set any registers here - %r0 and %r2-%r6 weren't modified. > + basr %r14, %r10 # Call our caller. > + > + lmg %r6, %r15, 0x30(%r11) # Restore all callee-saved registers. > + .cfi_remember_state > + .cfi_restore %r15 > + .cfi_restore %r14 > + .cfi_restore %r13 > + .cfi_restore %r12 > + .cfi_restore %r11 > + .cfi_restore %r10 > + .cfi_restore %r9 > + .cfi_restore %r8 > + .cfi_restore %r7 > + .cfi_restore %r6 > + .cfi_def_cfa_register %r15 > + br %r14 # Return to caller's caller. > + > +# This is the cleanup code called by the stack unwinder when unwinding > +# through the code between .LEHB0 and .LEHE0 above. > + > +.L1: > + .cfi_restore_state > + lgr %r2, %r11 # Stack pointer after resume. > + brasl %r14, __generic_findstack > + lgr %r3, %r11 # Get the stack pointer. > + sgr %r3, %r2 # Subtract available space. > + aghi %r3, BACKOFF # Back off a bit. 
> + ear %r1, %a0 > + sllg %r1, %r1, 32 > + ear %r1, %a1 # Extract thread pointer. > + stg %r3, 0x38(%r1) # Save the new stack boundary. > + > + lgr %r2, %r6 # Exception header. > +#ifdef __PIC__ > + brasl %r14, _Unwind_Resume@PLT > +#else > + brasl %r14, _Unwind_Resume > +#endif > + > +#endif /* defined(__s390x__) */ > + > + .cfi_endproc > + .size __morestack, . - __morestack > + > + > +# The exception table. This tells the personality routine to execute > +# the exception handler. > + > + .section .gcc_except_table,"a",@progbits > + .align 4 > +.LLSDA1: > + .byte 0xff # @LPStart format (omit) > + .byte 0xff # @TType format (omit) > + .byte 0x1 # call-site format (uleb128) > + .uleb128 .LLSDACSE1-.LLSDACSB1 # Call-site table length > +.LLSDACSB1: > + .uleb128 .LEHB0-.LFB1 # region 0 start > + .uleb128 .LEHE0-.LEHB0 # length > + .uleb128 .L1-.LFB1 # landing pad > + .uleb128 0 # action > +.LLSDACSE1: > + > + > + .global __gcc_personality_v0 > +#ifdef __PIC__ > + # Build a position independent reference to the basic > + # personality function. > + .hidden DW.ref.__gcc_personality_v0 > + .weak DW.ref.__gcc_personality_v0 > + .section > .data.DW.ref.__gcc_personality_v0,"awG",@progbits,DW.ref.__gcc_personality_v0,comdat > + .type DW.ref.__gcc_personality_v0, @object > +DW.ref.__gcc_personality_v0: > +#ifndef __LP64__ > + .align 4 > + .size DW.ref.__gcc_personality_v0, 4 > + .long __gcc_personality_v0 > +#else > + .align 8 > + .size DW.ref.__gcc_personality_v0, 8 > + .quad __gcc_personality_v0 > +#endif > +#endif > + > + > + > +# Initialize the stack test value when the program starts or when a > +# new thread starts. We don't know how large the main stack is, so we > +# guess conservatively. We might be able to use getrlimit here. > + > + .text > + .global __stack_split_initialize > + .hidden __stack_split_initialize > + > + .type __stack_split_initialize, @function > + > +__stack_split_initialize: > + > +#ifndef __s390x__ > + > + ear %r1, %a0 > + lr %r0, %r15 > + ahi %r0, -0x4000 # We should have at least 16K. > + st %r0, 0x20(%r1) > + > + lr %r2, %r15 > + lhi %r3, 0x4000 > +#ifdef __PIC__ > + # Cannot do a tail call - we'll go through PLT, so we need GOT address > + # in %r12, which is callee-saved. > + stm %r12, %r15, 0x30(%r15) > + basr %r13, 0 > +.Lssi0: > + ahi %r15, -0x60 > + l %r12, .Lssi2-.Lssi0(%r13) > + ar %r12, %r13 > + l %r1, .Lssi1-.Lssi0(%r13) > + bas %r14, 0(%r1, %r12) > + lm %r12, %r15, 0x90(%r15) > + br %r14 > + > +.align 4 > +.Lssi1: > + .long __generic_morestack_set_initial_sp@PLTOFF > +.Lssi2: > + .long _GLOBAL_OFFSET_TABLE_-.Lssi0 > + > +#else > + basr %r1, 0 > +.Lssi0: > + l %r1, .Lssi1-.Lssi0(%r1) > + br %r1 # Tail call > + > +.align 4 > +.Lssi1: > + .long __generic_morestack_set_initial_sp > +#endif > + > +#else /* defined(__s390x__) */ > + > + ear %r1, %a0 > + sllg %r1, %r1, 32 > + ear %r1, %a1 > + lgr %r0, %r15 > + aghi %r0, -0x4000 # We should have at least 16K. > + stg %r0, 0x38(%r1) > + > + lgr %r2, %r15 > + lghi %r3, 0x4000 > +#ifdef __PIC__ > + jg __generic_morestack_set_initial_sp@PLT # Tail call > +#else > + jg __generic_morestack_set_initial_sp # Tail call > +#endif > + > +#endif /* defined(__s390x__) */ > + > + .size __stack_split_initialize, . - __stack_split_initialize > + > +# Routines to get and set the guard, for __splitstack_getcontext, > +# __splitstack_setcontext, and __splitstack_makecontext. > + > +# void *__morestack_get_guard (void) returns the current stack guard. 
> + .text > + .global __morestack_get_guard > + .hidden __morestack_get_guard > + > + .type __morestack_get_guard,@function > + > +__morestack_get_guard: > + > +#ifndef __s390x__ > + ear %r1, %a0 > + l %r2, 0x20(%r1) > +#else > + ear %r1, %a0 > + sllg %r1, %r1, 32 > + ear %r1, %a1 > + lg %r2, 0x38(%r1) > +#endif > + br %r14 > + > + .size __morestack_get_guard, . - __morestack_get_guard > + > +# void __morestack_set_guard (void *) sets the stack guard. > + .global __morestack_set_guard > + .hidden __morestack_set_guard > + > + .type __morestack_set_guard,@function > + > +__morestack_set_guard: > + > +#ifndef __s390x__ > + ear %r1, %a0 > + st %r2, 0x20(%r1) > +#else > + ear %r1, %a0 > + sllg %r1, %r1, 32 > + ear %r1, %a1 > + stg %r2, 0x38(%r1) > +#endif > + br %r14 > + > + .size __morestack_set_guard, . - __morestack_set_guard > + > +# void *__morestack_make_guard (void *, size_t) returns the stack > +# guard value for a stack. > + .global __morestack_make_guard > + .hidden __morestack_make_guard > + > + .type __morestack_make_guard,@function > + > +__morestack_make_guard: > + > +#ifndef __s390x__ > + sr %r2, %r3 > + ahi %r2, BACKOFF > +#else > + sgr %r2, %r3 > + aghi %r2, BACKOFF > +#endif > + br %r14 > + > + .size __morestack_make_guard, . - __morestack_make_guard > + > +# Make __stack_split_initialize a high priority constructor. > + > + .section .ctors.65535,"aw",@progbits > + > +#ifndef __LP64__ > + .align 4 > + .long __stack_split_initialize > + .long __morestack_load_mmap > +#else > + .align 8 > + .quad __stack_split_initialize > + .quad __morestack_load_mmap > +#endif > + > + .section .note.GNU-stack,"",@progbits > + .section .note.GNU-split-stack,"",@progbits > + .section .note.GNU-no-split-stack,"",@progbits > diff --git a/libgcc/config/s390/t-stack-s390 b/libgcc/config/s390/t-stack-s390 > new file mode 100644 > index 0000000..4c959b0 > --- /dev/null > +++ b/libgcc/config/s390/t-stack-s390 > @@ -0,0 +1,2 @@ > +# Makefile fragment to support -fsplit-stack for s390. > +LIB2ADD_ST += $(srcdir)/config/s390/morestack.S > diff --git a/libgcc/generic-morestack.c b/libgcc/generic-morestack.c > index a10559b..8109c1a 100644 > --- a/libgcc/generic-morestack.c > +++ b/libgcc/generic-morestack.c > @@ -939,6 +939,10 @@ __splitstack_find (void *segment_arg, void *sp, size_t > *len, > #elif defined (__i386__) > nsp -= 6 * sizeof (void *); > #elif defined __powerpc64__ > +#elif defined __s390x__ > + nsp -= 2 * 160; > +#elif defined __s390__ > + nsp -= 2 * 96; > #else > #error "unrecognized target" > #endif >
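
For concreteness, the suggested error-out could look roughly like the sketch below. It is illustrative only, not part of the submitted patch: the helper name s390_check_split_stack_arch and its placement in the backend's option-override path are assumptions; the point is simply that flag_split_stack combined with !TARGET_CPU_ZARCH (i.e. -march=g5/g6) should be rejected up front instead of carrying separate code paths.

    /* Sketch only: reject -fsplit-stack on pre-z900 CPUs instead of keeping
       the !TARGET_CPU_ZARCH code paths.  The function name is hypothetical;
       it is assumed to be called from the backend's option handling once the
       architecture flags have been set up.  */
    static void
    s390_check_split_stack_arch (void)
    {
      if (flag_split_stack && !TARGET_CPU_ZARCH)
        error ("-fsplit-stack is not supported with -march=g5 or -march=g6");
    }

With a check along these lines in place, the split_stack_call_esa / split_stack_sibcall_basr patterns and the s390_expand_split_stack_call_esa path could be dropped entirely, which is the simplification asked for above.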