libgcc/ChangeLog:

        * config.host: Use t-stack and t-stack-s390 for s390*-*-linux.
        * config/s390/morestack.S: New file.
        * config/s390/t-stack-s390: New file.
        * generic-morestack.c (__splitstack_find): Add s390-specific code.

gcc/ChangeLog:

        * common/config/s390/s390-common.c (s390_supports_split_stack):
        New function.
        (TARGET_SUPPORTS_SPLIT_STACK): New macro.
        * config/s390/s390-protos.h: Add s390_expand_split_stack_prologue.
        * config/s390/s390.c (struct machine_function): New field
        split_stack_varargs_pointer.
        (s390_register_info): Mark r12 as clobbered if it'll be used as temp
        in s390_emit_prologue.
        (s390_emit_prologue): Use r12 as temp if r1 is taken by split-stack
        vararg pointer.
        (morestack_ref): New global.
        (SPLIT_STACK_AVAILABLE): New macro.
        (s390_expand_split_stack_prologue): New function.
        (s390_expand_split_stack_call): New function.
        (s390_live_on_entry): New function.
        (s390_va_start): Use split-stack vararg pointer if appropriate.
        (s390_reorg): Lower the split-stack pseudo-insns.
        (s390_asm_file_end): Emit the split-stack note sections.
        (TARGET_EXTRA_LIVE_ON_ENTRY): New macro.
        * config/s390/s390.md (UNSPEC_STACK_CHECK): New unspec.
        (UNSPECV_SPLIT_STACK_CALL): New unspec.
        (UNSPECV_SPLIT_STACK_SIBCALL): New unspec.
        (UNSPECV_SPLIT_STACK_MARKER): New unspec.
        (split_stack_prologue): New expand.
        (split_stack_call): New expand.
        (split_stack_call_*): New insn.
        (split_stack_cond_call): New expand.
        (split_stack_cond_call_*): New insn.
        (split_stack_space_check): New expand.
        (split_stack_sibcall): New expand.
        (split_stack_sibcall_*): New insn.
        (split_stack_cond_sibcall): New expand.
        (split_stack_cond_sibcall_*): New insn.
        (split_stack_marker): New insn.
---
I've implemented most of your requested changes, with two exceptions:

- I don't use virtual_incoming_args_rtx in s390_expand_split_stack_prologue,
  since this causes a constraint error - I suppose it just cannot be used
  after reload.
- It seems to me there's no problem with TPF and r1 - the conditional you
  mention is meant to avoid modifying r14 (which we do - by aiming at r1 and
  r12 for arg pointer and temp, respectively), not to ensure use of r1 as the
  temporary.  Unless there's a good reason to avoid modifying r12, the code
  seems fine to me.

As for the testcase we discussed, I'll submit it as a separate patch.


 gcc/ChangeLog                        |  37 +++
 gcc/common/config/s390/s390-common.c |  14 +
 gcc/config/s390/s390-protos.h        |   1 +
 gcc/config/s390/s390.c               | 321 +++++++++++++++++-
 gcc/config/s390/s390.md              | 177 ++++++++++
 libgcc/ChangeLog                     |   7 +
 libgcc/config.host                   |   4 +-
 libgcc/config/s390/morestack.S       | 609 +++++++++++++++++++++++++++++++++++
 libgcc/config/s390/t-stack-s390      |   2 +
 libgcc/generic-morestack.c           |   4 +
 10 files changed, 1170 insertions(+), 6 deletions(-)
 create mode 100644 libgcc/config/s390/morestack.S
 create mode 100644 libgcc/config/s390/t-stack-s390

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9a2cec8..af86079 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,40 @@
+2016-02-02  Marcin Kościelnicki  <koria...@0x04.net>
+
+       * common/config/s390/s390-common.c (s390_supports_split_stack):
+       New function.
+       (TARGET_SUPPORTS_SPLIT_STACK): New macro.
+       * config/s390/s390-protos.h: Add s390_expand_split_stack_prologue.
+       * config/s390/s390.c (struct machine_function): New field
+       split_stack_varargs_pointer.
+       (s390_register_info): Mark r12 as clobbered if it'll be used as temp
+       in s390_emit_prologue.
+       (s390_emit_prologue): Use r12 as temp if r1 is taken by split-stack
+       vararg pointer.
+       (morestack_ref): New global.
+       (SPLIT_STACK_AVAILABLE): New macro.
+       (s390_expand_split_stack_prologue): New function.
+       (s390_expand_split_stack_call): New function.
+       (s390_live_on_entry): New function.
+       (s390_va_start): Use split-stack vararg pointer if appropriate.
+       (s390_reorg): Lower the split-stack pseudo-insns.
+       (s390_asm_file_end): Emit the split-stack note sections.
+       (TARGET_EXTRA_LIVE_ON_ENTRY): New macro.
+       * config/s390/s390.md (UNSPEC_STACK_CHECK): New unspec.
+       (UNSPECV_SPLIT_STACK_CALL): New unspec.
+       (UNSPECV_SPLIT_STACK_SIBCALL): New unspec.
+       (UNSPECV_SPLIT_STACK_MARKER): New unspec.
+       (split_stack_prologue): New expand.
+       (split_stack_call): New expand.
+       (split_stack_call_*): New insn.
+       (split_stack_cond_call): New expand.
+       (split_stack_cond_call_*): New insn.
+       (split_stack_space_check): New expand.
+       (split_stack_sibcall): New expand.
+       (split_stack_sibcall_*): New insn.
+       (split_stack_cond_sibcall): New expand.
+       (split_stack_cond_sibcall_*): New insn.
+       (split_stack_marker): New insn.
+
 2016-02-02  Thomas Schwinge  <tho...@codesourcery.com>
 
        * omp-builtins.def (BUILT_IN_GOACC_HOST_DATA): Remove.
diff --git a/gcc/common/config/s390/s390-common.c b/gcc/common/config/s390/s390-common.c
index 4519c21..1e497e6 100644
--- a/gcc/common/config/s390/s390-common.c
+++ b/gcc/common/config/s390/s390-common.c
@@ -105,6 +105,17 @@ s390_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
     }
 }
 
+/* -fsplit-stack uses a field in the TCB, available with glibc-2.23.
+   We don't verify it, since earlier versions just have padding in
+   its place, which works just as well.  */
+
+static bool
+s390_supports_split_stack (bool report ATTRIBUTE_UNUSED,
+                          struct gcc_options *opts ATTRIBUTE_UNUSED)
+{
+  return true;
+}
+
 #undef TARGET_DEFAULT_TARGET_FLAGS
 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
 
@@ -117,4 +128,7 @@ s390_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
 #undef TARGET_OPTION_INIT_STRUCT
 #define TARGET_OPTION_INIT_STRUCT s390_option_init_struct
 
+#undef TARGET_SUPPORTS_SPLIT_STACK
+#define TARGET_SUPPORTS_SPLIT_STACK s390_supports_split_stack
+
 struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER;
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 633bc1e..09032c9 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -42,6 +42,7 @@ extern bool s390_handle_option (struct gcc_options *opts ATTRIBUTE_UNUSED,
 extern HOST_WIDE_INT s390_initial_elimination_offset (int, int);
 extern void s390_emit_prologue (void);
 extern void s390_emit_epilogue (bool);
+extern void s390_expand_split_stack_prologue (void);
 extern bool s390_can_use_simple_return_insn (void);
 extern bool s390_can_use_return_insn (void);
 extern void s390_function_profiler (FILE *, int);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 3be64de..59628ba 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -426,6 +426,13 @@ struct GTY(()) machine_function
   /* True if the current function may contain a tbegin clobbering
      FPRs.  */
   bool tbegin_p;
+
+  /* For -fsplit-stack support: A stack local which holds a pointer to
+     the stack arguments for a function with a variable number of
+     arguments.  This is set at the start of the function and is used
+     to initialize the overflow_arg_area field of the va_list
+     structure.  */
+  rtx split_stack_varargs_pointer;
 };
 
 /* Few accessor macros for struct cfun->machine->s390_frame_layout.  */
@@ -9316,9 +9323,13 @@ s390_register_info ()
          cfun_frame_layout.high_fprs++;
       }
 
-  if (flag_pic)
-    clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
-      |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
+  /* Register 12 is used for GOT address, but also as temp in prologue
+     for split-stack stdarg functions (unless r14 is available).  */
+  clobbered_regs[12]
+    |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
+       || (flag_split_stack && cfun->stdarg
+           && (crtl->is_leaf || TARGET_TPF_PROFILING
+               || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
 
   clobbered_regs[BASE_REGNUM]
     |= (cfun->machine->base_reg
@@ -10446,6 +10457,8 @@ s390_emit_prologue (void)
       && !crtl->is_leaf
       && !TARGET_TPF_PROFILING)
     temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
+  else if (flag_split_stack && cfun->stdarg)
+    temp_reg = gen_rtx_REG (Pmode, 12);
   else
     temp_reg = gen_rtx_REG (Pmode, 1);
 
@@ -10939,6 +10952,234 @@ s300_set_up_by_prologue (hard_reg_set_container *regs)
     SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
 }
 
+/* -fsplit-stack support.  */
+
+/* A SYMBOL_REF for __morestack.  */
+static GTY(()) rtx morestack_ref;
+
+/* When using -fsplit-stack, the allocation routines set a field in
+   the TCB to the bottom of the stack plus this much space, measured
+   in bytes.  */
+
+#define SPLIT_STACK_AVAILABLE 1024
+
+/* Emit -fsplit-stack prologue, which goes before the regular function
+   prologue.  */
+
+void
+s390_expand_split_stack_prologue (void)
+{
+  rtx r1, guard, cc;
+  rtx_insn *insn;
+  /* Offset from thread pointer to __private_ss.  */
+  int psso = TARGET_64BIT ? 0x38 : 0x20;
+  /* Pointer size in bytes.  */
+  /* Frame size and argument size - the two parameters to __morestack.  */
+  HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
+  /* Align argument size to 8 bytes - simplifies __morestack code.  */
+  HOST_WIDE_INT args_size = crtl->args.size >= 0
+                           ? ((crtl->args.size + 7) & ~7)
+                           : 0;
+  /* Label to be called by __morestack.  */
+  rtx_code_label *call_done = NULL;
+  rtx tmp;
+
+  gcc_assert (flag_split_stack && reload_completed);
+  if (!TARGET_CPU_ZARCH)
+    {
+      sorry ("CPUs older than z900 are not supported for -fsplit-stack");
+      return;
+    }
+
+  r1 = gen_rtx_REG (Pmode, 1);
+
+  /* If no stack frame will be allocated, don't do anything.  */
+  if (!frame_size)
+    {
+      /* But emit a marker that will let linker and indirect function
+        calls recognise this function as split-stack aware.  */
+      emit_insn (gen_split_stack_marker ());
+      if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
+       {
+         /* If va_start is used, just use r15.  */
+         emit_move_insn (r1,
+                        gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+                                      GEN_INT (STACK_POINTER_OFFSET)));
+
+       }
+      return;
+    }
+
+  if (morestack_ref == NULL_RTX)
+    {
+      morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
+      SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
+                                          | SYMBOL_FLAG_FUNCTION);
+    }
+
+  if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
+    {
+      /* If frame_size will fit in an add instruction, do a stack space
+        check, and only call __morestack if there's not enough space.  */
+
+      /* Get thread pointer.  r1 is the only register we can always destroy - r0
+        could contain a static chain (and cannot be used to address memory
+        anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved.  */
+      emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
+      /* Aim at __private_ss.  */
+      guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
+
+      /* If less than 1kiB used, skip addition and compare directly with
+        __private_ss.  */
+      if (frame_size > SPLIT_STACK_AVAILABLE)
+       {
+         emit_move_insn (r1, guard);
+         if (TARGET_64BIT)
+           emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
+         else
+           emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
+         guard = r1;
+       }
+
+      /* Compare the (maybe adjusted) guard with the stack pointer.  */
+      cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
+
+      call_done = gen_label_rtx ();
+
+      tmp = gen_split_stack_cond_call (call_done,
+                                      morestack_ref,
+                                      GEN_INT (frame_size),
+                                      GEN_INT (args_size),
+                                      cc);
+
+      insn = emit_jump_insn (tmp);
+      JUMP_LABEL (insn) = call_done;
+
+      /* Mark the jump as very unlikely to be taken.  */
+      add_int_reg_note (insn, REG_BR_PROB, REG_BR_PROB_BASE / 100);
+
+      if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
+       {
+         /* If va_start is used, and __morestack was not called, just use
+            r15.  */
+         emit_move_insn (r1,
+                        gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+                                      GEN_INT (STACK_POINTER_OFFSET)));
+       }
+    }
+  else
+    {
+      call_done = gen_label_rtx ();
+
+      /* Now, we need to call __morestack.  It has very special calling
+        conventions: it preserves param/return/static chain registers for
+        calling main function body, and looks for its own parameters
+        at %r1 (after aligning it up to a 4 byte boundary for 31-bit mode).  */
+      tmp = gen_split_stack_call (call_done,
+                                 morestack_ref,
+                                 GEN_INT (frame_size),
+                                 GEN_INT (args_size));
+      insn = emit_jump_insn (tmp);
+      JUMP_LABEL (insn) = call_done;
+      emit_barrier ();
+    }
+
+  /* __morestack will call us here.  */
+
+  emit_label (call_done);
+  LABEL_NUSES (call_done) = 1;
+}
+
+/* Generates split-stack call sequence, along with its parameter block.  */
+
+static void
+s390_expand_split_stack_call (rtx_insn *orig_insn,
+                             rtx call_done,
+                             rtx function,
+                             rtx frame_size,
+                             rtx args_size,
+                             rtx cond)
+{
+  rtx_insn *insn = orig_insn;
+  rtx parmbase = gen_label_rtx ();
+  rtx r1 = gen_rtx_REG (Pmode, 1);
+  rtx tmp, tmp2;
+
+  /* %r1 = litbase.  */
+  insn = emit_insn_after (gen_main_base_64 (r1, parmbase), insn);
+  add_reg_note (insn, REG_LABEL_OPERAND, parmbase);
+  LABEL_NUSES (parmbase)++;
+
+  /* jg<cond> __morestack.  */
+  if (cond == NULL)
+    {
+      tmp = gen_split_stack_sibcall (function, call_done);
+      insn = emit_jump_insn_after (tmp, insn);
+    }
+  else
+    {
+      gcc_assert (s390_comparison (cond, VOIDmode));
+      tmp = gen_split_stack_cond_sibcall (function, cond, call_done);
+      insn = emit_jump_insn_after (tmp, insn);
+    }
+  JUMP_LABEL (insn) = call_done;
+  LABEL_NUSES (call_done)++;
+
+  /* Go to .rodata.  */
+  insn = emit_insn_after (gen_pool_section_start (), insn);
+
+  /* Now, we'll emit parameters to __morestack.  First, align to pointer size
+     (this mirrors the alignment done in __morestack - don't touch it).  */
+  insn = emit_insn_after (gen_pool_align (GEN_INT (UNITS_PER_LONG)), insn);
+
+  insn = emit_label_after (parmbase, insn);
+
+  tmp = gen_rtx_UNSPEC_VOLATILE (Pmode,
+                                gen_rtvec (1, frame_size),
+                                UNSPECV_POOL_ENTRY);
+  insn = emit_insn_after (tmp, insn);
+
+  /* Second parameter is size of the arguments passed on stack that
+     __morestack has to copy to the new stack (does not include varargs).  */
+  tmp = gen_rtx_UNSPEC_VOLATILE (Pmode,
+                                gen_rtvec (1, args_size),
+                                UNSPECV_POOL_ENTRY);
+  insn = emit_insn_after (tmp, insn);
+
+  /* Third parameter is offset between start of the parameter block
+     and function body to be called by __morestack.  */
+  tmp = gen_rtx_LABEL_REF (Pmode, parmbase);
+  tmp2 = gen_rtx_LABEL_REF (Pmode, call_done);
+  tmp = gen_rtx_CONST (Pmode,
+                      gen_rtx_MINUS (Pmode, tmp2, tmp));
+  tmp = gen_rtx_UNSPEC_VOLATILE (Pmode,
+                                gen_rtvec (1, tmp),
+                                UNSPECV_POOL_ENTRY);
+  insn = emit_insn_after (tmp, insn);
+  add_reg_note (insn, REG_LABEL_OPERAND, call_done);
+  LABEL_NUSES (call_done)++;
+  add_reg_note (insn, REG_LABEL_OPERAND, parmbase);
+  LABEL_NUSES (parmbase)++;
+
+  /* Return from .rodata.  */
+  insn = emit_insn_after (gen_pool_section_end (), insn);
+
+  delete_insn (orig_insn);
+}
+
+/* We may have to tell the dataflow pass that the split stack prologue
+   is initializing a register.  */
+
+static void
+s390_live_on_entry (bitmap regs)
+{
+  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
+    {
+      gcc_assert (flag_split_stack);
+      bitmap_set_bit (regs, 1);
+    }
+}
+
 /* Return true if the function can use simple_return to return outside
    of a shrink-wrapped region.  At present shrink-wrapping is supported
    in all cases.  */
@@ -11541,6 +11782,27 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
     }
 
+  if (flag_split_stack
+     && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
+         == NULL)
+     && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
+    {
+      rtx reg;
+      rtx_insn *seq;
+
+      reg = gen_reg_rtx (Pmode);
+      cfun->machine->split_stack_varargs_pointer = reg;
+
+      start_sequence ();
+      emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
+      seq = get_insns ();
+      end_sequence ();
+
+      push_topmost_sequence ();
+      emit_insn_after (seq, entry_of_function ());
+      pop_topmost_sequence ();
+    }
+
   /* Find the overflow area.
      FIXME: This currently is too pessimistic when the vector ABI is
      enabled.  In that case we *always* set up the overflow area
@@ -11549,7 +11811,10 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
       || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
       || TARGET_VX_ABI)
     {
-      t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
+      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
+        t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
+      else
+        t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
 
       off = INTVAL (crtl->args.arg_offset_rtx);
       off = off < 0 ? 0 : off;
@@ -13158,6 +13423,48 @@ s390_reorg (void)
        }
     }
 
+  if (flag_split_stack)
+    {
+      rtx_insn *insn;
+
+      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+       {
+         /* Look for the split-stack fake jump instructions.  */
+         if (!JUMP_P(insn))
+           continue;
+         if (GET_CODE (PATTERN (insn)) != PARALLEL
+             || XVECLEN (PATTERN (insn), 0) != 2)
+           continue;
+         rtx set = XVECEXP (PATTERN (insn), 0, 1);
+         if (GET_CODE (set) != SET)
+           continue;
+         rtx unspec = XEXP(set, 1);
+         if (GET_CODE (unspec) != UNSPEC_VOLATILE)
+           continue;
+         if (XINT (unspec, 1) != UNSPECV_SPLIT_STACK_CALL)
+           continue;
+         rtx set_pc = XVECEXP (PATTERN (insn), 0, 0);
+         rtx function = XVECEXP (unspec, 0, 0);
+         rtx frame_size = XVECEXP (unspec, 0, 1);
+         rtx args_size = XVECEXP (unspec, 0, 2);
+         rtx pc_src = XEXP (set_pc, 1);
+         rtx call_done, cond = NULL_RTX;
+         if (GET_CODE (pc_src) == IF_THEN_ELSE)
+           {
+             cond = XEXP (pc_src, 0);
+             call_done = XEXP (XEXP (pc_src, 1), 0);
+           }
+         else
+           call_done = XEXP (pc_src, 0);
+         s390_expand_split_stack_call (insn,
+                                       call_done,
+                                       function,
+                                       frame_size,
+                                       args_size,
+                                       cond);
+       }
+    }
+
   /* Try to optimize prologue and epilogue further.  */
   s390_optimize_prologue ();
 
@@ -14469,6 +14776,9 @@ s390_asm_file_end (void)
             s390_vector_abi);
 #endif
   file_end_indicate_exec_stack ();
+
+  if (flag_split_stack)
+    file_end_indicate_split_stack ();
 }
 
 /* Return true if TYPE is a vector bool type.  */
@@ -14724,6 +15034,9 @@ s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree ty
 #undef TARGET_SET_UP_BY_PROLOGUE
 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
 
+#undef TARGET_EXTRA_LIVE_ON_ENTRY
+#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
+
 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
   s390_use_by_pieces_infrastructure_p
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 9b869d5..771f1cc 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -114,6 +114,9 @@
    UNSPEC_SP_SET
    UNSPEC_SP_TEST
 
+   ; Split stack support
+   UNSPEC_STACK_CHECK
+
    ; Test Data Class (TDC)
    UNSPEC_TDC_INSN
 
@@ -276,6 +279,11 @@
    ; Set and get floating point control register
    UNSPECV_SFPC
    UNSPECV_EFPC
+
+   ; Split stack support
+   UNSPECV_SPLIT_STACK_CALL
+   UNSPECV_SPLIT_STACK_SIBCALL
+   UNSPECV_SPLIT_STACK_MARKER
   ])
 
 ;;
@@ -10907,3 +10915,172 @@
   "TARGET_Z13"
   "lcbb\t%0,%1,%b2"
   [(set_attr "op_type" "VRX")])
+
+; Handle -fsplit-stack.
+
+(define_expand "split_stack_prologue"
+  [(const_int 0)]
+  ""
+{
+  s390_expand_split_stack_prologue ();
+  DONE;
+})
+
+(define_expand "split_stack_call"
+  [(match_operand 0 "" "")
+   (match_operand 1 "bras_sym_operand" "X")
+   (match_operand 2 "consttable_operand" "X")
+   (match_operand 3 "consttable_operand" "X")]
+  "TARGET_CPU_ZARCH"
+{
+  if (TARGET_64BIT)
+    emit_jump_insn (gen_split_stack_call_di (operands[0],
+                                            operands[1],
+                                            operands[2],
+                                            operands[3]));
+  else
+    emit_jump_insn (gen_split_stack_call_si (operands[0],
+                                            operands[1],
+                                            operands[2],
+                                            operands[3]));
+  DONE;
+})
+
+(define_insn "split_stack_call_<mode>"
+  [(set (pc) (label_ref (match_operand 0 "" "")))
+   (set (reg:P 1) (unspec_volatile [(match_operand 1 "bras_sym_operand" "X")
+                                   (match_operand 2 "consttable_operand" "X")
+                                   (match_operand 3 "consttable_operand" "X")]
+                                  UNSPECV_SPLIT_STACK_CALL))]
+  "TARGET_CPU_ZARCH"
+{
+  gcc_unreachable ();
+}
+  [(set_attr "length" "12")])
+
+(define_expand "split_stack_cond_call"
+  [(match_operand 0 "" "")
+   (match_operand 1 "bras_sym_operand" "X")
+   (match_operand 2 "consttable_operand" "X")
+   (match_operand 3 "consttable_operand" "X")
+   (match_operand 4 "" "")]
+  "TARGET_CPU_ZARCH"
+{
+  if (TARGET_64BIT)
+    emit_jump_insn (gen_split_stack_cond_call_di (operands[0],
+                                                 operands[1],
+                                                 operands[2],
+                                                 operands[3],
+                                                 operands[4]));
+  else
+    emit_jump_insn (gen_split_stack_cond_call_si (operands[0],
+                                                 operands[1],
+                                                 operands[2],
+                                                 operands[3],
+                                                 operands[4]));
+  DONE;
+})
+
+(define_insn "split_stack_cond_call_<mode>"
+  [(set (pc)
+       (if_then_else
+         (match_operand 4 "" "")
+         (label_ref (match_operand 0 "" ""))
+         (pc)))
+   (set (reg:P 1) (unspec_volatile [(match_operand 1 "bras_sym_operand" "X")
+                                   (match_operand 2 "consttable_operand" "X")
+                                   (match_operand 3 "consttable_operand" "X")]
+                                  UNSPECV_SPLIT_STACK_CALL))]
+  "TARGET_CPU_ZARCH"
+{
+  gcc_unreachable ();
+}
+  [(set_attr "length" "12")])
+
+;; If there are operand 0 bytes available on the stack, jump to
+;; operand 1.
+
+(define_expand "split_stack_space_check"
+  [(set (pc) (if_then_else
+             (ltu (minus (reg 15)
+                         (match_operand 0 "register_operand"))
+                  (unspec [(const_int 0)] UNSPEC_STACK_CHECK))
+             (label_ref (match_operand 1))
+             (pc)))]
+  ""
+{
+  /* Offset from thread pointer to __private_ss.  */
+  int psso = TARGET_64BIT ? 0x38 : 0x20;
+  rtx tp = s390_get_thread_pointer ();
+  rtx guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, tp, psso));
+  rtx reg = gen_reg_rtx (Pmode);
+  rtx cc;
+  if (TARGET_64BIT)
+    emit_insn (gen_subdi3 (reg, stack_pointer_rtx, operands[0]));
+  else
+    emit_insn (gen_subsi3 (reg, stack_pointer_rtx, operands[0]));
+  cc = s390_emit_compare (GT, reg, guard);
+  s390_emit_jump (operands[1], cc);
+
+  DONE;
+})
+
+;; A jg with minimal fuss for use in split stack prologue.
+
+(define_expand "split_stack_sibcall"
+  [(match_operand 0 "bras_sym_operand" "X")
+   (match_operand 1 "" "")]
+  "TARGET_CPU_ZARCH"
+{
+  if (TARGET_64BIT)
+    emit_jump_insn (gen_split_stack_sibcall_di (operands[0], operands[1]));
+  else
+    emit_jump_insn (gen_split_stack_sibcall_si (operands[0], operands[1]));
+  DONE;
+})
+
+(define_insn "split_stack_sibcall_<mode>"
+  [(set (pc) (label_ref (match_operand 1 "" "")))
+   (set (reg:P 1) (unspec_volatile [(match_operand 0 "bras_sym_operand" "X")]
+                                  UNSPECV_SPLIT_STACK_SIBCALL))]
+  "TARGET_CPU_ZARCH"
+  "jg\t%0"
+  [(set_attr "op_type" "RIL")
+   (set_attr "type"  "branch")])
+
+;; Also a conditional one.
+
+(define_expand "split_stack_cond_sibcall"
+  [(match_operand 0 "bras_sym_operand" "X")
+   (match_operand 1 "" "")
+   (match_operand 2 "" "")]
+  "TARGET_CPU_ZARCH"
+{
+  if (TARGET_64BIT)
+    emit_jump_insn (gen_split_stack_cond_sibcall_di (operands[0], operands[1], operands[2]));
+  else
+    emit_jump_insn (gen_split_stack_cond_sibcall_si (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "split_stack_cond_sibcall_<mode>"
+  [(set (pc)
+       (if_then_else
+         (match_operand 1 "" "")
+         (label_ref (match_operand 2 "" ""))
+         (pc)))
+   (set (reg:P 1) (unspec_volatile [(match_operand 0 "bras_sym_operand" "X")]
+                                  UNSPECV_SPLIT_STACK_SIBCALL))]
+  "TARGET_CPU_ZARCH"
+  "jg%C1\t%0"
+  [(set_attr "op_type" "RIL")
+   (set_attr "type"  "branch")])
+
+;; An unusual nop instruction used to mark functions with no stack frames
+;; as split-stack aware.
+
+(define_insn "split_stack_marker"
+  [(unspec_volatile [(const_int 0)] UNSPECV_SPLIT_STACK_MARKER)]
+  ""
+  "nopr\t%%r15"
+  [(set_attr "op_type" "RR")])
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 49c7929..3900ab1 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-02-02  Marcin Kościelnicki  <koria...@0x04.net>
+
+       * config.host: Use t-stack and t-stack-s390 for s390*-*-linux.
+       * config/s390/morestack.S: New file.
+       * config/s390/t-stack-s390: New file.
+       * generic-morestack.c (__splitstack_find): Add s390-specific code.
+
 2016-01-25  Jakub Jelinek  <ja...@redhat.com>
 
        PR target/69444
diff --git a/libgcc/config.host b/libgcc/config.host
index d8efd82..2be5f7e 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -1114,11 +1114,11 @@ rx-*-elf)
        tm_file="$tm_file rx/rx-abi.h rx/rx-lib.h"
        ;;
 s390-*-linux*)
-       tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux s390/32/t-floattodi"
+       tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux s390/32/t-floattodi t-stack s390/t-stack-s390"
        md_unwind_header=s390/linux-unwind.h
        ;;
 s390x-*-linux*)
-       tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux"
+       tmake_file="${tmake_file} s390/t-crtstuff s390/t-linux t-stack s390/t-stack-s390"
        if test "${host_address}" = 32; then
           tmake_file="${tmake_file} s390/32/t-floattodi"
        fi
diff --git a/libgcc/config/s390/morestack.S b/libgcc/config/s390/morestack.S
new file mode 100644
index 0000000..141dead
--- /dev/null
+++ b/libgcc/config/s390/morestack.S
@@ -0,0 +1,609 @@
+# s390 support for -fsplit-stack.
+# Copyright (C) 2015 Free Software Foundation, Inc.
+# Contributed by Marcin Kościelnicki <koria...@0x04.net>.
+
+# This file is part of GCC.
+
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+# Excess space needed to call ld.so resolver for lazy plt
+# resolution.  Go uses sigaltstack so this doesn't need to
+# also cover signal frame size.
+#define BACKOFF 0x1000
+
+# The __morestack function.
+
+       .global __morestack
+       .hidden __morestack
+
+       .type   __morestack,@function
+
+__morestack:
+.LFB1:
+       .cfi_startproc
+
+
+#ifndef __s390x__
+
+
+# The 31-bit __morestack function.
+
+       # We use a cleanup to restore the stack guard if an exception
+       # is thrown through this code.
+#ifndef __PIC__
+       .cfi_personality 0,__gcc_personality_v0
+       .cfi_lsda 0,.LLSDA1
+#else
+       .cfi_personality 0x9b,DW.ref.__gcc_personality_v0
+       .cfi_lsda 0x1b,.LLSDA1
+#endif
+
+       stm     %r2, %r15, 0x8(%r15)    # Save %r2-%r15.
+       .cfi_offset %r6, -0x48
+       .cfi_offset %r7, -0x44
+       .cfi_offset %r8, -0x40
+       .cfi_offset %r9, -0x3c
+       .cfi_offset %r10, -0x38
+       .cfi_offset %r11, -0x34
+       .cfi_offset %r12, -0x30
+       .cfi_offset %r13, -0x2c
+       .cfi_offset %r14, -0x28
+       .cfi_offset %r15, -0x24
+       lr      %r11, %r15              # Make frame pointer for vararg.
+       .cfi_def_cfa_register %r11
+       ahi     %r15, -0x60             # 0x60 for standard frame.
+       st      %r11, 0(%r15)           # Save back chain.
+       lr      %r8, %r0                # Save %r0 (static chain).
+       lr      %r10, %r1               # Save %r1 (address of parameter block).
+
+       l       %r7, 0(%r10)            # Required frame size to %r7
+       ear     %r1, %a0                # Extract thread pointer.
+       l       %r1, 0x20(%r1)          # Get stack boundary
+       ar      %r1, %r7                # Stack boundary + frame size
+       a       %r1, 4(%r10)            # + stack param size
+       clr     %r1, %r15               # Compare with current stack pointer
+       jle     .Lnoalloc               # guard + sizes <= sp: no alloc needed
+
+       brasl   %r14, __morestack_block_signals
+
+       # We abuse one of caller's fpr save slots (which we don't use for fprs)
+       # as a local variable.  Not needed here, but done to be consistent with
+       # the below use.
+       ahi     %r7, BACKOFF            # Bump requested size a bit.
+       st      %r7, 0x40(%r11)         # Stuff frame size on stack.
+       la      %r2, 0x40(%r11)         # Pass its address as parameter.
+       la      %r3, 0x60(%r11)         # Caller's stack parameters.
+       l       %r4, 4(%r10)            # Size of stack parameters.
+       brasl   %r14, __generic_morestack
+
+       lr      %r15, %r2               # Switch to the new stack.
+       ahi     %r15, -0x60             # Make a stack frame on it.
+       st      %r11, 0(%r15)           # Save back chain.
+
+       s       %r2, 0x40(%r11)         # The end of stack space.
+       ahi     %r2, BACKOFF            # Back off a bit.
+       ear     %r1, %a0                # Extract thread pointer.
+.LEHB0:
+       st      %r2, 0x20(%r1)  # Save the new stack boundary.
+
+       brasl   %r14, __morestack_unblock_signals
+
+       lr      %r0, %r8                # Static chain.
+       lm      %r2, %r6, 0x8(%r11)     # Parameter registers.
+
+       # Third field of the parameter block is the offset of the function meat
+       # from the parameter block; add it to get the address to call.
+       a       %r10, 0x8(%r10)
+
+       # Leave vararg pointer in %r1, in case function uses it
+       la      %r1, 0x60(%r11)
+
+       # State of registers:
+       # %r0: Static chain from entry.
+       # %r1: Vararg pointer.
+       # %r2-%r6: Parameters from entry.
+       # %r7-%r10: Indeterminate.
+       # %r11: Frame pointer (%r15 from entry).
+       # %r12-%r13: Indeterminate.
+       # %r14: Return address.
+       # %r15: Stack pointer.
+       basr    %r14, %r10              # Call our caller.
+
+       stm     %r2, %r3, 0x8(%r11)     # Save return registers.
+
+       brasl   %r14, __morestack_block_signals
+
+       # We need a stack slot now, but have no good way to get it - the frame
+       # on new stack had to be exactly 0x60 bytes, or stack parameters would
+       # be passed wrong.  Abuse fpr save area in caller's frame (we don't
+       # save actual fprs).
+       la      %r2, 0x40(%r11)
+       brasl   %r14, __generic_releasestack
+
+       s       %r2, 0x40(%r11)         # Subtract available space.
+       ahi     %r2, BACKOFF            # Back off a bit.
+       ear     %r1, %a0                # Extract thread pointer.
+.LEHE0:
+       st      %r2, 0x20(%r1)  # Save the new stack boundary.
+
+       # We need to restore the old stack pointer before unblocking signals.
+       # We also need 0x60 bytes for a stack frame.  Since we had a stack
+       # frame at this place before the stack switch, there's no need to
+       # write the back chain again.
+       lr      %r15, %r11
+       ahi     %r15, -0x60
+
+       brasl   %r14, __morestack_unblock_signals
+
+       lm      %r2, %r15, 0x8(%r11)    # Restore all registers.
+       .cfi_remember_state
+       .cfi_restore %r15
+       .cfi_restore %r14
+       .cfi_restore %r13
+       .cfi_restore %r12
+       .cfi_restore %r11
+       .cfi_restore %r10
+       .cfi_restore %r9
+       .cfi_restore %r8
+       .cfi_restore %r7
+       .cfi_restore %r6
+       .cfi_def_cfa_register %r15
+       br      %r14                    # Return to caller's caller.
+
+# Executed if no new stack allocation is needed.
+
+.Lnoalloc:
+       .cfi_restore_state
+       # We may need to copy stack parameters.
+       l       %r9, 0x4(%r10)          # Load stack parameter size.
+       ltr     %r9, %r9                # And check if it's 0.
+       je      .Lnostackparm           # Skip the copy if not needed.
+       sr      %r15, %r9               # Make space on the stack.
+       la      %r8, 0x60(%r15)         # Destination.
+       la      %r12, 0x60(%r11)        # Source.
+       lr      %r13, %r9               # Source size.
+.Lcopy:
+       mvcle   %r8, %r12, 0            # Copy.
+       jo      .Lcopy
+
+.Lnostackparm:
+       # Third field of the parameter block is the offset of the function meat
+       # from the parameter block; add it to get the address to call.
+       a       %r10, 0x8(%r10)
+
+       # Leave vararg pointer in %r1, in case function uses it
+       la      %r1, 0x60(%r11)
+
+       # OK, no stack allocation needed.  We still follow the protocol and
+       # call our caller - it doesn't cost much and makes sure vararg works.
+       # No need to set any registers here - %r0 and %r2-%r6 weren't modified.
+       basr    %r14, %r10              # Call our caller.
+
+       lm      %r6, %r15, 0x18(%r11)   # Restore all callee-saved registers.
+       .cfi_remember_state
+       .cfi_restore %r15
+       .cfi_restore %r14
+       .cfi_restore %r13
+       .cfi_restore %r12
+       .cfi_restore %r11
+       .cfi_restore %r10
+       .cfi_restore %r9
+       .cfi_restore %r8
+       .cfi_restore %r7
+       .cfi_restore %r6
+       .cfi_def_cfa_register %r15
+       br      %r14                    # Return to caller's caller.
+
+# This is the cleanup code called by the stack unwinder when unwinding
+# through the code between .LEHB0 and .LEHE0 above.
+
+.L1:
+       .cfi_restore_state
+       lr      %r2, %r11               # Stack pointer after resume.
+       brasl   %r14, __generic_findstack
+       lr      %r3, %r11               # Get the stack pointer.
+       sr      %r3, %r2                # Subtract available space.
+       ahi     %r3, BACKOFF            # Back off a bit.
+       ear     %r1, %a0                # Extract thread pointer.
+       st      %r3, 0x20(%r1)  # Save the new stack boundary.
+
+       lr      %r2, %r6                # Exception header.
+#ifdef __PIC__
+       brasl   %r14, _Unwind_Resume@PLT
+#else
+       brasl   %r14, _Unwind_Resume
+#endif
+
+#else /* defined(__s390x__) */
+
+
+# The 64-bit __morestack function.
+
+       # We use a cleanup to restore the stack guard if an exception
+       # is thrown through this code.
+#ifndef __PIC__
+       .cfi_personality 0x3,__gcc_personality_v0
+       .cfi_lsda 0x3,.LLSDA1
+#else
+       .cfi_personality 0x9b,DW.ref.__gcc_personality_v0
+       .cfi_lsda 0x1b,.LLSDA1
+#endif
+
+       stmg    %r2, %r15, 0x10(%r15)   # Save %r2-%r15.
+       .cfi_offset %r6, -0x70
+       .cfi_offset %r7, -0x68
+       .cfi_offset %r8, -0x60
+       .cfi_offset %r9, -0x58
+       .cfi_offset %r10, -0x50
+       .cfi_offset %r11, -0x48
+       .cfi_offset %r12, -0x40
+       .cfi_offset %r13, -0x38
+       .cfi_offset %r14, -0x30
+       .cfi_offset %r15, -0x28
+       lgr     %r11, %r15              # Make frame pointer for vararg.
+       .cfi_def_cfa_register %r11
+       aghi    %r15, -0xa0             # 0xa0 for standard frame.
+       stg     %r11, 0(%r15)           # Save back chain.
+       lgr     %r8, %r0                # Save %r0 (static chain).
+       lgr     %r10, %r1               # Save %r1 (address of parameter block).
+
+       lg      %r7, 0(%r10)            # Required frame size to %r7
+       ear     %r1, %a0
+       sllg    %r1, %r1, 32
+       ear     %r1, %a1                # Extract thread pointer.
+       lg      %r1, 0x38(%r1)          # Get stack boundary
+       agr     %r1, %r7                # Stack boundary + frame size
+       ag      %r1, 8(%r10)            # + stack param size
+       clgr    %r1, %r15               # Compare with current stack pointer
+       jle     .Lnoalloc               # guard + sizes <= sp: no alloc needed
+
+       brasl   %r14, __morestack_block_signals
+
+       # We abuse one of caller's fpr save slots (which we don't use for fprs)
+       # as a local variable.  Not needed here, but done to be consistent with
+       # the below use.
+       aghi    %r7, BACKOFF            # Bump requested size a bit.
+       stg     %r7, 0x80(%r11)         # Stuff frame size on stack.
+       la      %r2, 0x80(%r11)         # Pass its address as parameter.
+       la      %r3, 0xa0(%r11)         # Caller's stack parameters.
+       lg      %r4, 8(%r10)            # Size of stack parameters.
+       brasl   %r14, __generic_morestack
+
+       lgr     %r15, %r2               # Switch to the new stack.
+       aghi    %r15, -0xa0             # Make a stack frame on it.
+       stg     %r11, 0(%r15)           # Save back chain.
+
+       sg      %r2, 0x80(%r11)         # The end of stack space.
+       aghi    %r2, BACKOFF            # Back off a bit.
+       ear     %r1, %a0
+       sllg    %r1, %r1, 32
+       ear     %r1, %a1                # Extract thread pointer.
+.LEHB0:
+       stg     %r2, 0x38(%r1)  # Save the new stack boundary.
+
+       brasl   %r14, __morestack_unblock_signals
+
+       lgr     %r0, %r8                # Static chain.
+       lmg     %r2, %r6, 0x10(%r11)    # Parameter registers.
+
+       # Third field of the parameter block is the offset of the function meat
+       # from the parameter block; add it to get the address to call.
+       ag      %r10, 0x10(%r10)
+
+       # Leave vararg pointer in %r1, in case function uses it
+       la      %r1, 0xa0(%r11)
+
+       # State of registers:
+       # %r0: Static chain from entry.
+       # %r1: Vararg pointer.
+       # %r2-%r6: Parameters from entry.
+       # %r7-%r10: Indeterminate.
+       # %r11: Frame pointer (%r15 from entry).
+       # %r12-%r13: Indeterminate.
+       # %r14: Return address.
+       # %r15: Stack pointer.
+       basr    %r14, %r10              # Call our caller.
+
+       stg     %r2, 0x10(%r11)         # Save return register.
+
+       brasl   %r14, __morestack_block_signals
+
+       # We need a stack slot now, but have no good way to get it - the frame
+       # on new stack had to be exactly 0xa0 bytes, or stack parameters would
+       # be passed wrong.  Abuse fpr save area in caller's frame (we don't
+       # save actual fprs).
+       la      %r2, 0x80(%r11)
+       brasl   %r14, __generic_releasestack
+
+       sg      %r2, 0x80(%r11)         # Subtract available space.
+       aghi    %r2, BACKOFF            # Back off a bit.
+       ear     %r1, %a0
+       sllg    %r1, %r1, 32
+       ear     %r1, %a1                # Extract thread pointer.
+.LEHE0:
+       stg     %r2, 0x38(%r1)  # Save the new stack boundary.
+
+       # We need to restore the old stack pointer before unblocking signals.
+       # We also need 0xa0 bytes for a stack frame.  Since we had a stack
+       # frame at this place before the stack switch, there's no need to
+       # write the back chain again.
+       lgr     %r15, %r11
+       aghi    %r15, -0xa0
+
+       brasl   %r14, __morestack_unblock_signals
+
+       lmg     %r2, %r15, 0x10(%r11)   # Restore all registers.
+       .cfi_remember_state
+       .cfi_restore %r15
+       .cfi_restore %r14
+       .cfi_restore %r13
+       .cfi_restore %r12
+       .cfi_restore %r11
+       .cfi_restore %r10
+       .cfi_restore %r9
+       .cfi_restore %r8
+       .cfi_restore %r7
+       .cfi_restore %r6
+       .cfi_def_cfa_register %r15
+       br      %r14                    # Return to caller's caller.
+
+# Executed if no new stack allocation is needed.
+
+.Lnoalloc:
+       .cfi_restore_state
+       # We may need to copy stack parameters.
+       lg      %r9, 0x8(%r10)          # Load stack parameter size.
+       ltgr    %r9, %r9                # Check if it's 0.
+       je      .Lnostackparm           # Skip the copy if not needed.
+       sgr     %r15, %r9               # Make space on the stack.
+       la      %r8, 0xa0(%r15)         # Destination.
+       la      %r12, 0xa0(%r11)        # Source.
+       lgr     %r13, %r9               # Source size.
+.Lcopy:
+       mvcle   %r8, %r12, 0            # Copy.
+       jo      .Lcopy
+
+.Lnostackparm:
+       # Third field of the parameter block is the offset of the function meat
+       # from the parameter block; add it to get the address to call.
+       ag      %r10, 0x10(%r10)
+
+       # Leave vararg pointer in %r1, in case function uses it
+       la      %r1, 0xa0(%r11)
+
+       # OK, no stack allocation needed.  We still follow the protocol and
+       # call our caller - it doesn't cost much and makes sure vararg works.
+       # No need to set any registers here - %r0 and %r2-%r6 weren't modified.
+       basr    %r14, %r10              # Call our caller.
+
+       lmg     %r6, %r15, 0x30(%r11)   # Restore all callee-saved registers.
+       .cfi_remember_state
+       .cfi_restore %r15
+       .cfi_restore %r14
+       .cfi_restore %r13
+       .cfi_restore %r12
+       .cfi_restore %r11
+       .cfi_restore %r10
+       .cfi_restore %r9
+       .cfi_restore %r8
+       .cfi_restore %r7
+       .cfi_restore %r6
+       .cfi_def_cfa_register %r15
+       br      %r14                    # Return to caller's caller.
+
+# This is the cleanup code called by the stack unwinder when unwinding
+# through the code between .LEHB0 and .LEHE0 above.
+
+.L1:
+       .cfi_restore_state
+       lgr     %r2, %r11               # Stack pointer after resume.
+       brasl   %r14, __generic_findstack
+       lgr     %r3, %r11               # Get the stack pointer.
+       sgr     %r3, %r2                # Subtract available space.
+       aghi    %r3, BACKOFF            # Back off a bit.
+       ear     %r1, %a0
+       sllg    %r1, %r1, 32
+       ear     %r1, %a1                # Extract thread pointer.
+       stg     %r3, 0x38(%r1)  # Save the new stack boundary.
+
+       lgr     %r2, %r6                # Exception header.
+#ifdef __PIC__
+       brasl   %r14, _Unwind_Resume@PLT
+#else
+       brasl   %r14, _Unwind_Resume
+#endif
+
+#endif /* defined(__s390x__) */
+
+       .cfi_endproc
+       .size   __morestack, . - __morestack
+
+
+# The exception table.  This tells the personality routine to execute
+# the exception handler.
+
+       .section        .gcc_except_table,"a",@progbits
+       .align  4
+.LLSDA1:
+       .byte   0xff    # @LPStart format (omit)
+       .byte   0xff    # @TType format (omit)
+       .byte   0x1     # call-site format (uleb128)
+       .uleb128 .LLSDACSE1-.LLSDACSB1  # Call-site table length
+.LLSDACSB1:
+       .uleb128 .LEHB0-.LFB1   # region 0 start
+       .uleb128 .LEHE0-.LEHB0  # length
+       .uleb128 .L1-.LFB1      # landing pad
+       .uleb128 0              # action
+.LLSDACSE1:
+
+
+       .global __gcc_personality_v0
+#ifdef __PIC__
+       # Build a position independent reference to the basic
+       # personality function.
+       .hidden DW.ref.__gcc_personality_v0
+       .weak   DW.ref.__gcc_personality_v0
+       .section .data.DW.ref.__gcc_personality_v0,"awG",@progbits,DW.ref.__gcc_personality_v0,comdat
+       .type   DW.ref.__gcc_personality_v0, @object
+DW.ref.__gcc_personality_v0:
+#ifndef __LP64__
+       .align 4
+       .size   DW.ref.__gcc_personality_v0, 4
+       .long   __gcc_personality_v0
+#else
+       .align 8
+       .size   DW.ref.__gcc_personality_v0, 8
+       .quad   __gcc_personality_v0
+#endif
+#endif
+
+
+
+# Initialize the stack test value when the program starts or when a
+# new thread starts.  We don't know how large the main stack is, so we
+# guess conservatively.  We might be able to use getrlimit here.
+
+       .text
+       .global __stack_split_initialize
+       .hidden __stack_split_initialize
+
+       .type   __stack_split_initialize, @function
+
+__stack_split_initialize:
+
+#ifndef __s390x__
+
+       ear     %r1, %a0
+       lr      %r0, %r15
+       ahi     %r0, -0x4000    # We should have at least 16K.
+       st      %r0, 0x20(%r1)
+
+       lr      %r2, %r15
+       lhi     %r3, 0x4000
+#ifdef __PIC__
+       jg      __generic_morestack_set_initial_sp@PLT  # Tail call
+#else
+       jg      __generic_morestack_set_initial_sp      # Tail call
+#endif
+
+#else /* defined(__s390x__) */
+
+       ear     %r1, %a0
+       sllg    %r1, %r1, 32
+       ear     %r1, %a1
+       lgr     %r0, %r15
+       aghi    %r0, -0x4000    # We should have at least 16K.
+       stg     %r0, 0x38(%r1)
+
+       lgr     %r2, %r15
+       lghi    %r3, 0x4000
+#ifdef __PIC__
+       jg      __generic_morestack_set_initial_sp@PLT  # Tail call
+#else
+       jg      __generic_morestack_set_initial_sp      # Tail call
+#endif
+
+#endif /* defined(__s390x__) */
+
+       .size   __stack_split_initialize, . - __stack_split_initialize
+
+# Routines to get and set the guard, for __splitstack_getcontext,
+# __splitstack_setcontext, and __splitstack_makecontext.
+
+# void *__morestack_get_guard (void) returns the current stack guard.
+       .text
+       .global __morestack_get_guard
+       .hidden __morestack_get_guard
+
+       .type   __morestack_get_guard,@function
+
+__morestack_get_guard:
+
+#ifndef __s390x__
+       ear     %r1, %a0
+       l       %r2, 0x20(%r1)
+#else
+       ear     %r1, %a0
+       sllg    %r1, %r1, 32
+       ear     %r1, %a1
+       lg      %r2, 0x38(%r1)
+#endif
+       br %r14
+
+       .size   __morestack_get_guard, . - __morestack_get_guard
+
+# void __morestack_set_guard (void *) sets the stack guard.
+       .global __morestack_set_guard
+       .hidden __morestack_set_guard
+
+       .type   __morestack_set_guard,@function
+
+__morestack_set_guard:
+
+#ifndef __s390x__
+       ear     %r1, %a0
+       st      %r2, 0x20(%r1)
+#else
+       ear     %r1, %a0
+       sllg    %r1, %r1, 32
+       ear     %r1, %a1
+       stg     %r2, 0x38(%r1)
+#endif
+       br      %r14
+
+       .size   __morestack_set_guard, . - __morestack_set_guard
+
+# void *__morestack_make_guard (void *, size_t) returns the stack
+# guard value for a stack.
+       .global __morestack_make_guard
+       .hidden __morestack_make_guard
+
+       .type   __morestack_make_guard,@function
+
+__morestack_make_guard:
+
+#ifndef __s390x__
+       sr      %r2, %r3
+       ahi     %r2, BACKOFF
+#else
+       sgr     %r2, %r3
+       aghi    %r2, BACKOFF
+#endif
+       br      %r14
+
+       .size   __morestack_make_guard, . - __morestack_make_guard
+
+# Make __stack_split_initialize a high priority constructor.
+
+       .section .ctors.65535,"aw",@progbits
+
+#ifndef __LP64__
+       .align  4
+       .long   __stack_split_initialize
+       .long   __morestack_load_mmap
+#else
+       .align  8
+       .quad   __stack_split_initialize
+       .quad   __morestack_load_mmap
+#endif
+
+       .section        .note.GNU-stack,"",@progbits
+       .section        .note.GNU-split-stack,"",@progbits
+       .section        .note.GNU-no-split-stack,"",@progbits
diff --git a/libgcc/config/s390/t-stack-s390 b/libgcc/config/s390/t-stack-s390
new file mode 100644
index 0000000..4c959b0
--- /dev/null
+++ b/libgcc/config/s390/t-stack-s390
@@ -0,0 +1,2 @@
+# Makefile fragment to support -fsplit-stack for s390.
+LIB2ADD_ST += $(srcdir)/config/s390/morestack.S
diff --git a/libgcc/generic-morestack.c b/libgcc/generic-morestack.c
index 89765d4..b8eec4e 100644
--- a/libgcc/generic-morestack.c
+++ b/libgcc/generic-morestack.c
@@ -939,6 +939,10 @@ __splitstack_find (void *segment_arg, void *sp, size_t *len,
 #elif defined (__i386__)
       nsp -= 6 * sizeof (void *);
 #elif defined __powerpc64__
+#elif defined __s390x__
+      nsp -= 2 * 160;
+#elif defined __s390__
+      nsp -= 2 * 96;
 #else
 #error "unrecognized target"
 #endif
-- 
2.7.0

Reply via email to