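This small refinement to the -fsplit-stack prologue arg pointer
initialization improves code generation.  Compare the -O2 code
generated for down() in gcc/testsuite/gcc.dg/split-3.c below: the
arg pointer in r12 is now set up by a register copy ("mr 12,1")
emitted before the stack adjust, rather than by an add
("addi 12,1,10144") after it that depends on the updated stack
pointer.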

before                  after
        mflr 0                  mflr 0
        std 31,-8(1)            std 31,-8(1)
        std 0,16(1)             mr 12,1
        stdu 1,-10144(1)        std 0,16(1)
        addi 12,1,10144         stdu 1,-10144(1)
        bge 7,.L7               bge 7,.L7
        mr 12,29                mr 12,29
.L7:                    .L7:

        * config/rs6000/rs6000.c (rs6000_emit_allocate_stack): Return
        stack adjusting insn.  Formatting.
        (rs6000_emit_prologue): Track stack adjusting insn, and use of
        r12.  If possible, emit first -fsplit-stack arg pointer insn
        before stack adjust.  Don't use r12 to save cr if split-stack.

diff -urpN gcc-split-stack1/gcc/config/rs6000/rs6000.c gcc-split-stack2/gcc/config/rs6000/rs6000.c
--- gcc-split-stack1/gcc/config/rs6000/rs6000.c 2015-05-18 10:17:11.341628090 +0930
+++ gcc-split-stack2/gcc/config/rs6000/rs6000.c 2015-05-18 10:16:58.758131165 +0930
@@ -22608,7 +22608,7 @@ rs6000_emit_stack_tie (rtx fp, bool hard
    If COPY_REG, make sure a copy of the old frame is left there.
    The generated code may use hard register 0 as a temporary.  */
 
-static void
+static rtx_insn *
 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
 {
   rtx_insn *insn;
@@ -22621,7 +22621,7 @@ rs6000_emit_allocate_stack (HOST_WIDE_IN
     {
       warning (0, "stack frame too large");
       emit_insn (gen_trap ());
-      return;
+      return 0;
     }
 
   if (crtl->limit_stack)
@@ -22672,9 +22672,9 @@ rs6000_emit_allocate_stack (HOST_WIDE_IN
   
   insn = emit_insn (TARGET_32BIT
                    ? gen_movsi_update_stack (stack_reg, stack_reg,
-                                       todec, stack_reg)
+                                             todec, stack_reg)
                    : gen_movdi_di_update_stack (stack_reg, stack_reg,
-                                          todec, stack_reg));
+                                                todec, stack_reg));
   /* Since we didn't use gen_frame_mem to generate the MEM, grab
      it now and set the alias set/attributes. The above gen_*_update
      calls will generate a PARALLEL with the MEM set being the first
@@ -22692,6 +22692,7 @@ rs6000_emit_allocate_stack (HOST_WIDE_IN
   add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
                                                      GEN_INT (-size))));
+  return insn;
 }
 
 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
@@ -23496,6 +23497,10 @@ rs6000_emit_prologue (void)
   /* Offset to top of frame for frame_reg and sp respectively.  */
   HOST_WIDE_INT frame_off = 0;
   HOST_WIDE_INT sp_off = 0;
+  /* sp_adjust is the stack adjusting instruction, tracked so that the
+     insn setting up the split-stack arg pointer can be emitted just
+     prior to it, when r12 is not used here for other purposes.  */
+  rtx_insn *sp_adjust = 0;
 
 #ifdef ENABLE_CHECKING
   /* Track and check usage of r0, r11, r12.  */
@@ -23714,7 +23719,10 @@ rs6000_emit_prologue (void)
            ptr_off = info->altivec_save_offset + info->altivec_size;
          frame_off = -ptr_off;
        }
-      rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
+      sp_adjust = rs6000_emit_allocate_stack (info->total_size,
+                                             ptr_reg, ptr_off);
+      if (REGNO (frame_reg_rtx) == 12)
+       sp_adjust = 0;
       sp_off = info->total_size;
       if (frame_reg_rtx != sp_reg_rtx)
        rs6000_emit_stack_tie (frame_reg_rtx, false);
@@ -23755,7 +23763,8 @@ rs6000_emit_prologue (void)
   if (!WORLD_SAVE_P (info)
       && info->cr_save_p
       && REGNO (frame_reg_rtx) != cr_save_regno
-      && !(using_static_chain_p && cr_save_regno == 11))
+      && !(using_static_chain_p && cr_save_regno == 11)
+      && !(flag_split_stack && cr_save_regno == 12 && sp_adjust))
     {
       cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
       START_USE (cr_save_regno);
@@ -23901,6 +23910,8 @@ rs6000_emit_prologue (void)
       int end_save = info->gp_save_offset + info->gp_size;
       int ptr_off;
 
+      if (ptr_regno == 12)
+       sp_adjust = 0;
       if (!ptr_set_up)
        ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
 
@@ -24219,7 +24230,10 @@ rs6000_emit_prologue (void)
        }
       else if (REGNO (frame_reg_rtx) == 1)
        frame_off = info->total_size;
-      rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
+      sp_adjust = rs6000_emit_allocate_stack (info->total_size,
+                                             ptr_reg, ptr_off);
+      if (REGNO (frame_reg_rtx) == 12)
+       sp_adjust = 0;
       sp_off = info->total_size;
       if (frame_reg_rtx != sp_reg_rtx)
        rs6000_emit_stack_tie (frame_reg_rtx, false);
@@ -24249,6 +24263,8 @@ rs6000_emit_prologue (void)
 
       gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
       NOT_INUSE (0);
+      if (scratch_regno == 12)
+       sp_adjust = 0;
       if (end_save + frame_off != 0)
        {
          rtx offset = GEN_INT (end_save + frame_off);
@@ -24326,7 +24342,7 @@ rs6000_emit_prologue (void)
       if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
          && !using_static_chain_p)
        save_regno = 11;
-      else if (REGNO (frame_reg_rtx) == 12)
+      else if (flag_split_stack || REGNO (frame_reg_rtx) == 12)
        {
          save_regno = 11;
          if (using_static_chain_p)
@@ -24372,6 +24388,7 @@ rs6000_emit_prologue (void)
          rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
          rtx tmp = gen_rtx_REG (Pmode, 12);
 
+         sp_adjust = 0;
          insn = emit_move_insn (tmp, lr);
          RTX_FRAME_RELATED_P (insn) = 1;
 
@@ -24434,7 +24451,13 @@ rs6000_emit_prologue (void)
         __morestack was called, it left the arg pointer to the old
         stack in r29.  Otherwise, the arg pointer is the top of the
         current frame.  */
-      if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
+      if (sp_adjust)
+       {
+         rtx r12 = gen_rtx_REG (Pmode, 12);
+         rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
+         emit_insn_before (set_r12, sp_adjust);
+       }
+      else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
        {
          rtx r12 = gen_rtx_REG (Pmode, 12);
          if (frame_off == 0)

-- 
Alan Modra
Australia Development Lab, IBM

Reply via email to