Hello Alan, Thanks a lot for following up on this one. Coincidentally, I was just about to submit the alternate approach we had discussed about, after David's comment at http://gcc.gnu.org/ml/gcc-patches/2011-11/msg01842.html.
We have experienced with it for a while on our gcc-4.5 series of compilers and we are working on a transition to gcc 4.7. This is of course a much heavier hammer so it would be nice if we can indeed have a subtler way out :-) Olivier On Apr 3, 2012, at 10:40 , Alan Modra wrote: > This patch merges the rs6000 stack_tie and frame_tie rtl, so that we > now should use a tie insn that mentions all base regs involved in > register saves and restores. That should avoid any alias analysis > problems eg. http://gcc.gnu.org/ml/gcc-patches/2011-11/msg01156.html. > I chose to put the regs of interest on the destination of the fake > set, rather than in the source as is currently done, because I figure > that relying on the compiler to not reorder reads is fragile. Here's > an example of the new stack tie: > > (set (mem:BLK (unspec:SI [(reg:SI 1) (reg:SI 12)] UNSPEC_TIE)) > (const_int 0)) > > Some notes: > - I tried putting the mem on both source and destination of the set, > but that runs afoul of rtl dead code elimination. > - The rs6000_emit_epilogue places that called gen_frame_tie and > gen_stack_tie both used alias set 0 mems. I believe this was > unnecessarily restrictive. > - CODE_FOR_stack_tie is mentioned in rs6000.c but not > CODE_FOR_frame_tie. Removing frame_tie fixes this sched bug too. > > Bootstrapped and regression tested powerpc-linux with usual -O2 > BOOT_CFLAGS and also -Os, and testcases from pr44199, pr30282, pr52828, > the url above and http://gcc.gnu.org/ml/gcc/2011-03/msg00123.html > examined for sanity. OK to apply? > > PR target/52828 > * config/rs6000/rs6000.c (rs6000_emit_stack_tie): Rewrite with > tie regs on destination of set. Delete forward declaration. > (rs6000_emit_stack_reset): Update rs6000_emit_stack_tie calls. > (rs6000_emit_prologue): Likewise. > (rs6000_emit_epilogue): Likewise. Use in place of gen_frame_tie > and gen_stack_tie. > * config/rs6000/predicates.md (tie_operand): New. > * config/rs6000/rs6000.md (restore_stack_block): Generate new > stack tie rtl. > (restore_stack_nonlocal): Likewise. > (stack_tie): Update. > (frame_tie): Delete. > > Index: gcc/config/rs6000/rs6000.c > =================================================================== > --- gcc/config/rs6000/rs6000.c (revision 185830) > +++ gcc/config/rs6000/rs6000.c (working copy) > @@ -925,7 +925,6 @@ static const char *rs6000_invalid_within_doloop (c > static bool rs6000_legitimate_address_p (enum machine_mode, rtx, bool); > static bool rs6000_debug_legitimate_address_p (enum machine_mode, rtx, bool); > static rtx rs6000_generate_compare (rtx, enum machine_mode); > -static void rs6000_emit_stack_tie (void); > static bool spe_func_has_64bit_regs_p (void); > static rtx gen_frame_mem_offset (enum machine_mode, rtx, int); > static unsigned rs6000_hash_constant (rtx); > @@ -18517,12 +18516,28 @@ rs6000_aix_asm_output_dwarf_table_ref (char * fram > and the change to the stack pointer. */ > > static void > -rs6000_emit_stack_tie (void) > +rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed) > { > - rtx mem = gen_frame_mem (BLKmode, > - gen_rtx_REG (Pmode, STACK_POINTER_REGNUM)); > + rtx u; > + rtvec p; > + int i; > > - emit_insn (gen_stack_tie (mem)); > + if (REGNO (fp) == STACK_POINTER_REGNUM > + || (hard_frame_needed > + && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)) > + fp = NULL_RTX; > + > + p = rtvec_alloc (1 + hard_frame_needed + (fp != NULL_RTX)); > + > + i = 0; > + RTVEC_ELT (p, i++) = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); > + if (hard_frame_needed) > + RTVEC_ELT (p, i++) = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM); > + if (fp != NULL_RTX) > + RTVEC_ELT (p, i++) = fp; > + u = gen_rtx_UNSPEC (Pmode, p, UNSPEC_TIE); > + > + emit_insn (gen_stack_tie (gen_frame_mem (BLKmode, u))); > } > > /* Emit the correct code for allocating stack space, as insns. > @@ -19142,7 +19157,7 @@ rs6000_emit_stack_reset (rs6000_stack_t *info, > || (TARGET_SPE_ABI > && info->spe_64bit_regs_used != 0 > && info->first_gp_reg_save != 32)) > - rs6000_emit_stack_tie (); > + rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed); > > if (frame_reg_rtx != sp_reg_rtx) > { > @@ -19362,7 +19377,7 @@ rs6000_emit_prologue (void) > } > rs6000_emit_allocate_stack (info->total_size, copy_reg); > if (frame_reg_rtx != sp_reg_rtx) > - rs6000_emit_stack_tie (); > + rs6000_emit_stack_tie (frame_reg_rtx, false); > } > > /* Handle world saves specially here. */ > @@ -19866,7 +19881,7 @@ rs6000_emit_prologue (void) > sp_offset = info->total_size; > rs6000_emit_allocate_stack (info->total_size, copy_reg); > if (frame_reg_rtx != sp_reg_rtx) > - rs6000_emit_stack_tie (); > + rs6000_emit_stack_tie (frame_reg_rtx, false); > } > > /* Set frame pointer, if needed. */ > @@ -20437,13 +20452,7 @@ rs6000_emit_epilogue (int sibcall) > /* Prevent reordering memory accesses against stack pointer restore. */ > else if (cfun->calls_alloca > || offset_below_red_zone_p (-info->total_size)) > - { > - rtx mem1 = gen_rtx_MEM (BLKmode, hard_frame_pointer_rtx); > - rtx mem2 = gen_rtx_MEM (BLKmode, sp_reg_rtx); > - MEM_NOTRAP_P (mem1) = 1; > - MEM_NOTRAP_P (mem2) = 1; > - emit_insn (gen_frame_tie (mem1, mem2)); > - } > + rs6000_emit_stack_tie (frame_reg_rtx, true); > > insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx, > GEN_INT (info->total_size))); > @@ -20456,11 +20465,7 @@ rs6000_emit_epilogue (int sibcall) > /* Prevent reordering memory accesses against stack pointer restore. */ > if (cfun->calls_alloca > || offset_below_red_zone_p (-info->total_size)) > - { > - rtx mem = gen_rtx_MEM (BLKmode, sp_reg_rtx); > - MEM_NOTRAP_P (mem) = 1; > - emit_insn (gen_stack_tie (mem)); > - } > + rs6000_emit_stack_tie (frame_reg_rtx, false); > insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, > GEN_INT (info->total_size))); > sp_offset = 0; > Index: gcc/config/rs6000/predicates.md > =================================================================== > --- gcc/config/rs6000/predicates.md (revision 185830) > +++ gcc/config/rs6000/predicates.md (working copy) > @@ -1451,3 +1451,11 @@ > > return 1; > }) > + > +;; Return 1 if OP is a stack tie operand. > +(define_predicate "tie_operand" > + (match_code "mem") > +{ > + return (GET_CODE (XEXP (op, 0)) == UNSPEC > + && XINT (XEXP (op, 0), 1) == UNSPEC_TIE); > +}) > Index: gcc/config/rs6000/rs6000.md > =================================================================== > --- gcc/config/rs6000/rs6000.md (revision 185830) > +++ gcc/config/rs6000/rs6000.md (working copy) > @@ -11989,17 +11989,20 @@ > (define_expand "restore_stack_block" > [(set (match_dup 2) (match_dup 3)) > (set (match_dup 4) (match_dup 2)) > - (set (match_dup 5) (unspec:BLK [(match_dup 5)] UNSPEC_TIE)) > + (set (match_dup 5) (const_int 0)) > (set (match_operand 0 "register_operand" "") > (match_operand 1 "register_operand" ""))] > "" > " > { > + rtx u; > + > operands[1] = force_reg (Pmode, operands[1]); > operands[2] = gen_reg_rtx (Pmode); > operands[3] = gen_frame_mem (Pmode, operands[0]); > operands[4] = gen_frame_mem (Pmode, operands[1]); > - operands[5] = gen_frame_mem (BLKmode, operands[0]); > + u = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[0]), UNSPEC_TIE); > + operands[5] = gen_frame_mem (BLKmode, u); > }") > > (define_expand "save_stack_nonlocal" > @@ -12022,12 +12025,13 @@ > [(set (match_dup 2) (match_operand 1 "memory_operand" "")) > (set (match_dup 3) (match_dup 4)) > (set (match_dup 5) (match_dup 2)) > - (set (match_dup 6) (unspec:BLK [(match_dup 6)] UNSPEC_TIE)) > + (set (match_dup 6) (const_int 0)) > (set (match_operand 0 "register_operand" "") (match_dup 3))] > "" > " > { > int units_per_word = (TARGET_32BIT) ? 4 : 8; > + rtx u; > > /* Restore the backchain from the first word, sp from the second. */ > operands[2] = gen_reg_rtx (Pmode); > @@ -12035,7 +12039,8 @@ > operands[1] = adjust_address_nv (operands[1], Pmode, 0); > operands[4] = adjust_address_nv (operands[1], Pmode, units_per_word); > operands[5] = gen_frame_mem (Pmode, operands[3]); > - operands[6] = gen_frame_mem (BLKmode, operands[0]); > + u = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, operands[0]), UNSPEC_TIE); > + operands[6] = gen_frame_mem (BLKmode, u); > }") > > ;; TOC register handling. > @@ -15899,25 +15904,15 @@ > [(set_attr "type" "branch") > (set_attr "length" "4")]) > > -; These are to explain that changes to the stack pointer should > -; not be moved over stores to stack memory. > +; This is to explain that changes to the stack pointer should > +; not be moved over loads from or stores to stack memory. > (define_insn "stack_tie" > - [(set (match_operand:BLK 0 "memory_operand" "+m") > - (unspec:BLK [(match_dup 0)] UNSPEC_TIE))] > + [(set (match_operand:BLK 0 "tie_operand" "") > + (const_int 0))] > "" > "" > [(set_attr "length" "0")]) > > -; Like stack_tie, but depend on both fp and sp based memory. > -(define_insn "frame_tie" > - [(set (match_operand:BLK 0 "memory_operand" "+m") > - (unspec:BLK [(match_dup 0) > - (match_operand:BLK 1 "memory_operand" "m")] UNSPEC_TIE))] > - "" > - "" > - [(set_attr "length" "0")]) > - > - > (define_expand "epilogue" > [(use (const_int 0))] > "" > > -- > Alan Modra > Australia Development Lab, IBM >