Eric Botcazou asked that I re-apply this previously reverted patch, so here goes. Bootstrapped+regression-test x86_64-unknown-linux-gnu at r200585 with and without -m32. The 32-bit i686-* aka. -m32 previously exposed PR55030. Though that PR was originally about the regression causing the revert, it's also where the discussion about re-applying the patch is, so I cross-referenced it. (There are also notes that this patch has also been checked on half a dozen other targets.)
While there were no regressions testing the patch, there *was* a FAIL turned PASS: libgomp.c++/loop-3.C (libgomp.sum). I'm guessing that was somewhat spurious and caused by an unstable test-case which could just as well have been the other way round (thankful it didn't) but I'm a bit uncertain; maybe those gomp thingies make use of __builtin_setjmp somehow. See <http://gcc.gnu.org/ml/gcc-patches/2012-10/msg01172.html> for the reason for the original commit. PR middle-end/55030 * stmt.c (expand_nl_goto_receiver): Remove almost-copy of expand_builtin_setjmp_receiver. (expand_label): Adjust, call expand_builtin_setjmp_receiver with NULL for the label parameter. * builtins.c (expand_builtin_setjmp_receiver): Don't clobber the frame-pointer. Adjust comments. [HAVE_builtin_setjmp_receiver]: Emit builtin_setjmp_receiver only if LABEL is non-NULL. Index: gcc/builtins.c =================================================================== --- gcc/builtins.c (revision 192675) +++ gcc/builtins.c (revision 192676) @@ -885,14 +885,15 @@ expand_builtin_setjmp_setup (rtx buf_add } /* Construct the trailing part of a __builtin_setjmp call. This is - also called directly by the SJLJ exception handling code. */ + also called directly by the SJLJ exception handling code. + If RECEIVER_LABEL is NULL, instead contruct a nonlocal goto handler. */ void expand_builtin_setjmp_receiver (rtx receiver_label ATTRIBUTE_UNUSED) { rtx chain; - /* Clobber the FP when we get here, so we have to make sure it's + /* Mark the FP as used when we get here, so we have to make sure it's marked as used by this function. */ emit_use (hard_frame_pointer_rtx); @@ -907,17 +908,28 @@ expand_builtin_setjmp_receiver (rtx rece #ifdef HAVE_nonlocal_goto if (! HAVE_nonlocal_goto) #endif - { - emit_move_insn (virtual_stack_vars_rtx, hard_frame_pointer_rtx); - /* This might change the hard frame pointer in ways that aren't - apparent to early optimization passes, so force a clobber. */ - emit_clobber (hard_frame_pointer_rtx); - } + /* First adjust our frame pointer to its actual value. It was + previously set to the start of the virtual area corresponding to + the stacked variables when we branched here and now needs to be + adjusted to the actual hardware fp value. + + Assignments to virtual registers are converted by + instantiate_virtual_regs into the corresponding assignment + to the underlying register (fp in this case) that makes + the original assignment true. + So the following insn will actually be decrementing fp by + STARTING_FRAME_OFFSET. */ + emit_move_insn (virtual_stack_vars_rtx, hard_frame_pointer_rtx); #if !HARD_FRAME_POINTER_IS_ARG_POINTER if (fixed_regs[ARG_POINTER_REGNUM]) { #ifdef ELIMINABLE_REGS + /* If the argument pointer can be eliminated in favor of the + frame pointer, we don't need to restore it. We assume here + that if such an elimination is present, it can always be used. + This is the case on all known machines; if we don't make this + assumption, we do unnecessary saving on many machines. */ size_t i; static const struct elims {const int from, to;} elim_regs[] = ELIMINABLE_REGS; @@ -938,7 +950,7 @@ expand_builtin_setjmp_receiver (rtx rece #endif #ifdef HAVE_builtin_setjmp_receiver - if (HAVE_builtin_setjmp_receiver) + if (receiver_label != NULL && HAVE_builtin_setjmp_receiver) emit_insn (gen_builtin_setjmp_receiver (receiver_label)); else #endif Index: gcc/stmt.c =================================================================== --- gcc/stmt.c (revision 192675) +++ gcc/stmt.c (revision 192676) @@ -106,7 +106,6 @@ extern basic_block label_to_block_fn (st static int n_occurrences (int, const char *); static bool tree_conflicts_with_clobbers_p (tree, HARD_REG_SET *); -static void expand_nl_goto_receiver (void); static bool check_operand_nalternatives (tree, tree); static bool check_unique_operand_names (tree, tree, tree); static char *resolve_operand_name_1 (char *, tree, tree, tree); @@ -200,7 +199,7 @@ expand_label (tree label) if (DECL_NONLOCAL (label)) { - expand_nl_goto_receiver (); + expand_builtin_setjmp_receiver (NULL); nonlocal_goto_handler_labels = gen_rtx_EXPR_LIST (VOIDmode, label_r, nonlocal_goto_handler_labels); @@ -1556,77 +1555,6 @@ expand_return (tree retval) } } -/* Emit code to restore vital registers at the beginning of a nonlocal goto - handler. */ -static void -expand_nl_goto_receiver (void) -{ - rtx chain; - - /* Clobber the FP when we get here, so we have to make sure it's - marked as used by this function. */ - emit_use (hard_frame_pointer_rtx); - - /* Mark the static chain as clobbered here so life information - doesn't get messed up for it. */ - chain = targetm.calls.static_chain (current_function_decl, true); - if (chain && REG_P (chain)) - emit_clobber (chain); - -#ifdef HAVE_nonlocal_goto - if (! HAVE_nonlocal_goto) -#endif - /* First adjust our frame pointer to its actual value. It was - previously set to the start of the virtual area corresponding to - the stacked variables when we branched here and now needs to be - adjusted to the actual hardware fp value. - - Assignments are to virtual registers are converted by - instantiate_virtual_regs into the corresponding assignment - to the underlying register (fp in this case) that makes - the original assignment true. - So the following insn will actually be - decrementing fp by STARTING_FRAME_OFFSET. */ - emit_move_insn (virtual_stack_vars_rtx, hard_frame_pointer_rtx); - -#if !HARD_FRAME_POINTER_IS_ARG_POINTER - if (fixed_regs[ARG_POINTER_REGNUM]) - { -#ifdef ELIMINABLE_REGS - /* If the argument pointer can be eliminated in favor of the - frame pointer, we don't need to restore it. We assume here - that if such an elimination is present, it can always be used. - This is the case on all known machines; if we don't make this - assumption, we do unnecessary saving on many machines. */ - static const struct elims {const int from, to;} elim_regs[] = ELIMINABLE_REGS; - size_t i; - - for (i = 0; i < ARRAY_SIZE (elim_regs); i++) - if (elim_regs[i].from == ARG_POINTER_REGNUM - && elim_regs[i].to == HARD_FRAME_POINTER_REGNUM) - break; - - if (i == ARRAY_SIZE (elim_regs)) -#endif - { - /* Now restore our arg pointer from the address at which it - was saved in our stack frame. */ - emit_move_insn (crtl->args.internal_arg_pointer, - copy_to_reg (get_arg_pointer_save_area ())); - } - } -#endif - -#ifdef HAVE_nonlocal_goto_receiver - if (HAVE_nonlocal_goto_receiver) - emit_insn (gen_nonlocal_goto_receiver ()); -#endif - - /* We must not allow the code we just generated to be reordered by - scheduling. Specifically, the update of the frame pointer must - happen immediately, not later. */ - emit_insn (gen_blockage ()); -} /* Emit code to save the current value of stack. */ rtx brgds, H-P