This is the miscompilation of the cross-compiler targeting AVR by the native compiler on the SPARC at -O2, a latent problem in reorg.c that is exposed in the 4.5.x (and later) series by the introduction of __builtin_unreachable.
relax_delay_slots has code that tries to detect if a simple (conditional) jump is useless, i.e. if there is no active insn between the jump and the label. If so, it first re-emits the insns in the delay slot(s) of the jump and then invokes delete_related_insns on the jump. delete_related_insns first deletes the jump and then, if the number of uses of the label has reached zero, calls itself recursively on the label. Now, when invoked on a label, delete_related_insns not only deletes the label but also the entire extended basic block starting at the label if there is a barrier just before the label. With __builtin_unreachable you can have barriers at somewhat unexpected places and the jump might be guarding the fallthrough to the barrier; deleting the jump in this case doesn't mean that the block at the label can be deleted. The solution of deleting the barrier with the jump would work, but happens to be tricky to implement in delete_related_insns. The attached patch disables the optimization instead in this case, on the grounds that it isn't the job of reorg.c to optimize the CFG when no other RTL pass was able to do it before. Bootstrapped/regtested on SPARC/Linux, applied on mainline, 4.6/4.5 branches. 2011-11-19 Eric Botcazou <ebotca...@adacore.com> PR rtl-optimization/51187 * reorg.c (relax_delay_slots): Do not consider a jump useless if there is a barrier between the jump and its target label. 2011-11-19 Eric Botcazou <ebotca...@adacore.com> * gcc.dg/delay-slot-2.c: New test. -- Eric Botcazou
Index: reorg.c =================================================================== --- reorg.c (revision 181505) +++ reorg.c (working copy) @@ -3600,9 +3600,11 @@ relax_delay_slots (rtx first) } } + /* See if we have a simple (conditional) jump that is useless. */ if (! INSN_ANNULLED_BRANCH_P (delay_insn) - && prev_active_insn (target_label) == insn && ! condjump_in_parallel_p (delay_insn) + && prev_active_insn (target_label) == insn + && ! BARRIER_P (prev_nonnote_insn (target_label)) #ifdef HAVE_cc0 /* If the last insn in the delay slot sets CC0 for some insn, various code assumes that it is in a delay slot. We could
/* PR rtl-optimization/51187 */
/* Reported by Jurij Smakov <ju...@wooyd.org> */

/* { dg-do compile } */
/* { dg-options "-g -O2" } */

/* Compile-only regression test.  write_cond below is a switch in which the
   DT_accept_insn arm and the default arm contain __builtin_unreachable.
   The final directive at the bottom of the file checks that the generated
   assembly still references printf.  The exact shape of the code matters
   for a compiler testcase, so keep edits to a minimum.  */

extern int printf (__const char *__restrict __format, ...);
extern void print_c_condition (const char *);
/* Declared explicitly: calling this function without a declaration in scope
   relies on implicit function declarations, which C99 removed.  The
   parameter type matches the `long intval' member passed at the call.  */
extern void print_host_wide_int (long);

/* Kinds of decision test.  write_cond has no case for DT_mode or
   DT_accept_op; those values reach its default arm.  */
enum decision_type
{
  DT_num_insns,
  DT_mode,
  DT_code,
  DT_veclen,
  DT_elt_zero_int,
  DT_elt_one_int,
  DT_elt_zero_wide,
  DT_elt_zero_wide_safe,
  DT_const_int,
  DT_veclen_ge,
  DT_dup,
  DT_pred,
  DT_c_test,
  DT_accept_op,
  DT_accept_insn
};

/* One decision test.  TYPE selects which member of U write_cond reads;
   NEXT is not used by write_cond.  */
struct decision_test
{
  struct decision_test *next;
  enum decision_type type;

  union
  {
    int num_insns;		/* Read for DT_num_insns.  */

    struct
    {
      const char *name;		/* Read for DT_pred.  */
    } pred;

    const char *c_test;		/* Read for DT_c_test.  */
    int veclen;			/* Read for DT_veclen, DT_veclen_ge.  */
    int dup;			/* Read for DT_dup.  */
    long intval;		/* Read for the DT_elt_* and DT_const_int cases.  */
    int opno;			/* Not read by write_cond.  */

    struct
    {
      int code_number;
      int lineno;
      int num_clobbers_to_add;	/* Checked for DT_accept_insn.  */
    } insn;
  } u;
};

/* Passed to write_cond; only compared against RECOG there.  */
enum routine_type
{
  RECOG,
  SPLIT,
  PEEPHOLE2
};

/* Print the condition text for decision test P.

   P selects the output through P->type, and the matching member of P->u
   supplies the value that is printed.  DEPTH is interpolated into the
   "x<DEPTH>" operand name in most cases.  SUBROUTINE_TYPE is consulted only
   in the DT_accept_insn case.

   Nothing is returned; all output goes through printf,
   print_host_wide_int and print_c_condition.  */
void
write_cond (struct decision_test *p, int depth,
	    enum routine_type subroutine_type)
{
  switch (p->type)
    {
    case DT_num_insns:
      printf ("peep2_current_count >= %d", p->u.num_insns);
      break;

    case DT_code:
      printf ("GET_CODE (x%d) == ", depth);
      break;

    case DT_veclen:
      printf ("XVECLEN (x%d, 0) == %d", depth, p->u.veclen);
      break;

    case DT_elt_zero_int:
      printf ("XINT (x%d, 0) == %d", depth, (int) p->u.intval);
      break;

    case DT_elt_one_int:
      printf ("XINT (x%d, 1) == %d", depth, (int) p->u.intval);
      break;

    case DT_elt_zero_wide:
    case DT_elt_zero_wide_safe:
      printf ("XWINT (x%d, 0) == ", depth);
      print_host_wide_int (p->u.intval);
      break;

    case DT_const_int:
      printf ("x%d == const_int_rtx[MAX_SAVED_CONST_INT + (%d)]",
	      depth, (int) p->u.intval);
      break;

    case DT_veclen_ge:
      printf ("XVECLEN (x%d, 0) >= %d", depth, p->u.veclen);
      break;

    case DT_dup:
      printf ("rtx_equal_p (x%d, operands[%d])", depth, p->u.dup);
      break;

    case DT_pred:
      printf ("%s (x%d)", p->u.pred.name, depth);
      break;

    case DT_c_test:
      print_c_condition (p->u.c_test);
      break;

    case DT_accept_insn:
      /* Two assertion-style checks: control reaches __builtin_unreachable
	 when SUBROUTINE_TYPE is not RECOG, or when num_clobbers_to_add is
	 zero.  The printf below runs only when both checks pass.  */
      ((void)(__builtin_expect(!(subroutine_type == RECOG), 0)
	      ? __builtin_unreachable(), 0 : 0));
      ((void)(__builtin_expect(!(p->u.insn.num_clobbers_to_add), 0)
	      ? __builtin_unreachable(), 0 : 0));
      printf ("pnum_clobbers != NULL");
      break;

    default:
      /* Any decision type without a case above is declared unreachable.  */
      __builtin_unreachable();
    }
}

/* { dg-final { scan-assembler "printf" } } */