This is the miscompilation of the cross-compiler targeting AVR by the native 
compiler on the SPARC at -O2, a latent problem in reorg.c that is exposed in 
the 4.5.x (and later) series by the introduction of __builtin_unreachable.

relax_delay_slots has code that tries to detect if a simple (conditional) jump 
is useless, i.e. if there is no active insn between the jump and the label.
If so, it first re-emits the insns in the delay slot(s) of the jump and then 
invokes delete_related_insns on the jump.

delete_related_insns first deletes the jump and then, if the number of uses of 
the label has reached zero, calls itself recursively on the label.  Now, when 
invoked on a label, delete_related_insns not only deletes the label but also 
the entire extended basic block starting at the label if there is a barrier 
just before the label.

With __builtin_unreachable you can have barriers at somewhat unexpected places 
and the jump might be guarding the fallthrough to the barrier; deleting the 
jump in this case doesn't mean that the block at the label can be deleted.

The solution of deleting the barrier with the jump would work, but happens to 
be tricky to implement in delete_related_insns.  The attached patch disables 
the optimization instead in this case, on the grounds that it isn't the job of 
reorg.c to optimize the CFG when no other RTL pass was able to do it before.

Bootstrapped/regtested on SPARC/Linux, applied on mainline, 4.6/4.5 branches.


2011-11-19  Eric Botcazou  <ebotca...@adacore.com>

        PR rtl-optimization/51187
        * reorg.c (relax_delay_slots): Do not consider a jump useless if there
        is a barrier between the jump and its target label.


2011-11-19  Eric Botcazou  <ebotca...@adacore.com>

        * gcc.dg/delay-slot-2.c: New test.


-- 
Eric Botcazou
Index: reorg.c
===================================================================
--- reorg.c	(revision 181505)
+++ reorg.c	(working copy)
@@ -3600,9 +3600,11 @@ relax_delay_slots (rtx first)
 	    }
 	}
 
+      /* See if we have a simple (conditional) jump that is useless.  */
       if (! INSN_ANNULLED_BRANCH_P (delay_insn)
-	  && prev_active_insn (target_label) == insn
 	  && ! condjump_in_parallel_p (delay_insn)
+	  && prev_active_insn (target_label) == insn
+	  && ! BARRIER_P (prev_nonnote_insn (target_label))
 #ifdef HAVE_cc0
 	  /* If the last insn in the delay slot sets CC0 for some insn,
 	     various code assumes that it is in a delay slot.  We could
/* PR rtl-optimization/51187 */
/* Reported by Jurij Smakov <ju...@wooyd.org> */

/* { dg-do compile } */
/* { dg-options "-g -O2" } */

/* External routines called by write_cond.  Declarations are sufficient
   because this is a compile-only test (see the dg-do directive).  */
extern int printf (__const char *__restrict __format, ...);
extern void print_c_condition (const char *);
/* Declared explicitly: implicit function declarations are invalid since
   C99.  The parameter type matches the `long intval' member passed at the
   only call site; the result is unused there, so `void' is assumed.  */
extern void print_host_wide_int (long);

/* Discriminator stored in `struct decision_test'.  The enumerators are
   implicitly numbered from zero in the order listed, so the order must
   not change.  */
enum decision_type
{
  DT_num_insns,
  DT_mode,
  DT_code,
  DT_veclen,
  DT_elt_zero_int,
  DT_elt_one_int,
  DT_elt_zero_wide,
  DT_elt_zero_wide_safe,
  DT_const_int,
  DT_veclen_ge,
  DT_dup,
  DT_pred,
  DT_c_test,
  DT_accept_op,
  DT_accept_insn
};

/* One decision test: a link to another test, a type tag, and a union of
   per-type payloads.  write_cond switches on `type' and reads the union
   member noted next to each field below.  */
struct decision_test
{
  /* Pointer to another decision_test; not followed by write_cond.  */
  struct decision_test *next;
  /* Selects which member of `u' write_cond reads.  */
  enum decision_type type;

  union
  {
    /* Read for DT_num_insns.  */
    int num_insns;

    struct
    {
      /* Read for DT_pred; printed with %s.  */
      const char *name;
    } pred;

    /* Read for DT_c_test; passed to print_c_condition.  */
    const char *c_test;
    /* Read for DT_veclen and DT_veclen_ge.  */
    int veclen;
    /* Read for DT_dup.  */
    int dup;
    /* Read for DT_elt_zero_int, DT_elt_one_int, DT_elt_zero_wide,
       DT_elt_zero_wide_safe and DT_const_int.  */
    long intval;
    /* Not read by write_cond; presumably for DT_accept_op -- TODO confirm.  */
    int opno;

    /* Only num_clobbers_to_add is read by write_cond (DT_accept_insn).  */
    struct {
      int code_number;
      int lineno;
      int num_clobbers_to_add;
    } insn;
  } u;
};

/* Kind of routine a condition is written for; write_cond only compares
   its argument against RECOG.  Values are implicit, starting at zero.  */
enum routine_type
{
  RECOG,
  SPLIT,
  PEEPHOLE2
};

/* Print, via printf and the external print_* helpers, the C condition
   text corresponding to decision test P.

   P                is the test to print; P->type selects the case and
                    the member of P->u that is read.
   DEPTH            is substituted into the operand name ("x<DEPTH>").
   SUBROUTINE_TYPE  is only examined in the DT_accept_insn case.

   DT_mode and DT_accept_op have no case of their own and therefore reach
   the `default' label, which is marked unreachable.

   NOTE: this function is the reduced reproducer for PR
   rtl-optimization/51187; the shape of the switch and of the
   __builtin_unreachable calls is what the test exercises, so the code
   should not be restructured.  */
void
write_cond (struct decision_test *p, int depth,
     enum routine_type subroutine_type)
{
  switch (p->type)
    {
    case DT_num_insns:
      printf ("peep2_current_count >= %d", p->u.num_insns);
      break;

    case DT_code:
      /* Only the left-hand side of the comparison is printed here.  */
      printf ("GET_CODE (x%d) == ", depth);
      break;

    case DT_veclen:
      printf ("XVECLEN (x%d, 0) == %d", depth, p->u.veclen);
      break;

    case DT_elt_zero_int:
      /* intval is a long; it is narrowed to int to match %d.  */
      printf ("XINT (x%d, 0) == %d", depth, (int) p->u.intval);
      break;

    case DT_elt_one_int:
      printf ("XINT (x%d, 1) == %d", depth, (int) p->u.intval);
      break;

    case DT_elt_zero_wide:
    case DT_elt_zero_wide_safe:
      /* Both wide variants share one body; the value itself is emitted
	 by the external helper rather than by printf.  */
      printf ("XWINT (x%d, 0) == ", depth);
      print_host_wide_int (p->u.intval);
      break;

    case DT_const_int:
      printf ("x%d == const_int_rtx[MAX_SAVED_CONST_INT + (%d)]",
       depth, (int) p->u.intval);
      break;

    case DT_veclen_ge:
      printf ("XVECLEN (x%d, 0) >= %d", depth, p->u.veclen);
      break;

    case DT_dup:
      printf ("rtx_equal_p (x%d, operands[%d])", depth, p->u.dup);
      break;

    case DT_pred:
      printf ("%s (x%d)", p->u.pred.name, depth);
      break;

    case DT_c_test:
      print_c_condition (p->u.c_test);
      break;

    case DT_accept_insn:
      /* Two expanded assertions: control is declared unreachable when
	 SUBROUTINE_TYPE is not RECOG, and when num_clobbers_to_add is
	 zero.  The printf below must survive compilation (see the
	 dg-final scan for "printf").  */
      ((void)(__builtin_expect(!(subroutine_type == RECOG), 0) ? __builtin_unreachable(), 0 : 0));
      ((void)(__builtin_expect(!(p->u.insn.num_clobbers_to_add), 0) ? __builtin_unreachable(), 0 : 0));
      printf ("pnum_clobbers != NULL");
      break;

    default:
      /* Any type without an explicit case is asserted never to occur.  */
      __builtin_unreachable();
    }
}

/* { dg-final { scan-assembler "printf" } } */

Reply via email to