On Sat, Nov 24, 2012 at 2:09 AM, Steven Bosscher wrote:
> On Fri, Nov 23, 2012 at 11:45 PM, Steven Bosscher wrote:
>> Removing the note is easier but it may hurt optimization later on:
>> CSE2 puts the note back in but this introduces a pass ordering
>> dependency. Perhaps that's not a big problem, but I'm not sure...
>
> Hmm, actually CSE2 fails to put the note back, I guess because it's a
> local CSE pass only...

Here's another possible fix: Just punt on all REG_EQUAL notes for the
IV. It's a bit of a big hammer, but there are no code changes in my
set of cc1-i files (compiled with -funroll-loops) on x86_64 and
powerpc64 so apparently it's not all that important to keep the notes.

Comments on this one?

Ciao!
Steven

gcc/
        * rtlanal.c (remove_reg_equal_equiv_notes_for_regno): Fix for
        deferred re-scanning.
        * loop-unroll.c (struct iv_to_split): Add new 'orig_var' member.
        (analyze_iv_to_split_insn): Record it.
        (analyze_insns_in_loop): Remove all REG_EQUAL notes for IVs that
        will be split.
        (apply_opt_in_copies): Use FOR_BB_INSNS_SAFE.

testsuite/
        * gcc.dg/pr55006.c: New test.

Index: rtlanal.c
===================================================================
--- rtlanal.c   (revision 193394)
+++ rtlanal.c   (working copy)
@@ -2015,15 +2015,18 @@ remove_reg_equal_equiv_notes (rtx insn)
 void
 remove_reg_equal_equiv_notes_for_regno (unsigned int regno)
 {
-  df_ref eq_use;
+  df_ref eq_use, next_eq_use;

   if (!df)
     return;

   /* This loop is a little tricky.  We cannot just go down the chain because
-     it is being modified by some actions in the loop.  So we just iterate
-     over the head.  We plan to drain the list anyway.  */
-  while ((eq_use = DF_REG_EQ_USE_CHAIN (regno)) != NULL)
+     it may be modified by some actions in the loop if re-scanning is not
+     deferred.  We also can't just iterate over the head because the chain
+     may be *not* modified if re-scanning *is* deferred.  */
+  for (eq_use = DF_REG_EQ_USE_CHAIN (regno);
+       eq_use != NULL;
+       eq_use = next_eq_use)
     {
       rtx insn = DF_REF_INSN (eq_use);
       rtx note = find_reg_equal_equiv_note (insn);
@@ -2033,6 +2036,8 @@ remove_reg_equal_equiv_notes_for_regno (
         remove_note.  */
       gcc_assert (note);

+      next_eq_use = DF_REF_NEXT_REG (eq_use);
+
       remove_note (insn, note);
     }
 }
Index: loop-unroll.c
===================================================================
--- loop-unroll.c       (revision 193394)
+++ loop-unroll.c       (working copy)
@@ -74,6 +74,7 @@ along with GCC; see the file COPYING3.
 struct iv_to_split
 {
   rtx insn;            /* The insn in that the induction variable occurs.  */
+  rtx orig_var;                /* The variable (register) for the IV before 
split.  */
   rtx base_var;                /* The variable on that the values in the 
further
                           iterations are based.  */
   rtx step;            /* Step of the induction variable.  */
@@ -1833,6 +1834,7 @@ analyze_iv_to_split_insn (rtx insn)
   /* Record the insn to split.  */
   ivts = XNEW (struct iv_to_split);
   ivts->insn = insn;
+  ivts->orig_var = dest;
   ivts->base_var = NULL_RTX;
   ivts->step = iv.step;
   ivts->next = NULL;
@@ -1913,6 +1915,12 @@ analyze_insns_in_loop (struct loop *loop

         if (ivts)
           {
+           /* Updating REG_EQUAL notes for this IV is tricky: We cannot tell
+              until after unrolling whether an EQ_USE is reached by the split
+              IV while the IV reg is still live.  See PR55006.
+              Be conservative, remove all such notes.  */
+           remove_reg_equal_equiv_notes_for_regno (REGNO (ivts->orig_var));
+
             slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
            gcc_assert (*slot1 == NULL);
             *slot1 = ivts;
@@ -2293,9 +2301,8 @@ apply_opt_in_copies (struct opt_info *op
                                        unrolling);
       bb->aux = 0;
       orig_insn = BB_HEAD (orig_bb);
-      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
+      FOR_BB_INSNS_SAFE (bb, insn, next)
         {
-          next = NEXT_INSN (insn);
          if (!INSN_P (insn)
              || (DEBUG_INSN_P (insn)
                  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
Index: testsuite/gcc.dg/pr55006.c
===================================================================
--- testsuite/gcc.dg/pr55006.c  (revision 0)
+++ testsuite/gcc.dg/pr55006.c  (revision 0)
@@ -0,0 +1,88 @@
+/* PR rtl-optimization/55006 */
+/* { dg-do run } */
+/* { dg-options "-O3 -fomit-frame-pointer -funroll-loops" } */
+
+extern void abort (void) __attribute__ ((__noreturn__));
+extern void *memcpy (void *__restrict, const void *__restrict, __SIZE_TYPE__);
+
+static void __attribute__ ((__noinline__, __noclone__)) ga4076 (void)
+{
+  int D833;
+  float D834;
+  int D837;
+  int D840;
+  float D841;
+  int D844;
+  float dda[100];
+  int ids;
+  float limit3;
+  int offset2;
+  int pos1;
+  int S4;
+
+  static float A0[100] =
+  { 10e+0, 20e+0, 30e+0, 40e+0, 50e+0, 60e+0, 70e+0, 80e+0, 90e+0,
+    10e+1, 11e+1, 12e+1, 13e+1, 14e+1, 15e+1, 16e+1, 17e+1, 18e+1,
+    19e+1, 20e+1, 21e+1, 22e+1, 23e+1, 24e+1, 25e+1, 26e+1, 27e+1,
+    28e+1, 29e+1, 30e+1, 31e+1, 32e+1, 33e+1, 34e+1, 35e+1, 36e+1,
+    37e+1, 38e+1, 39e+1, 40e+1, 41e+1, 42e+1, 43e+1, 44e+1, 45e+1,
+    46e+1, 47e+1, 48e+1, 49e+1, 50e+1, 51e+1, 52e+1, 53e+1, 54e+1,
+    55e+1, 56e+1, 57e+1, 58e+1, 59e+1, 60e+1, 61e+1, 62e+1, 63e+1,
+    64e+1, 65e+1, 66e+1, 67e+1, 68e+1, 69e+1, 70e+1, 71e+1, 72e+1,
+    73e+1, 74e+1, 75e+1, 76e+1, 77e+1, 78e+1, 79e+1, 80e+1, 81e+1,
+    82e+1, 83e+1, 84e+1, 85e+1, 86e+1, 87e+1, 88e+1, 89e+1, 90e+1,
+    91e+1, 92e+1, 93e+1, 94e+1, 95e+1, 96e+1, 97e+1, 98e+1, 99e+1,
+    10e+2
+  };
+
+  memcpy (dda, A0, sizeof (dda));
+
+  limit3 = -1. * __builtin_inf();
+  pos1 = 0;
+  offset2 = 0;
+  S4 = 1;
+
+D831:
+  if (S4 > 100) goto L3; else goto D832;
+D832:
+  D833 = S4 - 1;
+  D834 = dda[D833];
+  if (D834 >= limit3) goto D835; else goto D836;
+D835:
+  D837 = S4 - 1;
+  limit3 = dda[D837];
+  pos1 = S4 + offset2;
+  goto L1;
+D836:
+  S4 = S4 + 1;
+  goto D831;
+L3:
+  pos1 = 1;
+  goto L2;
+L1:
+  if (S4 > 100) goto L2; else goto D839;
+D839:
+  D840 = S4 - 1;
+  D841 = dda[D840];
+  if (D841 > limit3) goto D842; else goto D843;
+D842:
+  D844 = S4 - 1;
+  limit3 = dda[D844];
+  pos1 = S4 + offset2;
+D843:
+  S4 = S4 + 1;
+  goto L1;
+L2:
+  ids = pos1;
+  if (ids != 100)
+    abort ();
+}
+
+
+int
+main (void)
+{
+  ga4076 ();
+  return 0;
+}
+

Reply via email to