On Sat, Nov 24, 2012 at 2:09 AM, Steven Bosscher wrote: > On Fri, Nov 23, 2012 at 11:45 PM, Steven Bosscher wrote: >> Removing the note is easier but it may hurt optimization later on: >> CSE2 puts the note back in but this introduces a pass ordering >> dependency. Perhaps that's not a big problem, but I'm not sure... > > Hmm, actually CSE2 fails to put the note back, I guess because it's a > local CSE pass only...
Here's another possible fix: Just punt on all REG_EQUAL notes for the IV. It's a bit of a big hammer, but there are no code changes in my set of cc1-i files (compiled with -funroll-loops) on x86_64 and powerpc64 so apparently it's not all that important to keep the notes. Comments on this one? Ciao! Steven gcc/ * rtlanal.c (remove_reg_equal_equiv_notes_for_regno): Fix for deferred re-scanning. * loop-unroll.c (struct iv_to_split): Add new 'orig_var' member. (analyze_iv_to_split_insn): Record it. (analyze_insns_in_loop): Remove all REG_EQUAL notes for IVs that will be split. (apply_opt_in_copies): Use FOR_BB_INSNS_SAFE. testsuite/ * gcc.dg/pr55006.c: New test. Index: rtlanal.c =================================================================== --- rtlanal.c (revision 193394) +++ rtlanal.c (working copy) @@ -2015,15 +2015,18 @@ remove_reg_equal_equiv_notes (rtx insn) void remove_reg_equal_equiv_notes_for_regno (unsigned int regno) { - df_ref eq_use; + df_ref eq_use, next_eq_use; if (!df) return; /* This loop is a little tricky. We cannot just go down the chain because - it is being modified by some actions in the loop. So we just iterate - over the head. We plan to drain the list anyway. */ - while ((eq_use = DF_REG_EQ_USE_CHAIN (regno)) != NULL) + it may be modified by some actions in the loop if re-scanning is not + deferred. We also can't just iterate over the head because the chain + may be *not* modified if re-scanning *is* deferred. */ + for (eq_use = DF_REG_EQ_USE_CHAIN (regno); + eq_use != NULL; + eq_use = next_eq_use) { rtx insn = DF_REF_INSN (eq_use); rtx note = find_reg_equal_equiv_note (insn); @@ -2033,6 +2036,8 @@ remove_reg_equal_equiv_notes_for_regno ( remove_note. */ gcc_assert (note); + next_eq_use = DF_REF_NEXT_REG (eq_use); + remove_note (insn, note); } } Index: loop-unroll.c =================================================================== --- loop-unroll.c (revision 193394) +++ loop-unroll.c (working copy) @@ -74,6 +74,7 @@ along with GCC; see the file COPYING3. struct iv_to_split { rtx insn; /* The insn in that the induction variable occurs. */ + rtx orig_var; /* The variable (register) for the IV before split. */ rtx base_var; /* The variable on that the values in the further iterations are based. */ rtx step; /* Step of the induction variable. */ @@ -1833,6 +1834,7 @@ analyze_iv_to_split_insn (rtx insn) /* Record the insn to split. */ ivts = XNEW (struct iv_to_split); ivts->insn = insn; + ivts->orig_var = dest; ivts->base_var = NULL_RTX; ivts->step = iv.step; ivts->next = NULL; @@ -1913,6 +1915,12 @@ analyze_insns_in_loop (struct loop *loop if (ivts) { + /* Updating REG_EQUAL notes for this IV is tricky: We cannot tell + until after unrolling whether an EQ_USE is reached by the split + IV while the IV reg is still live. See PR55006. + Be conservative, remove all such notes. */ + remove_reg_equal_equiv_notes_for_regno (REGNO (ivts->orig_var)); + slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT); gcc_assert (*slot1 == NULL); *slot1 = ivts; @@ -2293,9 +2301,8 @@ apply_opt_in_copies (struct opt_info *op unrolling); bb->aux = 0; orig_insn = BB_HEAD (orig_bb); - for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next) + FOR_BB_INSNS_SAFE (bb, insn, next) { - next = NEXT_INSN (insn); if (!INSN_P (insn) || (DEBUG_INSN_P (insn) && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL)) Index: testsuite/gcc.dg/pr55006.c =================================================================== --- testsuite/gcc.dg/pr55006.c (revision 0) +++ testsuite/gcc.dg/pr55006.c (revision 0) @@ -0,0 +1,88 @@ +/* PR rtl-optimization/55006 */ +/* { dg-do run } */ +/* { dg-options "-O3 -fomit-frame-pointer -funroll-loops" } */ + +extern void abort (void) __attribute__ ((__noreturn__)); +extern void *memcpy (void *__restrict, const void *__restrict, __SIZE_TYPE__); + +static void __attribute__ ((__noinline__, __noclone__)) ga4076 (void) +{ + int D833; + float D834; + int D837; + int D840; + float D841; + int D844; + float dda[100]; + int ids; + float limit3; + int offset2; + int pos1; + int S4; + + static float A0[100] = + { 10e+0, 20e+0, 30e+0, 40e+0, 50e+0, 60e+0, 70e+0, 80e+0, 90e+0, + 10e+1, 11e+1, 12e+1, 13e+1, 14e+1, 15e+1, 16e+1, 17e+1, 18e+1, + 19e+1, 20e+1, 21e+1, 22e+1, 23e+1, 24e+1, 25e+1, 26e+1, 27e+1, + 28e+1, 29e+1, 30e+1, 31e+1, 32e+1, 33e+1, 34e+1, 35e+1, 36e+1, + 37e+1, 38e+1, 39e+1, 40e+1, 41e+1, 42e+1, 43e+1, 44e+1, 45e+1, + 46e+1, 47e+1, 48e+1, 49e+1, 50e+1, 51e+1, 52e+1, 53e+1, 54e+1, + 55e+1, 56e+1, 57e+1, 58e+1, 59e+1, 60e+1, 61e+1, 62e+1, 63e+1, + 64e+1, 65e+1, 66e+1, 67e+1, 68e+1, 69e+1, 70e+1, 71e+1, 72e+1, + 73e+1, 74e+1, 75e+1, 76e+1, 77e+1, 78e+1, 79e+1, 80e+1, 81e+1, + 82e+1, 83e+1, 84e+1, 85e+1, 86e+1, 87e+1, 88e+1, 89e+1, 90e+1, + 91e+1, 92e+1, 93e+1, 94e+1, 95e+1, 96e+1, 97e+1, 98e+1, 99e+1, + 10e+2 + }; + + memcpy (dda, A0, sizeof (dda)); + + limit3 = -1. * __builtin_inf(); + pos1 = 0; + offset2 = 0; + S4 = 1; + +D831: + if (S4 > 100) goto L3; else goto D832; +D832: + D833 = S4 - 1; + D834 = dda[D833]; + if (D834 >= limit3) goto D835; else goto D836; +D835: + D837 = S4 - 1; + limit3 = dda[D837]; + pos1 = S4 + offset2; + goto L1; +D836: + S4 = S4 + 1; + goto D831; +L3: + pos1 = 1; + goto L2; +L1: + if (S4 > 100) goto L2; else goto D839; +D839: + D840 = S4 - 1; + D841 = dda[D840]; + if (D841 > limit3) goto D842; else goto D843; +D842: + D844 = S4 - 1; + limit3 = dda[D844]; + pos1 = S4 + offset2; +D843: + S4 = S4 + 1; + goto L1; +L2: + ids = pos1; + if (ids != 100) + abort (); +} + + +int +main (void) +{ + ga4076 (); + return 0; +} +