http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23286

--- Comment #40 from Bernhard Reutner-Fischer <aldot at gcc dot gnu.org> 
2012-02-23 13:34:37 UTC ---
The ATTRIBUTE_UNUSED of do_hoist_insertion can be removed.

diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c
index 0f777b4..bfc7a92 100644
--- a/gcc/tree-ssa-pre.c
+++ b/gcc/tree-ssa-pre.c
@@ -3865,7 +3865,7 @@ do_pre_partial_partial_insertion (basic_block block, basic_block dom)
    The caller has to make sure that BLOCK has at least two successors.  */

 static bool
-do_hoist_insertion (basic_block block ATTRIBUTE_UNUSED)
+do_hoist_insertion (basic_block block)
 {
   edge e;
   edge_iterator ei;
@@ -3878,6 +3878,13 @@ do_hoist_insertion (basic_block block ATTRIBUTE_UNUSED)
   /* At least two successors, or else...  */
   gcc_assert (EDGE_COUNT (block->succs) >= 2);

+  /* We cheat about AVAIL_OUT in the first block
+     so pretend we are done in the second iteration.  */
+  if (block->prev_bb
+      && block->prev_bb->index == ENTRY_BLOCK
+      && pre_stats.hoist_insert)
+    return false;
+
   /* Check that all successors of BLOCK are dominated by block.
      We could use dominated_by_p() for this, but actually there is a much
      quicker check: any successor that is dominated by BLOCK can't have
@@ -3890,9 +3897,12 @@ do_hoist_insertion (basic_block block ATTRIBUTE_UNUSED)
   availout_in_some = BITMAP_ALLOC (&grand_bitmap_obstack);

   /* A hoistable value must be in ANTIC_IN(block)
-     but not in AVAIL_OUT(BLOCK).  */
+     but not in AVAIL_OUT(BLOCK).
+     To give more opportunity to hoisting,
+     cheat by disregarding AVAIL_OUT of the ENTRY_BLOCK.  */
   bitmap_set_copy (hoistable_set, ANTIC_IN (block));
-  bitmap_set_subtract_values (hoistable_set, AVAIL_OUT (block));
+  if (block->prev_bb && block->prev_bb->index != ENTRY_BLOCK)
+    bitmap_set_subtract_values (hoistable_set, AVAIL_OUT (block));

   /* Short-cut for a common case: hoistable_set is empty.  */
   if (bitmap_empty_p (&hoistable_set->values))


So, for a simplified version of PR5738,
$ cat pr5738.c
/* Holds a single pointer to unsigned short; func below reads through
   P and advances it.  */
struct foo
{
  unsigned short *p;
};

/* foo_s expands to s, so the first parameter of func is named s.  */
#define foo_s s
/* Simplified test case: both arms of the conditional perform the same
   work -- load the unsigned short at s->p, post-increment s->p, and add
   the loaded value to *COORD -- so the effect does not depend on DELTA.  */
void
func (struct foo *foo_s, unsigned int *coord, _Bool delta)
{
  unsigned short change;

  if (delta)
    {
      /* Read the current element, then advance s->p by one element.  */
      change = *((foo_s)->p++);
      *coord += change;
    }
  else
    {
      /* Same two statements as the DELTA arm.  */
      change = *((foo_s)->p++);
      *coord += change;
//      *coord += *((foo_s)->p++) << 8;
    }
}

we end up with slightly better code, something like:

func (struct foo * sD.1705, unsigned intD.9 * coordD.1706, _BoolD.1685 deltaD.1707)
{
  unsigned intD.9 pretmp.6D.1727;
  short unsigned intD.16 * pretmp.5D.1726;
  short unsigned intD.16 pretmp.4D.1725;
  short unsigned intD.16 * pretmp.3D.1724;
  short unsigned intD.16 changeD.1710;
  unsigned intD.9 D.1718;
  unsigned intD.9 D.1717;
  unsigned intD.9 D.1716;
  short unsigned intD.16 * D.1715;
  short unsigned intD.16 * D.1714;

  # BLOCK 2 freq:10000
  # PRED: ENTRY [100.0%]  (fallthru,exec)
  # VUSE <.MEMD.1720_17(D)>
  # PT = nonlocal escaped 
  pretmp.3D.1724_22 = sD.1705_2(D)->pD.1704;
  # VUSE <.MEMD.1720_17(D)>
  pretmp.4D.1725_23 = *pretmp.3D.1724_22;
  # PT = nonlocal escaped 
  pretmp.5D.1726_24 = pretmp.3D.1724_22 + 2;
  # VUSE <.MEMD.1720_17(D)>
  pretmp.6D.1727_25 = *coordD.1706_6(D);
  pretmp.6D.1727_26 = (unsigned intD.9) pretmp.4D.1725_23;
  pretmp.6D.1727_27 = pretmp.6D.1727_25 + pretmp.6D.1727_26;
  if (deltaD.1707_1(D) != 0)
    goto <bb 3>;
  else
    goto <bb 4>;
  # SUCC: 3 [39.0%]  (true,exec) 4 [61.0%]  (false,exec)

  # BLOCK 3 freq:3900
  # PRED: 2 [39.0%]  (true,exec)
  # .MEMD.1720_18 = VDEF <.MEMD.1720_17(D)>
  sD.1705_2(D)->pD.1704 = pretmp.5D.1726_24;
  # .MEMD.1720_19 = VDEF <.MEMD.1720_18>
  *coordD.1706_6(D) = pretmp.6D.1727_27;
  goto <bb 5>;
  # SUCC: 5 [100.0%]  (fallthru,exec)

  # BLOCK 4 freq:6100
  # PRED: 2 [61.0%]  (false,exec)
  # .MEMD.1720_20 = VDEF <.MEMD.1720_17(D)>
  sD.1705_2(D)->pD.1704 = pretmp.5D.1726_24;
  # .MEMD.1720_21 = VDEF <.MEMD.1720_20>
  *coordD.1706_6(D) = pretmp.6D.1727_27;
  # SUCC: 5 [100.0%]  (fallthru,exec)

  # BLOCK 5 freq:10000
  # PRED: 3 [100.0%]  (fallthru,exec) 4 [100.0%]  (fallthru,exec)
  # .MEMD.1720_16 = PHI <.MEMD.1720_19(3), .MEMD.1720_21(4)>
  # VUSE <.MEMD.1720_16>
  return;
  # SUCC: EXIT [100.0%] 

}
which translates to nearly proper code:

func:
.LFB0:
        .cfi_startproc
        movq    (%rdi), %rax    # sD.1705_2(D)->pD.1704, pretmp.3D.1724
        leaq    2(%rax), %rcx   #, pretmp.5D.1726
        movzwl  (%rax), %eax    # *pretmp.3D.1724_22, pretmp.6D.1727
        addl    (%rsi), %eax    # *coordD.1706_6(D), pretmp.6D.1727
        testb   %dl, %dl        # deltaD.1707
        movq    %rcx, (%rdi)    # pretmp.5D.1726, sD.1705_2(D)->pD.1704
        movl    %eax, (%rsi)    # pretmp.6D.1727, *coordD.1706_6(D)
        je      .L2     #,
        ret
.L2:
        ret
        .cfi_endproc

where the expected code would be something like (I think):

func:
.LFB0:
        .cfi_startproc
        movq    (%rdi), %rax    # sD.1705_2(D)->pD.1704, D.1714
        movzwl  (%rax), %edx    #* D.1714, changeD.1710
        addq    $2, %rax        #, tmp77
        movq    %rax, (%rdi)    # tmp77, sD.1705_2(D)->pD.1704
        addl    %edx, (%rsi)    # changeD.1710, *coordD.1706_6(D)
        ret
        .cfi_endproc
.LFE0:

So we just need to recognize that BB3 and BB4 are identical (everything in BB3
can be hoisted and BB4 is dead).

Reply via email to