http://gcc.gnu.org/bugzilla/show_bug.cgi?id=23286
--- Comment #40 from Bernhard Reutner-Fischer <aldot at gcc dot gnu.org> 2012-02-23 13:34:37 UTC --- The ATTRIBUTE_UNUSED of do_hoist_insertion can be removed. diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c index 0f777b4..bfc7a92 100644 --- a/gcc/tree-ssa-pre.c +++ b/gcc/tree-ssa-pre.c @@ -3865,7 +3865,7 @@ do_pre_partial_partial_insertion (basic_block block, basic_block dom) The caller has to make sure that BLOCK has at least two successors. */ static bool -do_hoist_insertion (basic_block block ATTRIBUTE_UNUSED) +do_hoist_insertion (basic_block block) { edge e; edge_iterator ei; @@ -3878,6 +3878,13 @@ do_hoist_insertion (basic_block block ATTRIBUTE_UNUSED) /* At least two successors, or else... */ gcc_assert (EDGE_COUNT (block->succs) >= 2); + /* We cheat about AVAIL_OUT in the first block + so pretend we are done in the second iteration. */ + if (block->prev_bb + && block->prev_bb->index == ENTRY_BLOCK + && pre_stats.hoist_insert) + return false; + /* Check that all successors of BLOCK are dominated by block. We could use dominated_by_p() for this, but actually there is a much quicker check: any successor that is dominated by BLOCK can't have @@ -3890,9 +3897,12 @@ do_hoist_insertion (basic_block block ATTRIBUTE_UNUSED) availout_in_some = BITMAP_ALLOC (&grand_bitmap_obstack); /* A hoistable value must be in ANTIC_IN(block) - but not in AVAIL_OUT(BLOCK). */ + but not in AVAIL_OUT(BLOCK). + To give more opportunity to hoisting, + cheat by disregarding AVAIL_OUT of the ENTRY_BLOCK. */ bitmap_set_copy (hoistable_set, ANTIC_IN (block)); - bitmap_set_subtract_values (hoistable_set, AVAIL_OUT (block)); + if (block->prev_bb && block->prev_bb->index != ENTRY_BLOCK) + bitmap_set_subtract_values (hoistable_set, AVAIL_OUT (block)); /* Short-cut for a common case: hoistable_set is empty. 
*/ if (bitmap_empty_p (&hoistable_set->values)) so for a simplified PR5738 $ cat pr5738.c struct foo { unsigned short *p; }; #define foo_s s void func (struct foo *foo_s, unsigned int *coord, _Bool delta) { unsigned short change; if (delta) { change = *((foo_s)->p++); *coord += change; } else { change = *((foo_s)->p++); *coord += change; // *coord += *((foo_s)->p++) << 8; } } we end up a little bit better, with something like func (struct foo * sD.1705, unsigned intD.9 * coordD.1706, _BoolD.1685 deltaD.1707) { unsigned intD.9 pretmp.6D.1727; short unsigned intD.16 * pretmp.5D.1726; short unsigned intD.16 pretmp.4D.1725; short unsigned intD.16 * pretmp.3D.1724; short unsigned intD.16 changeD.1710; unsigned intD.9 D.1718; unsigned intD.9 D.1717; unsigned intD.9 D.1716; short unsigned intD.16 * D.1715; short unsigned intD.16 * D.1714; # BLOCK 2 freq:10000 # PRED: ENTRY [100.0%] (fallthru,exec) # VUSE <.MEMD.1720_17(D)> # PT = nonlocal escaped pretmp.3D.1724_22 = sD.1705_2(D)->pD.1704; # VUSE <.MEMD.1720_17(D)> pretmp.4D.1725_23 = *pretmp.3D.1724_22; # PT = nonlocal escaped pretmp.5D.1726_24 = pretmp.3D.1724_22 + 2; # VUSE <.MEMD.1720_17(D)> pretmp.6D.1727_25 = *coordD.1706_6(D); pretmp.6D.1727_26 = (unsigned intD.9) pretmp.4D.1725_23; pretmp.6D.1727_27 = pretmp.6D.1727_25 + pretmp.6D.1727_26; if (deltaD.1707_1(D) != 0) goto <bb 3>; else goto <bb 4>; # SUCC: 3 [39.0%] (true,exec) 4 [61.0%] (false,exec) # BLOCK 3 freq:3900 # PRED: 2 [39.0%] (true,exec) # .MEMD.1720_18 = VDEF <.MEMD.1720_17(D)> sD.1705_2(D)->pD.1704 = pretmp.5D.1726_24; # .MEMD.1720_19 = VDEF <.MEMD.1720_18> *coordD.1706_6(D) = pretmp.6D.1727_27; goto <bb 5>; # SUCC: 5 [100.0%] (fallthru,exec) # BLOCK 4 freq:6100 # PRED: 2 [61.0%] (false,exec) # .MEMD.1720_20 = VDEF <.MEMD.1720_17(D)> sD.1705_2(D)->pD.1704 = pretmp.5D.1726_24; # .MEMD.1720_21 = VDEF <.MEMD.1720_20> *coordD.1706_6(D) = pretmp.6D.1727_27; # SUCC: 5 [100.0%] (fallthru,exec) # BLOCK 5 freq:10000 # PRED: 3 [100.0%] (fallthru,exec) 4 [100.0%] 
(fallthru,exec) # .MEMD.1720_16 = PHI <.MEMD.1720_19(3), .MEMD.1720_21(4)> # VUSE <.MEMD.1720_16> return; # SUCC: EXIT [100.0%] } which translates to nearly proper code: func: .LFB0: .cfi_startproc movq (%rdi), %rax # sD.1705_2(D)->pD.1704, pretmp.3D.1724 leaq 2(%rax), %rcx #, pretmp.5D.1726 movzwl (%rax), %eax # *pretmp.3D.1724_22, pretmp.6D.1727 addl (%rsi), %eax # *coordD.1706_6(D), pretmp.6D.1727 testb %dl, %dl # deltaD.1707 movq %rcx, (%rdi) # pretmp.5D.1726, sD.1705_2(D)->pD.1704 movl %eax, (%rsi) # pretmp.6D.1727, *coordD.1706_6(D) je .L2 #, ret .L2: ret .cfi_endproc where the expected code would be something like (I think): func: .LFB0: .cfi_startproc movq (%rdi), %rax # sD.1705_2(D)->pD.1704, D.1714 movzwl (%rax), %edx #* D.1714, changeD.1710 addq $2, %rax #, tmp77 movq %rax, (%rdi) # tmp77, sD.1705_2(D)->pD.1704 addl %edx, (%rsi) # changeD.1710, *coordD.1706_6(D) ret .cfi_endproc .LFE0: So we just need to recognize that BB3 and BB4 are identical (everything in BB3 can be hoisted and BB4 is dead).