http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49573

           Summary: wrong rtl pre transformation
           Product: gcc
           Version: 4.6.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
        AssignedTo: unassig...@gcc.gnu.org
        ReportedBy: car...@google.com
              Host: linux
            Target: arm-unknown-linux-gnueabi


Created attachment 24626
  --> http://gcc.gnu.org/bugzilla/attachment.cgi?id=24626
testcase

The attached testcase is extracted from skia.

Check out the gcc source from the gcc 4.6 branch at a revision earlier than
r175024, configure it for arm-unknown-linux-gnueabi, and build it. Compiling
the attached testcase with the options -march=armv7-a -O2 -fno-exceptions
generates the following code:

_ZN12SkGlyphCache10VisitCacheEPK12SkDescriptor:
    @ args = 0, pretend = 0, frame = 0
    @ frame_needed = 0, uses_anonymous_args = 0
    stmfd    sp!, {r3, r4, r5, r6, r7, lr}
    mov    r5, r0
    ldr    r6, .L17
    ldr    r0, [r6, #0]
    bl    _Z3fooi
    ldr    r4, [r6, #4]
    cmp    r4, #0
    beq    .L2
.L11:
    ldr    r1, [r4, #8]
    mov    r2, r5
    ldr    ip, [r1, #4]
    mov    r3, r1
    add    ip, r1, ip
.L4:
    ldr    r0, [r3], #4
    ldr    r1, [r2], #4
    cmp    r0, r1
    bne    .L3
    cmp    ip, r3
    bhi    .L4
    ldr    r3, [r4, #4]
    cmp    r3, #0
    beq    .L16               // A
    ldr    r2, [r4, #0]   
    str    r2, [r3, #0]
    ldr    r7, [r4, #0]
.L8:
    cmp    r7, #0             // B
    mov    r0, r4
    ldrne    r3, [r4, #4]
    strne    r3, [r7, #4]
    mov    r3, #0
    str    r3, [r4, #0]
    str    r3, [r4, #4]
    ldmfd    sp!, {r3, r4, r5, r6, r7, pc}
.L3:
    ldr    r4, [r4, #0]
    cmp    r4, #0
    bne    .L11
.L2:
.L10:
    mov    r0, #12
    bl    _Znwj
    mov    r4, r0
    bl    _ZN12SkGlyphCacheC1Ev
    mov    r0, r4
    ldmfd    sp!, {r3, r4, r5, r6, r7, pc}
.L16:
    ldr    r3, [r4, #0]      
    str    r3, [r6, #4]
    b    .L8
.L18:
    .align    2
.L17:
    .word    gGCGlobals

Note that when branch instruction A is taken, control goes to .L16 and then
branches to .L8 without r7 ever having been loaded, so r7 does not contain any
meaningful value. The outcome of instruction B is therefore unpredictable, and
the generated code is incorrect.

The culprit is the RTL PRE pass. Before PRE, we have:

    ...
778 (note 116 107 117 8 [bb 8] NOTE_INSN_BASIC_BLOCK)
779 
780 (insn 117 116 118 8 (set (reg/f:SI 201 [ D.1864 ])
781         (mem/f:SI (reg/v/f:SI 196 [ cache ]) [4 MEM[(struct SkGlyphCache *
*)cache_51]+0 S4 A32])) SkGlyphCache.ii:26 
    168 {*arm_movsi_insn}
782      (nil))
783 
784 (insn 118 117 121 8 (set (mem/s/f:SI (reg/f:SI 200 [ D.1863 ]) [4
D.1863_32->fNext+0 S4 A32])
785         (reg/f:SI 201 [ D.1864 ])) SkGlyphCache.ii:26 168 {*arm_movsi_insn}
786      (expr_list:REG_DEAD (reg/f:SI 201 [ D.1864 ])
787         (expr_list:REG_DEAD (reg/f:SI 200 [ D.1863 ])
788             (nil))))
789 
790 (code_label 121 118 122 9 7 "" [1 uses])
791 
792 (note 122 121 123 9 [bb 9] NOTE_INSN_BASIC_BLOCK)
793 
794 (insn 123 122 125 9 (set (reg/f:SI 202 [ D.1864 ])
795         (mem/f:SI (reg/v/f:SI 196 [ cache ]) [4 MEM[(struct SkGlyphCache *
*)cache_51]+0 S4 A32])) SkGlyphCache.ii:28 
    168 {*arm_movsi_insn}
796      (nil))
797 
798 (insn 125 123 126 9 (set (reg/f:SI 220)
799         (symbol_ref:SI ("gGCGlobals") [flags 0xc0]  <var_decl
0x7f90256e8320 gGCGlobals>)) SkGlyphCache.ii:28 168 {*arm_movsi_insn}
800      (expr_list:REG_EQUAL (symbol_ref:SI ("gGCGlobals") [flags 0xc0] 
<var_decl 0x7f90256e8320 gGCGlobals>)
801         (nil)))
802 
803 (insn 126 125 127 9 (set (mem/s/f/c:SI (plus:SI (reg/f:SI 220)
804                 (const_int 4 [0x4])) [4 gGCGlobals.fHead+0 S4 A32])
805         (reg/f:SI 202 [ D.1864 ])) SkGlyphCache.ii:28 168 {*arm_movsi_insn}
806      (expr_list:REG_DEAD (reg/f:SI 220)
807         (expr_list:REG_DEAD (reg/f:SI 202 [ D.1864 ])
808             (nil))))
809 
810 (code_label 127 126 128 10 8 "" [0 uses])
811 
812 (note 128 127 129 10 [bb 10] NOTE_INSN_BASIC_BLOCK)
813 
814 (insn 129 128 130 10 (set (reg/f:SI 203 [ D.1864 ])
815         (mem/f:SI (reg/v/f:SI 196 [ cache ]) [4 MEM[(struct SkGlyphCache *
*)cache_51]+0 S4 A32])) SkGlyphCache.ii:30     168 {*arm_movsi_insn}
816      (nil))
817 
818 (insn 130 129 131 10 (set (reg:CC 24 cc)
819         (compare:CC (reg/f:SI 203 [ D.1864 ])
820             (const_int 0 [0]))) SkGlyphCache.ii:30 198 {*arm_cmpsi_insn}
821      (nil))
822 
      ...

bb8 and bb9 are predecessors of bb10. insn 129 is fully redundant with respect
to insns 117 and 123, but gcc failed to identify this; instead it determined
that insn 129 is only partially redundant with respect to insn 123. That by
itself would still be OK, but gcc then performed a wrong transformation. It
adds a new instruction at the end of bb8 that loads the memory into a new
register r225, and modifies the first instruction of bb10 to move r225 into
the old register. But it doesn't touch bb9, so if control flows from bb9 to
bb10, the new register r225 contains no meaningful value.

    ...
286 (note 116 107 117 8 [bb 8] NOTE_INSN_BASIC_BLOCK)
287 
288 (insn 117 116 118 8 (set (reg/f:SI 201 [ D.1864 ])
289         (mem/f:SI (reg/v/f:SI 196 [ cache ]) [4 MEM[(struct SkGlyphCache *
*)cache_51]+0 S4 A32])) SkGlyphCache.ii:26     168 {*arm_movsi_insn}
290      (nil))
291 
292 (insn 118 117 185 8 (set (mem/s/f:SI (reg/f:SI 200 [ D.1863 ]) [4
D.1863_32->fNext+0 S4 A32])
293         (reg/f:SI 201 [ D.1864 ])) SkGlyphCache.ii:26 168 {*arm_movsi_insn}
294      (expr_list:REG_DEAD (reg/f:SI 201 [ D.1864 ])
295         (expr_list:REG_DEAD (reg/f:SI 200 [ D.1863 ])
296             (nil))))
297 
298 (insn 185 118 121 8 (set (reg/f:SI 225 [ D.1864 ])
299         (mem/s/f:SI (reg/v/f:SI 196 [ cache ]) [4 cache_31->fNext+0 S4
A32])) -1
300      (nil))
301 
302 (code_label 121 185 122 9 7 "" [1 uses])
303 
304 (note 122 121 123 9 [bb 9] NOTE_INSN_BASIC_BLOCK)
305 
306 (insn 123 122 183 9 (set (reg/f:SI 202 [ D.1864 ])
307         (mem/f:SI (reg/v/f:SI 196 [ cache ]) [4 MEM[(struct SkGlyphCache *
*)cache_51]+0 S4 A32])) SkGlyphCache.ii:28     168 {*arm_movsi_insn}
308      (nil))
309 
310 (insn 183 123 126 9 (set (reg/f:SI 220)
311         (reg/f:SI 224)) SkGlyphCache.ii:28 -1
312      (expr_list:REG_EQUAL (symbol_ref:SI ("gGCGlobals") [flags 0xc0] 
<var_decl 0x7f90256e8320 gGCGlobals>)
313         (nil)))
314 
315 (insn 126 183 127 9 (set (mem/s/f/c:SI (plus:SI (reg/f:SI 220)
316                 (const_int 4 [0x4])) [4 gGCGlobals.fHead+0 S4 A32])
317         (reg/f:SI 202 [ D.1864 ])) SkGlyphCache.ii:28 168 {*arm_movsi_insn}
318      (expr_list:REG_DEAD (reg/f:SI 220)
319         (expr_list:REG_DEAD (reg/f:SI 202 [ D.1864 ])
320             (nil))))
321 
322 (code_label 127 126 128 10 8 "" [0 uses])
323 
324 (note 128 127 184 10 [bb 10] NOTE_INSN_BASIC_BLOCK)
325 
326 (insn 184 128 130 10 (set (reg/f:SI 203 [ D.1864 ])
327         (reg/f:SI 225 [ D.1864 ])) SkGlyphCache.ii:30 -1
328      (expr_list:REG_EQUAL (mem/f:SI (reg/v/f:SI 196 [ cache ]) [4
MEM[(struct SkGlyphCache * *)cache_51]+0 S4 A32])
329         (nil)))
330 
331 (insn 130 184 131 10 (set (reg:CC 24 cc)
332         (compare:CC (reg/f:SI 203 [ D.1864 ])
333             (const_int 0 [0]))) SkGlyphCache.ii:30 198 {*arm_cmpsi_insn}
334      (nil))
    ...

I can't reproduce it on the tip of the 4.6 branch, because after r175024 gcc
no longer thinks there is any redundancy here. Nor can I reproduce it on
trunk, for a similar reason: gcc fails to identify any redundancy after
r171703.

Reply via email to