[Bug tree-optimization/18576] missing jump threading because of type changes
--- Additional Comments From steven at gcc dot gnu dot org 2004-11-20 16:44 --- Confirmed, I see similar code on x86: .L4: xorl %ebx, %ebx .L7: xorl %edx, %edx testb %bl, %bl jne .L2 -- What|Removed |Added Status|UNCONFIRMED |NEW Ever Confirmed||1 Last reconfirmed|0000-00-00 00:00:00 |2004-11-20 16:44:20 date|| http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18576
[Bug tree-optimization/18576] missing jump threading because of type changes
--- Additional Comments From steven at gcc dot gnu dot org 2004-11-20 17:01 --- I have the following in the .optimized dump: flow_bb_inside_loop_p (loop, source_loop) { unsigned char D.1171; struct loop * D.1170; struct loop * * D.1169; struct loop * * D.1168; unsigned int D.1167; unsigned int D.1166; struct loop * * D.1165; int D.1164; int D.1163; int iftmp.0; int D.1161; struct loop * outer; struct loop * loop; unsigned char D.1146; unsigned char D.1145; int iftmp.1; int D.1140; int D.1160; # BLOCK 0 # PRED: ENTRY [100.0%] (fallthru,exec) if (loop == source_loop) goto L8; else goto L0; # SUCC: 6 [10.4%] (true,exec) 1 [89.6%] (false,exec) # BLOCK 1 # PRED: 0 [89.6%] (false,exec) L0:; D.1164 = loop->depth; if (source_loop->depth <= D.1164) goto L4; else goto L1; # SUCC: 4 [50.0%] (true,exec) 2 [50.0%] (false,exec) # BLOCK 2 # PRED: 1 [50.0%] (false,exec) L1:; if (loop != *(source_loop->pred + (struct loop * *) ((unsigned int) D.1164 * 4))) goto L4; else goto L2; # SUCC: 4 [81.0%] (true,exec) 3 [19.0%] (false,exec) # BLOCK 3 # PRED: 2 [19.0%] (false,exec) L2:; iftmp.0 = 1; goto bb 7 (L14); # SUCC: 7 [100.0%] (fallthru,exec) # BLOCK 4 # PRED: 1 [50.0%] (true,exec) 2 [81.0%] (true,exec) L4:; iftmp.0 = 0; # SUCC: 7 [100.0%] (fallthru) # BLOCK 7 # PRED: 4 [100.0%] (fallthru) 3 [100.0%] (fallthru,exec) L14:; if ((unsigned char) (int) (unsigned char) iftmp.0 != 0) goto L8; else goto L7; # SUCC: 6 [33.0%] (true,exec) 5 [67.0%] (false,exec) # BLOCK 5 # PRED: 7 [67.0%] (false,exec) L7:; iftmp.1 = 0; goto bb 8 (L15); # SUCC: 8 [100.0%] (fallthru,exec) # BLOCK 6 # PRED: 0 [10.4%] (true,exec) 7 [33.0%] (true,exec) L8:; iftmp.1 = 1; # SUCC: 8 [100.0%] (fallthru) # BLOCK 8 # PRED: 6 [100.0%] (fallthru) 5 [100.0%] (fallthru,exec) L15:; return (int) (unsigned char) iftmp.1; # SUCC: EXIT [100.0%] } Control flow graph: ENTRY | 0 |\ | \ 1 \ |\ \ | \ \ 2 \ \ |\ | | | \| | 3 4 | | / | |/| 7 / |\ / | \ / 5 6 | / |/ 8 | EXIT Dominator tree: 0 /|\ 6 8 1 /|\ 4 7 2 | | 5 3 The problematic part 
is here: # BLOCK 3 # PRED: 2 [19.0%] (false,exec) L2:; iftmp.0 = 1; goto bb 7 (L14); # SUCC: 7 [100.0%] (fallthru,exec) # BLOCK 4 # PRED: 1 [50.0%] (true,exec) 2 [81.0%] (true,exec) L4:; iftmp.0 = 0; # SUCC: 7 [100.0%] (fallthru) # BLOCK 7 # PRED: 4 [100.0%] (fallthru) 3 [100.0%] (fallthru,exec) L14:; if ((unsigned char) (int) (unsigned char) iftmp.0 != 0) goto L8; else goto L7; # SUCC: 6 [33.0%] (true,exec) 5 [67.0%] (false,exec) Possible causes of the missed jump thread: 1) Are casts getting in the way? 2) Do we need one of the edges into the condition we want to thread through to dominate the block? -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18576
[Bug tree-optimization/18576] missing jump threading because of type changes
--- Additional Comments From pinskia at gcc dot gnu dot org 2004-11-20 17:11 --- Note that after changing flow_loop_nested_p to return an int instead of unsigned char, the jump threading works. -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18576
[Bug tree-optimization/18576] missing jump threading because of type changes
--- Additional Comments From steven at gcc dot gnu dot org 2004-11-20 17:11 --- Situation in DOM3: # BLOCK 4 # PRED: 3 [100.0%] (fallthru,exec) 2 [81.0%] (true,exec) 1 [50.0%] (true,exec) # iftmp.0_1 = PHI 0(2), 1(3), 0(1); L4:; D.1171_13 = (unsigned char) iftmp.0_1; D.1160_14 = (int) D.1171_13; D.1145_16 = (unsigned char) D.1160_14; if (D.1145_16 != 0) goto L8; else goto L7; # SUCC: 6 [33.0%] (true,exec) 5 [67.0%] (false,exec) So this is almost certainly casts again getting in the way. -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18576
[Bug tree-optimization/18576] missing jump threading because of type changes
--- Additional Comments From pinskia at gcc dot gnu dot org 2004-11-20 06:37 --- The corresponding asm for the tcb: cmpw cr7,r3,r4 mr r11,r3 li r10,0 li r3,1 beqlr- cr7 lwz r0,0(r11) lwz r2,0(r4) slwi r9,r0,2 cmpw cr7,r2,r0 ble- cr7,L7 lwz r2,4(r4) lwzx r0,r9,r2 xor r10,r11,r0 subfic r2,r10,0 adde r10,r2,r10 L7: mr r3,r10 blr the mainline (which is much worse): cmpw cr7,r3,r4 li r10,0 beq- cr7,L2 lwz r0,0(r3) li r11,1 lwz r2,0(r4) slwi r9,r0,2 cmpw cr7,r2,r0 bgt- cr7,L12 L4: li r11,0 L7: cmpwi cr7,r11,0 bne- cr7,L2 mr r3,r10 blr L12: lwz r2,4(r4) lwzx r0,r9,r2 cmpw cr7,r3,r0 bne+ cr7,L4 b L7 L2: li r10,1 mr r3,r10 blr -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18576
[Bug tree-optimization/18576] missing jump threading because of type changes
--- Additional Comments From pinskia at gcc dot gnu dot org 2004-11-20 06:43 --- Oh, even though the asm is fixed on the tcb, we are still missing it on the tree level. By the way, I noticed this while trying to speed up flow_bb_inside_loop_p in the first place. -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18576