Hi,
On ARM/Thumb-2, if-conversion is sometimes unnecessarily constrained by
the combine pass (example below). In such cases, it would be profitable to
teach if-conversion to apply the reverse transformation, but only when
if-conversion is actually possible. The attached patch implements that and
improves performance on ARM, but in order to "uncombine" insns it
references CC_REGNUM directly and thus does not work on other
architectures. Could anybody suggest a machine-independent approach?
Consider:
/* Reduced test case: a loop whose body is an if/else in which each arm
   is a single assignment to x.  The condition depends on argc, so it
   cannot be evaluated at compile time.
   NOTE(review): printf is used but no #include <stdio.h> is shown in
   this excerpt -- presumably it is present in thumb-vfp-cond.c.  */
int main(int argc, char **argv) {
int i, x = 42;
for (i = 0; i < 1024; i++) {
/* Non-zero remainder selects the multiply arm, zero selects the add arm.  */
if (i % argc)
x *= x;
else
x += 4;
}
/* Print the result so that x stays live after the loop.  */
printf("%d\n", x);
}
arm-unknown-linux-gnueabi-gcc -mthumb -mfloat-abi=softfp -mcpu=cortex-a8
-mtune=cortex-a8 -O2 thumb-vfp-cond.c -o thumb-vfp-cond.s -S
Combine propagates compare into if_then_else pattern that matches with
cbz/cbnz Thumb-2 patterns (there's no such problem in ARM mode),
resulting in the following rtl:
(if_then_else (eq (reg:SI 1 r1 [+4 ])
(const_int 0 [0]))
(label_ref:SI 22)
(pc))
Then if-conversion fails to match a cond_exec instruction, because its
predicate contains a comparison against a regular register instead of the CC register:
(insn 19 18 22 4 (cond_exec (ne (reg:SI 1 r1 [+4 ])
(const_int 0 [0]))
(set (reg/v:SI 5 r5 [orig:110 x ] [110])
(mult:SI (reg/v:SI 5 r5 [orig:110 x ] [110])
(reg/v:SI 5 r5 [orig:110 x ] [110]))))
thumb-vfp-cond.c:8 -1
(nil))
The attached patch allows if-conversion to "uncombine" such if_then_else
patterns into two insns: 1) a separate comparison that sets CC, and 2) an
if_then_else that uses it, which can be converted to cond_exec. This
transformation is applied only if if-conversion was successful.
Also, this fix works only for if_then_else constructs that have both then and
else blocks (converting blocks with only a then or only an else arm proved
unprofitable).
Spec2000 results (ref)
test base peak %
time time
gzip 289.07 290.4 -0.46
vpr 255.75 257.18 -0.56
gcc 120.17 119.08 0.91
mcf 427.3 425.81 0.35
crafty 156.56 152.53 2.57
parser 373.05 373.44 -0.10
eon 142.07 142.54 -0.33
perlbmk 217.38 218.2 -0.38
gap 138.77 138.56 0.15
vortex 260.64 260.43 0.08
bzip2 255.3 256.01 -0.28
twolf 433.83 433.98 -0.03
Geomean 234.34 233.96 0.16
The results on train are a bit better than on ref, but in both cases
crafty's performance improves by 2-2.5%.
There were 4221 conditional instructions in total in Spec2000 before the
patch and 5772 after (about 37% more), so if-conversion succeeded noticeably more often.
Regtested on ARM, mostly ok except for one test
(gcc.target/arm/pr46631.c) that will be investigated. I can't reproduce
that by hand.
2013-08-02 Dmitry Plotnikov <le...@ispras.ru>
* ifcvt.c (cond_exec_process_if_block): Fix test_expr for if-then-elses
when it has comparison without CC reg.
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index f081ecd..d915170 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -441,6 +448,7 @@ cond_exec_process_if_block (ce_if_block_t * ce_info,
rtx else_first_tail = NULL_RTX; /* First match at the tail of ELSE */
int then_n_insns, else_n_insns, n_insns;
enum rtx_code false_code;
+ rtx uncombined_compare = NULL_RTX;
/* If test is comprised of && or || elements, and we've failed at
handling
all of them together, just use the last test if it is the special
case of
@@ -462,6 +470,26 @@ cond_exec_process_if_block (ce_if_block_t * ce_info,
if (! test_expr)
return FALSE;
+ /* Extract predicate for compare from combined if-then-else. */
+
+ if (then_bb && else_bb
+ && REGNO (XEXP (test_expr,0)) != CC_REGNUM)
+ {
+ /* Test_expr with non CC reg, so we need to emit compare and make
+ new test_expr with CC reg. */
+ rtx tmp_test_expr;
+ rtx tmp_cc_reg = gen_rtx_REG(CCmode, CC_REGNUM);
+
+ tmp_test_expr = gen_rtx_fmt_ee (GET_CODE (test_expr),
+ GET_MODE (test_expr),
+ tmp_cc_reg, const0_rtx);
+ uncombined_compare = gen_rtx_SET (GET_MODE (test_expr), tmp_cc_reg,
+ gen_rtx_COMPARE (CCmode,
+ XEXP
(test_expr,0),
+ XEXP
(test_expr,1)));
+ test_expr = tmp_test_expr;
+ }
+
/* If the conditional jump is more than just a conditional jump,
then we can not do conditional execution conversion on this
block. */
if (! onlyjump_p (BB_END (test_bb)))
@@ -683,6 +711,19 @@ cond_exec_process_if_block (ce_if_block_t * ce_info,
IFCVT_MODIFY_FINAL (ce_info);
#endif
+ /* Second part of compare extraction. We should emit compare insn
here. */
+ if (uncombined_compare != NULL_RTX)
+ {
+ rtx test_bb_end = last_active_insn (test_bb, TRUE);
+ if (test_bb_end)
+ emit_insn_after (uncombined_compare, test_bb_end);
+ else if (then_start)
+ emit_insn_before (uncombined_compare, then_start);
+ else if (else_start)
+ emit_insn_before (uncombined_compare, else_start);
+
+ }
+
/* Conversion succeeded. */
if (dump_file)
fprintf (dump_file, "%d insn%s converted to conditional execution.\n",
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index f081ecd..d915170 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -441,6 +448,7 @@ cond_exec_process_if_block (ce_if_block_t * ce_info,
rtx else_first_tail = NULL_RTX; /* First match at the tail of ELSE */
int then_n_insns, else_n_insns, n_insns;
enum rtx_code false_code;
+ rtx uncombined_compare = NULL_RTX;
/* If test is comprised of && or || elements, and we've failed at handling
all of them together, just use the last test if it is the special case of
@@ -462,6 +470,26 @@ cond_exec_process_if_block (ce_if_block_t * ce_info,
if (! test_expr)
return FALSE;
+ /* Extract predicate for compare from combined if-then-else. */
+
+ if (then_bb && else_bb
+ && REGNO (XEXP (test_expr,0)) != CC_REGNUM)
+ {
+ /* Test_expr with non CC reg, so we need to emit compare and make
+ new test_expr with CC reg. */
+ rtx tmp_test_expr;
+ rtx tmp_cc_reg = gen_rtx_REG(CCmode, CC_REGNUM);
+
+ tmp_test_expr = gen_rtx_fmt_ee (GET_CODE (test_expr),
+ GET_MODE (test_expr),
+ tmp_cc_reg, const0_rtx);
+ uncombined_compare = gen_rtx_SET (GET_MODE (test_expr), tmp_cc_reg,
+ gen_rtx_COMPARE (CCmode,
+ XEXP (test_expr,0),
+ XEXP (test_expr,1)));
+ test_expr = tmp_test_expr;
+ }
+
/* If the conditional jump is more than just a conditional jump,
then we can not do conditional execution conversion on this block. */
if (! onlyjump_p (BB_END (test_bb)))
@@ -683,6 +711,19 @@ cond_exec_process_if_block (ce_if_block_t * ce_info,
IFCVT_MODIFY_FINAL (ce_info);
#endif
+ /* Second part of compare extraction. We should emit compare insn here. */
+ if (uncombined_compare != NULL_RTX)
+ {
+ rtx test_bb_end = last_active_insn (test_bb, TRUE);
+ if (test_bb_end)
+ emit_insn_after (uncombined_compare, test_bb_end);
+ else if (then_start)
+ emit_insn_before (uncombined_compare, then_start);
+ else if (else_start)
+ emit_insn_before (uncombined_compare, else_start);
+
+ }
+
/* Conversion succeeded. */
if (dump_file)
fprintf (dump_file, "%d insn%s converted to conditional execution.\n",