Hi,

On ARM/Thumb-2, if-conversion is sometimes unnecessarily constrained by the combine pass (example below). In such cases, it would be profitable to teach if-conversion to apply a reverse transformation, but only when if-conversion is actually possible. The attached patch implements that and improves performance on ARM, but in order to "uncombine" insns it references CC_REGNUM directly and thus does not work on other architectures. Could anybody suggest a machine-independent approach?


Consider:

/* Reduced test case: a 1024-iteration loop whose body is an if-then-else
   with both a THEN arm (multiply) and an ELSE arm (add).
   NOTE(review): the snippet shows no #include <stdio.h> for printf and
   presumably assumes argc != 0 for the modulo -- confirm before reuse.  */
int main(int argc, char **argv) {
   int i, x = 42;
   for (i = 0; i < 1024; i++) {
     /* The condition depends on argc, so it is not a compile-time constant.  */
     if (i % argc)
       x *= x;   /* THEN arm: square x.  */
     else
       x += 4;   /* ELSE arm: bump x by 4.  */
   }
   /* Print the result so the loop's value of x is actually used.  */
   printf("%d\n", x);

}

arm-unknown-linux-gnueabi-gcc -mthumb -mfloat-abi=softfp -mcpu=cortex-a8 -mtune=cortex-a8 -O2 thumb-vfp-cond.c -o thumb-vfp-cond.s -S

Combine propagates the compare into the if_then_else pattern, which then matches the Thumb-2 cbz/cbnz patterns (there is no such problem in ARM mode), resulting in the following RTL:

(if_then_else (eq (reg:SI 1 r1 [+4 ])
         (const_int 0 [0]))
     (label_ref:SI 22)
     (pc))

Then if-conversion fails to match a cond_exec instruction, because its predicate contains a comparison against a regular register instead of the CC register:

(insn 19 18 22 4 (cond_exec (ne (reg:SI 1 r1 [+4 ])
             (const_int 0 [0]))
         (set (reg/v:SI 5 r5 [orig:110 x ] [110])
             (mult:SI (reg/v:SI 5 r5 [orig:110 x ] [110])
                      (reg/v:SI 5 r5 [orig:110 x ] [110])))) thumb-vfp-cond.c:8 -1
      (nil))

The attached patch allows if-conversion to "uncombine" such if_then_else patterns into two insns: 1) a separate comparison that sets CC, and 2) an if_then_else that uses it, which can be converted to cond_exec. This transformation is only applied when if-conversion was successful. Also, this fix works only for if_then_elses with both then and else blocks (converting blocks that have only a then or only an else part proved unprofitable).

Spec2000 results (ref)

test            base    peak     %
                 time    time
gzip            289.07  290.4   -0.46
vpr             255.75  257.18  -0.56
gcc             120.17  119.08  0.91
mcf             427.3   425.81  0.35
crafty          156.56  152.53  2.57
parser          373.05  373.44  -0.10
eon             142.07  142.54  -0.33
perlbmk         217.38  218.2   -0.38
gap             138.77  138.56  0.15
vortex          260.64  260.43  0.08
bzip2           255.3   256.01  -0.28
twolf           433.83  433.98  -0.03
Geomean         234.34  233.96  0.16

The results on train are a bit better than on ref, and in both cases
crafty improves by 2-2.5%.
In total there were 4221 conditional instructions in Spec2000 before the
patch and 5772 after (about 37% more), so if-conversion succeeded
noticeably more often.

Regtested on ARM, mostly ok except for one test (gcc.target/arm/pr46631.c) that will be investigated. I can't reproduce that by hand.

2013-08-02  Dmitry Plotnikov  <le...@ispras.ru>

    * ifcvt.c (cond_exec_process_if_block): Fix test_expr for if-then-elses
    when it has comparison without CC reg.

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index f081ecd..d915170 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -441,6 +448,7 @@ cond_exec_process_if_block (ce_if_block_t * ce_info,
   rtx else_first_tail = NULL_RTX;  /* First match at the tail of ELSE */
   int then_n_insns, else_n_insns, n_insns;
   enum rtx_code false_code;
+  rtx uncombined_compare = NULL_RTX;

   /* If test is comprised of && or || elements, and we've failed at handling
      all of them together, just use the last test if it is the special case of
@@ -462,6 +470,26 @@ cond_exec_process_if_block (ce_if_block_t * ce_info,
   if (! test_expr)
     return FALSE;

+  /* Extract predicate for compare from combined if-then-else.  */
+
+  if (then_bb && else_bb
+      && REGNO (XEXP (test_expr,0)) != CC_REGNUM)
+  {
+      /* Test_expr with non CC reg, so we need to emit compare and make
+         new test_expr with CC reg.  */
+      rtx tmp_test_expr;
+      rtx tmp_cc_reg = gen_rtx_REG(CCmode, CC_REGNUM);
+
+      tmp_test_expr = gen_rtx_fmt_ee (GET_CODE (test_expr),
+                                      GET_MODE (test_expr),
+                                      tmp_cc_reg, const0_rtx);
+      uncombined_compare = gen_rtx_SET (GET_MODE (test_expr), tmp_cc_reg,
+                                        gen_rtx_COMPARE (CCmode,
+                                                         XEXP (test_expr,0),
+                                                         XEXP (test_expr,1)));
+      test_expr = tmp_test_expr;
+  }
+
   /* If the conditional jump is more than just a conditional jump,
     then we can not do conditional execution conversion on this block.  */
   if (! onlyjump_p (BB_END (test_bb)))
@@ -683,6 +711,19 @@ cond_exec_process_if_block (ce_if_block_t * ce_info,
   IFCVT_MODIFY_FINAL (ce_info);
 #endif

+  /* Second part of compare extraction. We should emit compare insn here.  */
+  if (uncombined_compare != NULL_RTX)
+  {
+      rtx test_bb_end = last_active_insn (test_bb, TRUE);
+      if (test_bb_end)
+        emit_insn_after (uncombined_compare, test_bb_end);
+      else if (then_start)
+        emit_insn_before (uncombined_compare, then_start);
+      else if (else_start)
+        emit_insn_before (uncombined_compare, else_start);
+
+  }
+
   /* Conversion succeeded.  */
   if (dump_file)
     fprintf (dump_file, "%d insn%s converted to conditional execution.\n",
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index f081ecd..d915170 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -441,6 +448,7 @@ cond_exec_process_if_block (ce_if_block_t * ce_info,
   rtx else_first_tail = NULL_RTX;	/* First match at the tail of ELSE */
   int then_n_insns, else_n_insns, n_insns;
   enum rtx_code false_code;
+  rtx uncombined_compare = NULL_RTX;
 
   /* If test is comprised of && or || elements, and we've failed at handling
      all of them together, just use the last test if it is the special case of
@@ -462,6 +470,26 @@ cond_exec_process_if_block (ce_if_block_t * ce_info,
   if (! test_expr)
     return FALSE;
 
+  /* Extract predicate for compare from combined if-then-else.  */
+
+  if (then_bb && else_bb
+      && REGNO (XEXP (test_expr,0)) != CC_REGNUM)
+  {
+      /* Test_expr with non CC reg, so we need to emit compare and make
+         new test_expr with CC reg.  */
+      rtx tmp_test_expr;
+      rtx tmp_cc_reg = gen_rtx_REG(CCmode, CC_REGNUM);
+
+      tmp_test_expr = gen_rtx_fmt_ee (GET_CODE (test_expr),
+                                      GET_MODE (test_expr),
+                                      tmp_cc_reg, const0_rtx);
+      uncombined_compare = gen_rtx_SET (GET_MODE (test_expr), tmp_cc_reg,
+                                        gen_rtx_COMPARE (CCmode,
+                                                         XEXP (test_expr,0),
+                                                         XEXP (test_expr,1)));
+      test_expr = tmp_test_expr;
+  }
+
   /* If the conditional jump is more than just a conditional jump,
      then we can not do conditional execution conversion on this block.  */
   if (! onlyjump_p (BB_END (test_bb)))
@@ -683,6 +711,19 @@ cond_exec_process_if_block (ce_if_block_t * ce_info,
   IFCVT_MODIFY_FINAL (ce_info);
 #endif
 
+  /* Second part of compare extraction. We should emit compare insn here.  */
+  if (uncombined_compare != NULL_RTX)
+  {
+      rtx test_bb_end = last_active_insn (test_bb, TRUE);
+      if (test_bb_end)
+        emit_insn_after (uncombined_compare, test_bb_end);
+      else if (then_start)
+        emit_insn_before (uncombined_compare, then_start);
+      else if (else_start)
+        emit_insn_before (uncombined_compare, else_start);
+
+  }
+
   /* Conversion succeeded.  */
   if (dump_file)
     fprintf (dump_file, "%d insn%s converted to conditional execution.\n",

Reply via email to