Wrong list.

-----Original Message-----
From: Bin Cheng [mailto:bin.ch...@arm.com] 
Sent: Monday, March 25, 2013 3:01 PM
To: g...@gcc.gnu.org
Subject: [PATCH GCC] Relax the probability condition in CE pass when optimizing for code size

Hi,
The CE pass has been adapted to work with the probabilities of the
then/else branches, so the transformation is now done only when it is
deemed profitable.  The problem is that this change affects both
performance and size, causing size regressions in many cases
(especially in C libraries such as Newlib).  This patch therefore
relaxes the probability condition when optimizing for size.
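
To make the relaxed condition concrete: cheap_bb_rtx_cost_p compares
the cost of the block to be speculated against max_cost scaled by the
branch probability (the scale argument, out of REG_BR_PROB_BASE,
which is 10000).  Roughly, a then-block entered with probability 0.3
gets

  scale = 3000 + REG_BR_PROB_BASE / 8 = 4250

i.e. only 42.5% of the normal cost budget, so moderately sized blocks
on unlikely paths are rejected.  With this patch, at -Os and after
combine, scale is simply forced to REG_BR_PROB_BASE, so the full
budget applies regardless of the branch probability.  (The numbers
above are only illustrative.)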

Below is an example from Newlib:

unsigned int strlen (const char *);
void * realloc (void * __r, unsigned int __size);
void * memcpy (void *, const void *, unsigned int);

int argz_add (char **argz, unsigned int *argz_len, const char *str)
{
  int len_to_add = 0;
  unsigned int last = *argz_len;

  if (str == ((void *)0))
    return 0;

  len_to_add = strlen(str) + 1;
  *argz_len += len_to_add;

  if(!(*argz = (char *)realloc(*argz, *argz_len)))
    return 12;

  memcpy(*argz + last, str, len_to_add);
  return 0;
}
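
For reference, the assembly below can be reproduced with something
like the following command (assuming the usual arm-none-eabi cross
toolchain):

  arm-none-eabi-gcc -Os -mthumb -mcpu=cortex-m0 -S argz.c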

The generated assembly for -Os/cortex-m0 is:

argz_add:
        push    {r0, r1, r2, r4, r5, r6, r7, lr}
        mov     r6, r0
        mov     r7, r1
        mov     r4, r2
        ldr     r5, [r1]
        beq     .L3
        mov     r0, r2
        bl      strlen
        add     r0, r0, #1
        add     r1, r0, r5
        str     r0, [sp, #4]
        str     r1, [r7]
        ldr     r0, [r6]
        bl      realloc
        mov     r3, #12
        str     r0, [r6]
        cmp     r0, #0
        beq     .L2
        add     r0, r0, r5
        mov     r1, r4
        ldr     r2, [sp, #4]
        bl      memcpy
        mov     r3, #0
        b       .L2
.L3:
        mov     r3, r2
.L2:
        mov     r0, r3

With this patch, the branch and mov instructions around .L3 can be
if-converted.

During this work I observed that passes running before combine might
interfere with the CE pass, so the relaxed condition is only enabled
for ce2/ce3, which run after the combine pass.
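
To be explicit about which passes see the relaxation: ce1 runs before
combine and sets ifcvt_after_combine to false; ce2 runs after combine
and sets it to true; ce3 (after reload) is not touched by the patch,
so it inherits the value left by ce2 and uses the relaxed condition
as well.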

It has been tested on x86 and thumb2, both at normal optimization and
at -Os.  Is it OK for trunk?


2013-03-25  Bin Cheng  <bin.ch...@arm.com>

        * ifcvt.c (ifcvt_after_combine): New static variable.
        (cheap_bb_rtx_cost_p): Set scale to REG_BR_PROB_BASE when optimizing
        for size.
        (rest_of_handle_if_conversion, rest_of_handle_if_after_combine):
        Clear/set the variable ifcvt_after_combine.
Index: gcc/ifcvt.c
===================================================================
--- gcc/ifcvt.c (revision 197029)
+++ gcc/ifcvt.c (working copy)
@@ -67,6 +67,9 @@
 
 #define NULL_BLOCK     ((basic_block) NULL)
 
+/* TRUE if the current if-conversion pass runs after combine.  */
+static bool ifcvt_after_combine;
+
 /* # of IF-THEN or IF-THEN-ELSE blocks we looked at  */
 static int num_possible_if_blocks;
 
@@ -144,8 +147,14 @@ cheap_bb_rtx_cost_p (const_basic_block bb, int sca
   /* Our branch probability/scaling factors are just estimates and don't
      account for cases where we can get speculation for free and other
      secondary benefits.  So we fudge the scale factor to make speculating
-     appear a little more profitable.  */
+     appear a little more profitable when optimizing for performance.  */
   scale += REG_BR_PROB_BASE / 8;
+
+  /* Set the scale to REG_BR_PROB_BASE to be more aggressive when
+     optimizing for size and running after the combine pass.  */
+  if (!optimize_function_for_speed_p (cfun) && ifcvt_after_combine)
+    scale = REG_BR_PROB_BASE;
+
   max_cost *= scale;
 
   while (1)
@@ -4445,6 +4454,7 @@ gate_handle_if_conversion (void)
 static unsigned int
 rest_of_handle_if_conversion (void)
 {
+  ifcvt_after_combine = false;
   if (flag_if_conversion)
     {
       if (dump_file)
@@ -4494,6 +4504,7 @@ gate_handle_if_after_combine (void)
 static unsigned int
 rest_of_handle_if_after_combine (void)
 {
+  ifcvt_after_combine = true;
   if_convert ();
   return 0;
 }
