On 11/05/16 12:52, Jiong Wang wrote:


On 09/05/16 16:08, Segher Boessenkool wrote:
Hi Christophe,

On Mon, May 09, 2016 at 03:54:26PM +0200, Christophe Lyon wrote:
After this patch, I've noticed that
gcc.target/arm/pr43920-2.c
now fails at:
/* { dg-final { scan-assembler-times "pop" 2 } } */

Before the patch, the generated code was:
[...]
         pop     {r3, r4, r5, r6, r7, pc}
.L4:
         mov     r0, #-1
.L1:
         pop     {r3, r4, r5, r6, r7, pc}

it is now:
[...]
.L1:
         pop     {r3, r4, r5, r6, r7, pc}
.L4:
         mov     r0, #-1
         b       .L1

The new version does not seem better, as it adds a branch on the path
and it is not smaller.
That looks like bb-reorder isn't doing its job?  Maybe it thinks that
pop is too expensive to copy?
I think so. Filed PR71061

The ARM backend is not setting the length attribute correctly, so the bb
failed the copy_bb_p check.


The length attributes for these pop patterns have been corrected by r237331.

Unfortunately, I am afraid that even with the backend length issue fixed, this testcase will keep failing, because it specifies "-Os", so some bb copies won't be triggered.


Further investigation shows the real issue is that GCC couldn't optimize a

"branch to pop" into a "pop" — that is, a "jump to return" into a "return" — so a better
place to fix this issue is try_optimize_cfg, where we are already doing this
jump/return optimization:

  /* Try to change a branch to a return to just that return.  */
  rtx_insn *ret, *use;
  ...

Currently we are using ANY_RETURN_P to check whether an rtx is a return,
but ANY_RETURN_P only covers RETURN and SIMPLE_RETURN without side effects:

#define ANY_RETURN_P(X) \
  (GET_CODE (X) == RETURN || GET_CODE (X) == SIMPLE_RETURN)

It is possible that some architectures support return instruction with
side-effect, for example ARM has pop instruction which will do a return
and pop registers at the same time.  The instruction pattern for that is
something like:

  (jump_insn 90 89 93 7 (parallel [
            (return)
            (set/f (reg/f:SI 13 sp)
                (plus:SI (reg/f:SI 13 sp)
                    (const_int 24 [0x18])))
            (set/f (reg:SI 3 r3)
                (mem/c:SI (reg/f:SI 13 sp) [3  S4 A32]))
            (set/f (reg:SI 4 r4)
                (mem/c:SI (plus:SI (reg/f:SI 13 sp)
                        (const_int 4 [0x4])) [3  S4 A32]))
  ...

The above pattern will fail the ANY_RETURN_P check, so GCC can't optimize the
following jump to a return:

[bb 1]
(jump_insn 76 38 77 4 (set (pc)
        (label_ref 43)) pr43920-2.c:27 203 {*arm_jump}
     (nil)
 -> 43)

[bb 2]
(code_label 43)
...
(jump_insn 90 89 93 7 (parallel [
            (return)
            (set/f (reg/f:SI 13 sp)
                (plus:SI (reg/f:SI 13 sp)
                    (const_int 24 [0x18])))
            (set/f (reg:SI 3 r3)
                (mem/c:SI (reg/f:SI 13 sp) [3  S4 A32]))
            (set/f (reg:SI 4 r4)
                (mem/c:SI (plus:SI (reg/f:SI 13 sp)
                        (const_int 4 [0x4])) [3  S4 A32]))

into:

(jump_insn 76 38 77 4 (parallel [
            (return)
            (set/f (reg/f:SI 13 sp)
                (plus:SI (reg/f:SI 13 sp)
                    (const_int 24 [0x18])))
            (set/f (reg:SI 3 r3)
                (mem/c:SI (reg/f:SI 13 sp) [3  S4 A32]))
            (set/f (reg:SI 4 r4)
                (mem/c:SI (plus:SI (reg/f:SI 13 sp)
                        (const_int 4 [0x4])) [3  S4 A32]))
            (set/f (reg:SI 5 r5)
                (mem/c:SI (plus:SI (reg/f:SI 13 sp)
                        (const_int 8 [0x8])) [3  S4 A32]))

This patch:
  * Rename the existing ANY_RETURN_P to ANY_PURE_RETURN_P.
  * Re-implement ANY_RETURN_P to consider complex JUMP_INSN with
    PARALLEL in the pattern.
  * Remove the side_effects_p check on the last insn in both bb1
    and bb2.  This is because, suppose bb1 and bb2 each contain the
    following single jump_insn and both fall through to EXIT_BB:

    bb 1                                bb 2

    (jump_insn                       (jump_insn
       (parallel [                      (parallel [
            (return)                         (return)
            (set/f                           (set/f
              (reg:SI 15 pc)                   (reg:SI 15 pc)
              (mem:SI                          (mem:SI
                (post_inc:SI                     (post_inc:SI
                (reg/f:SI 13 sp))                (reg/f:SI 13 sp))
        ])                               ])
     -> return)                        -> return)

                \                    /
                 \                  /
                  \                /
                   v              v
                        EXIT_BB

    cross-jump optimization will try to change the jump_insn in bb1 into
    an unconditional jump to bb2; then we enter the next iteration
    of try_optimize_cfg, where it becomes a new "jump to return", so
    bb1 is optimized back into the above pattern, triggering another round
    of cross-jump optimization — we end up in an infinite loop here.

Bootstrapped OK on x86_64/arm32/arm64; no gcc/g++ regressions on any of
them.

OK for trunk?

2016-06-14  Jiong Wang<jiong.w...@arm.com>

    * cfgcleanup.c (output.h): New include.
    (try_optimize_cfg): Check instruction length when optimizing jump to
    return into return, consider optimize for size.
    (flow_find_cross_jump): Remove side_effect_p check on the last insn
    in both bb1 and bb2.
    * jump.c (return_in_jump): New function.
    (redirect_jump_2): Consider complex pattern when updating jump label.
    * rtl.c (classify_insn): Use ANY_PURE_RETURN_P instead of
    ANY_RETURN_P.
    * rtl.h (return_in_jump): New declaration.
    (ANY_RETURN_P): Rename to ANY_PURE_RETURN_P.  Re-implement
    through return_in_jump.


diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c
index 023b9d2..a4bea09 100644
--- a/gcc/cfgcleanup.c
+++ b/gcc/cfgcleanup.c
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tm_p.h"
 #include "insn-config.h"
 #include "emit-rtl.h"
+#include "output.h"
 #include "cselib.h"
 #include "params.h"
 #include "tree-pass.h"
@@ -1337,7 +1338,7 @@ flow_find_cross_jump (basic_block bb1, basic_block bb2, rtx_insn **f1,
   i1 = BB_END (bb1);
   last1 = afterlast1 = last2 = afterlast2 = NULL;
   if (onlyjump_p (i1)
-      || (returnjump_p (i1) && !side_effects_p (PATTERN (i1))))
+      || returnjump_p (i1))
     {
       last1 = i1;
       i1 = PREV_INSN (i1);
@@ -1345,7 +1346,7 @@ flow_find_cross_jump (basic_block bb1, basic_block bb2, rtx_insn **f1,
 
   i2 = BB_END (bb2);
   if (onlyjump_p (i2)
-      || (returnjump_p (i2) && !side_effects_p (PATTERN (i2))))
+      || returnjump_p (i2))
     {
       last2 = i2;
       /* Count everything except for unconditional jump as insn.
@@ -2825,7 +2826,11 @@ try_optimize_cfg (int mode)
 	      rtx_insn *ret, *use;
 	      if (single_succ_p (b)
 		  && onlyjump_p (BB_END (b))
-		  && bb_is_just_return (single_succ (b), &ret, &use))
+		  && bb_is_just_return (single_succ (b), &ret, &use)
+		  && ((get_attr_min_length (ret)
+		       <= get_attr_min_length (BB_END (b)))
+		      || optimize_bb_for_speed_p (b)))
+
 		{
 		  if (redirect_jump (as_a <rtx_jump_insn *> (BB_END (b)),
 				     PATTERN (ret), 0))
diff --git a/gcc/jump.c b/gcc/jump.c
index 5b433af..e962921 100644
--- a/gcc/jump.c
+++ b/gcc/jump.c
@@ -1437,7 +1437,35 @@ delete_for_peephole (rtx_insn *from, rtx_insn *to)
      since the peephole that replaces this sequence
      is also an unconditional jump in that case.  */
 }
-
+
+/* If X is RETURN or SIMPLE_RETURN then return itself.  If X is PARALLEL, return
+   the contained RETURN or SIMPLE_RETURN if it's not a CALL_INSN, otherwise
+   return NULL_RTX.  */
+
+rtx
+return_in_jump (rtx x)
+{
+  if (ANY_PURE_RETURN_P (x))
+    return x;
+
+  rtx ret = NULL_RTX;
+
+  if (GET_CODE (x) == PARALLEL)
+    {
+      int j = 0;
+      for (; j < XVECLEN (x, 0); j++)
+	if (ANY_PURE_RETURN_P (XVECEXP (x, 0, j)))
+	  ret = XVECEXP (x, 0, j);
+        /* Return NULL_RTX for CALL_INSN.  */
+	else if (GET_CODE (XVECEXP (x, 0, j)) == CALL
+		 || (GET_CODE (XVECEXP (x, 0, j)) == SET
+		     && GET_CODE (SET_SRC (XVECEXP (x, 0, j))) == CALL))
+	  return NULL_RTX;
+    }
+
+  return ret;
+}
+
 /* A helper function for redirect_exp_1; examines its input X and returns
    either a LABEL_REF around a label, or a RETURN if X was NULL.  */
 static rtx
@@ -1586,8 +1614,14 @@ redirect_jump_2 (rtx_jump_insn *jump, rtx olabel, rtx nlabel, int delete_unused,
      moving FUNCTION_END note.  Just sanity check that no user still worry
      about this.  */
   gcc_assert (delete_unused >= 0);
-  JUMP_LABEL (jump) = nlabel;
-  if (!ANY_RETURN_P (nlabel))
+  /* "nlabel" might be a pattern where "return/simple_return" is inside a
+     PARALLEL that it can't be used for JUMP_LABEL directly.  */
+  rtx ret = return_in_jump (nlabel);
+  if (ret)
+    JUMP_LABEL (jump) = ret;
+  else
+    JUMP_LABEL (jump) = nlabel;
+  if (!ret)
     ++LABEL_NUSES (nlabel);
 
   /* Update labels in any REG_EQUAL note.  */
diff --git a/gcc/rtl.h b/gcc/rtl.h
index b531ab7..a2db61b 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -961,9 +961,12 @@ is_a_helper <rtx_note *>::test (rtx_insn *insn)
 }
 
 /* Predicate yielding nonzero iff X is a return or simple_return.  */
-#define ANY_RETURN_P(X) \
+#define ANY_PURE_RETURN_P(X) \
   (GET_CODE (X) == RETURN || GET_CODE (X) == SIMPLE_RETURN)
 
+/* See comments for return_in_jump.  */
+#define ANY_RETURN_P(X) (return_in_jump (X) != NULL_RTX)
+
 /* 1 if X is a unary operator.  */
 
 #define UNARY_P(X)   \
@@ -3503,6 +3506,7 @@ extern enum rtx_code reversed_comparison_code_parts (enum rtx_code, const_rtx,
 						     const_rtx, const_rtx);
 extern void delete_for_peephole (rtx_insn *, rtx_insn *);
 extern int condjump_in_parallel_p (const rtx_insn *);
+extern rtx return_in_jump (rtx);
 
 /* In emit-rtl.c.  */
 extern int max_reg_num (void);
diff --git a/gcc/rtl.c b/gcc/rtl.c
index a445cdc..fd96839 100644
--- a/gcc/rtl.c
+++ b/gcc/rtl.c
@@ -694,7 +694,7 @@ classify_insn (rtx x)
     return CODE_LABEL;
   if (GET_CODE (x) == CALL)
     return CALL_INSN;
-  if (ANY_RETURN_P (x))
+  if (ANY_PURE_RETURN_P (x))
     return JUMP_INSN;
   if (GET_CODE (x) == SET)
     {
@@ -712,7 +712,7 @@ classify_insn (rtx x)
       for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
 	if (GET_CODE (XVECEXP (x, 0, j)) == CALL)
 	  return CALL_INSN;
-	else if (ANY_RETURN_P (XVECEXP (x, 0, j)))
+	else if (ANY_PURE_RETURN_P (XVECEXP (x, 0, j)))
 	  has_return_p = true;
 	else if (GET_CODE (XVECEXP (x, 0, j)) == SET
 		 && GET_CODE (SET_DEST (XVECEXP (x, 0, j))) == PC)

Reply via email to