Hi,

This patch adds a new pattern to the 4-insn combine. It supports the sign_extend (op:zero_extend, zero_extend) optimization shown below. In the patch, newpat is split twice: the first split becomes newi1pat and the second becomes newi2pat. Together with the remaining newpat, they replace i1, i2 and i3, provided that all of the new patterns can be recognized.

7: r126:SI=zero_extend([r123:DI+0x1])
6: r125:SI=zero_extend([r123:DI])
8: r127:SI=r125:SI+r126:SI
9: r124:DI=sign_extend(r127:SI)

are replaced by:

7: r125:DI=zero_extend([r123:DI])
8: r127:DI=zero_extend([r123:DI+0x1])
9: r124:DI=r127:DI+r125:DI

The patch diff and the ChangeLog entry are attached.

Bootstrapped and tested on powerpc64le-linux-gnu with no regressions.  Is this okay for trunk? Any recommendations? Thanks a lot.


        * combine.c (combine_validate_cost): Add an argument for newi1pat.
        (try_combine): Add a 4-insn combine pattern for optimizing rtx
        sign_extend (op:zero_extend, zero_extend).
diff --git a/gcc/combine.c b/gcc/combine.c
index c88382efbd3..73259e6a9ed 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -851,10 +851,11 @@ do_SUBST_LINK (struct insn_link **into, struct insn_link *newval)
 
 static bool
 combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
-                      rtx newpat, rtx newi2pat, rtx newotherpat)
+                      rtx newpat, rtx newi2pat, rtx newotherpat,
+                      rtx newi1pat)
 {
   int i0_cost, i1_cost, i2_cost, i3_cost;
-  int new_i2_cost, new_i3_cost;
+  int new_i1_cost, new_i2_cost, new_i3_cost;
   int old_cost, new_cost;
 
   /* Lookup the original insn_costs.  */
@@ -915,6 +916,20 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
       new_i2_cost = 0;
     }
 
+  if (newi1pat)
+    {
+       tmp = PATTERN (i1);
+       PATTERN (i1) = newi1pat;
+       tmpi = INSN_CODE (i1);
+       INSN_CODE (i1) = -1;
+       new_i1_cost = insn_cost (i1, optimize_this_for_speed_p);
+       PATTERN (i1) = tmp;
+       INSN_CODE (i1) = tmpi;
+       new_cost = new_i1_cost > 0 ? new_i1_cost + new_cost : 0;
+    }
+  else
+    new_i1_cost = 0;
+
   if (undobuf.other_insn)
     {
       int old_other_cost, new_other_cost;
@@ -958,7 +973,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
        fprintf (dump_file, "%d + ", i1_cost);
       fprintf (dump_file, "%d + %d = %d\n", i2_cost, i3_cost, old_cost);
 
-      if (newi2pat)
+      if (newi1pat)
+       fprintf (dump_file, "replacement costs %d + %d + %d = %d\n",
+                new_i1_cost, new_i2_cost, new_i3_cost, new_cost);
+      else if (newi2pat)
        fprintf (dump_file, "replacement costs %d + %d = %d\n",
                 new_i2_cost, new_i3_cost, new_cost);
       else
@@ -973,7 +991,10 @@ combine_validate_cost (rtx_insn *i0, rtx_insn *i1, rtx_insn *i2, rtx_insn *i3,
   INSN_COST (i3) = new_i3_cost;
   if (i1)
     {
-      INSN_COST (i1) = 0;
+      if (newi1pat)
+       INSN_COST (i1) = new_i1_cost;
+      else
+       INSN_COST (i1) = 0;
       if (i0)
        INSN_COST (i0) = 0;
     }
@@ -2672,7 +2693,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
             int *new_direct_jump_p, rtx_insn *last_combined_insn)
 {
   /* New patterns for I3 and I2, respectively.  */
-  rtx newpat, newi2pat = 0;
+  rtx newpat, newi2pat = 0, newi1pat = 0;
   rtvec newpat_vec_with_clobbers = 0;
   int substed_i2 = 0, substed_i1 = 0, substed_i0 = 0;
   /* Indicates need to preserve SET in I0, I1 or I2 in I3 if it is not
@@ -2682,8 +2703,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
   int total_sets;
   /* Nonzero if I2's or I1's body now appears in I3.  */
   int i2_is_used = 0, i1_is_used = 0;
-  /* INSN_CODEs for new I3, new I2, and user of condition code.  */
+  /* INSN_CODEs for new I3, new I2, new I1 and user of condition code.  */
   int insn_code_number, i2_code_number = 0, other_code_number = 0;
+  int i1_code_number = 0;
   /* Contains I3 if the destination of I3 is used in its source, which means
      that the old life of I3 is being killed.  If that usage is placed into
      I2 and not in I3, a REG_DEAD note must be made.  */
@@ -2756,7 +2778,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
          else if (BINARY_P (src) && CONSTANT_P (XEXP (src, 1)))
            ngood++;
          else if (GET_CODE (src) == ASHIFT || GET_CODE (src) == ASHIFTRT
-                  || GET_CODE (src) == LSHIFTRT)
+                  || GET_CODE (src) == LSHIFTRT
+                  || GET_CODE (src) == SIGN_EXTEND
+                  || GET_CODE (src) == ZERO_EXTEND)
            nshift++;
        }
 
@@ -3399,6 +3423,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
              i1src = subst (i1src, pc_rtx, pc_rtx, 0, 0, 0);
            }
 
+         if (i0)
+           {
+             subst_low_luid = DF_INSN_LUID (i0);
+             i0src = subst (i0src, pc_rtx, pc_rtx, 0, 0, 0);
+           }
+
          subst_low_luid = DF_INSN_LUID (i2);
          i2src = subst (i2src, pc_rtx, pc_rtx, 0, 0, 0);
        }
@@ -3920,6 +3950,50 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
              rtx src_op0 = XEXP (setsrc, 0);
              rtx src_op1 = XEXP (setsrc, 1);
 
+             /* Double split when src of i0 and i1 are both ZERO_EXTEND.  */
+             if (i0 && i1
+                 && GET_CODE (PATTERN (i0)) == SET
+                 && GET_CODE (PATTERN (i1)) == SET
+                 && GET_CODE (SET_SRC (PATTERN (i0))) == ZERO_EXTEND
+                 && GET_CODE (SET_SRC (PATTERN (i1))) == ZERO_EXTEND
+                 && (rtx_equal_p (XEXP (*split, 0),
+                                  XEXP (SET_SRC (PATTERN (i1)), 0))
+                     || rtx_equal_p (XEXP (*split, 0),
+                                     XEXP (SET_SRC (PATTERN (i0)), 0))))
+               {
+                 newi1pat = NULL_RTX;
+                 rtx newdest, *i0_i1dest;
+                 machine_mode new_mode;
+
+                 new_mode = GET_MODE (*split);
+                 if (rtx_equal_p (XEXP (*split, 0),
+                                  XEXP (SET_SRC (PATTERN (i1)), 0)))
+                   i0_i1dest = &i1dest;
+                 else
+                   i0_i1dest = &i0dest;
+
+                 if (REGNO (i1dest) < FIRST_PSEUDO_REGISTER)
+                   newdest = gen_rtx_REG (new_mode, REGNO (*i0_i1dest));
+                 else
+                   {
+                     SUBST_MODE (regno_reg_rtx[REGNO (*i0_i1dest)], new_mode);
+                     newdest = regno_reg_rtx[REGNO (*i0_i1dest)];
+                   }
+
+                 newi1pat =  gen_rtx_SET (newdest, *split);
+                 SUBST (*split, newdest);
+
+                 i1_code_number = recog_for_combine (&newi1pat, i2,
+                                                       &new_i2_notes);
+                 if (i1_code_number < 0)
+                   {
+                     undo_all ();
+                     return 0;
+                   }
+
+                 split = find_split_point (&newpat, i3, false);
+               }
+
              /* Split "X = Y op Y" as "Z = Y; X = Z op Z".  */
              if (rtx_equal_p (src_op0, src_op1))
                {
@@ -4209,11 +4283,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
 
   /* Only allow this combination if insn_cost reports that the
      replacement instructions are cheaper than the originals.  */
-  if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat))
-    {
-      undo_all ();
-      return 0;
-    }
+    if (!combine_validate_cost (i0, i1, i2, i3, newpat, newi2pat, other_pat,
+                               newi1pat))
+      {
+       undo_all ();
+       return 0;
+      }
 
   if (MAY_HAVE_DEBUG_BIND_INSNS)
     {
@@ -4242,6 +4317,18 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
                /* Put back the new mode.  */
                adjust_reg_mode (reg, new_mode);
              }
+           else if (reg == i0dest)
+             {
+               propagate_for_debug (i0, last_combined_insn, reg, i0src,
+                                    this_basic_block);
+               adjust_reg_mode (reg, new_mode);
+             }
+           else if (reg == i1dest)
+             {
+               propagate_for_debug (i1, last_combined_insn, reg, i1src,
+                                    this_basic_block);
+               adjust_reg_mode (reg, new_mode);
+             }
            else
              {
                rtx tempreg = gen_raw_REG (old_mode, REGNO (reg));
@@ -4552,10 +4639,21 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
       {
        LOG_LINKS (i1) = NULL;
        REG_NOTES (i1) = 0;
-       if (MAY_HAVE_DEBUG_BIND_INSNS)
-         propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
-                              this_basic_block);
-       SET_INSN_DELETED (i1);
+       if (newi1pat)
+         {
+           if (MAY_HAVE_DEBUG_BIND_INSNS)
+             propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+                                  this_basic_block);
+           INSN_CODE (i1) = i1_code_number;
+           PATTERN (i1) = newi1pat;
+         }
+       else
+         {
+           if (MAY_HAVE_DEBUG_BIND_INSNS)
+           propagate_for_debug (i1, last_combined_insn, i1dest, i1src,
+                                this_basic_block);
+           SET_INSN_DELETED (i1);
+         }
       }
 
     if (i0)

Reply via email to