On Wed, 29 Nov 2023 at 20:05, Joern Rennecke
<joern.renne...@embecosm.com> wrote:

> > I suspect it'd be more useful to add handling of LSHIFTRT and ASHIFTRT
> > .  Some ports do
> > a lot of static shifting.
>
> > +    case SS_ASHIFT:
> > +    case US_ASHIFT:
> > +      if (!mask || XEXP (x, 1) == const0_rtx)
> > +       return 0;
>
> P.S.: I just realized that this is a pasto: in the case of a const0_rtx
> shift count,
> returning 0 will usually be wrong.

I've attached my current patch version.
    ext-dce.cc: handle vector modes.
    
    * ext-dce.cc: Amend comment to explain how liveness of vectors is tracked.
      (carry_backpropagate): Use GET_MODE_INNER.
      (ext_dce_process_sets): Likewise.  Only apply big endian correction for
      subregs if they don't have a vector mode.
      (ext_dce_process_uses): Likewise.

    * ext-dce.cc: carry_backpropagate: [US]S_ASHIFT fix, handle [LA]SHIFTRT
    
    * ext-dce.cc (safe_for_live_propagation): Add LSHIFTRT and ASHIFTRT.
      (carry_backpropagate): Reformat top comment.
      Add handling of LSHIFTRT and ASHIFTRT.
      Fix bit count for [SU]MUL_HIGHPART.
      Fix pasto for [SU]S_ASHIFT.

    * ext-dce.c: Fixes for carry handling.
    
    * ext-dce.c (safe_for_live_propagation): Handle MINUS.
      (ext_dce_process_uses): Break out carry handling into ..
      (carry_backpropagate): This new function.
      Better handling of ASHIFT.
      Add handling of SMUL_HIGHPART, UMUL_HIGHPART, SIGN_EXTEND, SS_ASHIFT and
      US_ASHIFT.

    * ext-dce.c: fix SUBREG_BYTE test
    
    As mentioned in
    https://gcc.gnu.org/pipermail/gcc-patches/2023-November/637486.html
    and
    https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638473.html


diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc
index 4e4c57de117..228c50e8b73 100644
--- a/gcc/ext-dce.cc
+++ b/gcc/ext-dce.cc
@@ -38,7 +38,10 @@ along with GCC; see the file COPYING3.  If not see
    bit 0..7   (least significant byte)
    bit 8..15  (second least significant byte)
    bit 16..31
-   bit 32..BITS_PER_WORD-1  */
+   bit 32..BITS_PER_WORD-1
+
+   For vector modes, we apply these bit groups to every lane; if any of the
+   bits in the group are live in any lane, we consider this group live.  */
 
 /* Note this pass could be used to narrow memory loads too.  It's
    not clear if that's profitable or not in general.  */
@@ -83,6 +86,7 @@ safe_for_live_propagation (rtx_code code)
     case SIGN_EXTEND:
     case TRUNCATE:
     case PLUS:
+    case MINUS:
     case MULT:
     case SMUL_HIGHPART:
     case UMUL_HIGHPART:
@@ -96,6 +100,8 @@ safe_for_live_propagation (rtx_code code)
     case SS_ASHIFT:
     case US_ASHIFT:
     case ASHIFT:
+    case LSHIFTRT:
+    case ASHIFTRT:
       return true;
 
     /* There may be other safe codes.  If so they can be added
@@ -215,13 +221,22 @@ ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap 
livenow, bitmap live_tmp)
 
          /* Phase one of destination handling.  First remove any wrapper
             such as SUBREG or ZERO_EXTRACT.  */
-         unsigned HOST_WIDE_INT mask = GET_MODE_MASK (GET_MODE (x));
+         unsigned HOST_WIDE_INT mask
+           = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
          if (SUBREG_P (x)
              && !paradoxical_subreg_p (x)
              && SUBREG_BYTE (x).is_constant ())
            {
-             bit = subreg_lsb (x).to_constant ();
-             mask = GET_MODE_MASK (GET_MODE (SUBREG_REG (x))) << bit;
+             enum machine_mode omode = GET_MODE_INNER (GET_MODE (x));
+             enum machine_mode imode = GET_MODE (SUBREG_REG (x));
+             bit = 0;
+             if (!VECTOR_MODE_P (GET_MODE (x))
+                 || (GET_MODE_SIZE (imode).is_constant ()
+                     && (GET_MODE_SIZE (omode).to_constant ()
+                         > GET_MODE_SIZE (imode).to_constant ())))
+               bit = subreg_lsb (x).to_constant ();
+             mask = (GET_MODE_MASK (GET_MODE_INNER (GET_MODE (SUBREG_REG (x))))
+                     << bit);
              gcc_assert (mask);
              if (!mask)
                mask = -0x100000000ULL;
@@ -365,6 +380,84 @@ binop_implies_op2_fully_live (rtx_code code)
     }
 }
 
+/* X, with code CODE, is an operation for which safe_for_live_propagation
+   holds true, and bits set in MASK are live in the result.  Compute a
+   mask of (potentially) live bits in the non-constant inputs.  In case of
+   binop_implies_op2_fully_live (e.g. shifts), the computed mask may
+   exclusively pertain to the first operand.  */
+
+HOST_WIDE_INT
+carry_backpropagate (HOST_WIDE_INT mask, enum rtx_code code, rtx x)
+{
+  enum machine_mode mode = GET_MODE_INNER (GET_MODE (x));
+  HOST_WIDE_INT mmask = GET_MODE_MASK (mode);
+  switch (code)
+    {
+    case ASHIFT:
+      if (CONSTANT_P (XEXP (x, 1))
+         && known_lt (UINTVAL (XEXP (x, 1)), GET_MODE_BITSIZE (mode)))
+       return mask >> INTVAL (XEXP (x, 1));
+      /* Fall through.  */
+    case PLUS: case MINUS:
+    case MULT:
+      return mask ? ((2ULL << floor_log2 (mask)) - 1) : 0;
+    case LSHIFTRT:
+      if (CONSTANT_P (XEXP (x, 1))
+         && known_lt (UINTVAL (XEXP (x, 1)), GET_MODE_BITSIZE (mode)))
+       return mmask & (mask << INTVAL (XEXP (x, 1)));
+      return mmask;
+    case ASHIFTRT:
+      if (CONSTANT_P (XEXP (x, 1))
+         && known_lt (UINTVAL (XEXP (x, 1)), GET_MODE_BITSIZE (mode)))
+       {
+         HOST_WIDE_INT sign = 0;
+         if (HOST_BITS_PER_WIDE_INT - clz_hwi (mask) + INTVAL (XEXP (x, 1))
+             > GET_MODE_BITSIZE (mode).to_constant ())
+           sign = (1ULL << GET_MODE_BITSIZE (mode).to_constant ()) - 1;
+         return sign | (mmask & (mask << INTVAL (XEXP (x, 1))));
+       }
+      return mmask;
+    case SMUL_HIGHPART: case UMUL_HIGHPART:
+      if (!mask || XEXP (x, 1) == const0_rtx)
+       return 0;
+      if (CONSTANT_P (XEXP (x, 1)))
+       {
+         if (pow2p_hwi (INTVAL (XEXP (x, 1))))
+           return mmask & (mask << (GET_MODE_BITSIZE (mode).to_constant ()
+                                    - exact_log2 (INTVAL (XEXP (x, 1)))));
+
+         int bits = (HOST_BITS_PER_WIDE_INT
+                     + GET_MODE_BITSIZE (mode).to_constant ()
+                     - clz_hwi (mask) - ctz_hwi (INTVAL (XEXP (x, 1))));
+         if (bits < GET_MODE_BITSIZE (mode).to_constant ())
+           return (1ULL << bits) - 1;
+       }
+      return mmask;
+    case SIGN_EXTEND:
+      if (mask & ~mmask)
+       mask |= 1ULL << (GET_MODE_BITSIZE (mode).to_constant () - 1);
+      return mask;
+
+    /* We propagate for the shifted operand, but not the shift
+       count.  The count is handled specially.  */
+    case SS_ASHIFT:
+    case US_ASHIFT:
+      if (!mask)
+       return 0;
+      if (CONSTANT_P (XEXP (x, 1))
+         && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (mode).to_constant ())
+       {
+         return ((mmask & ~((unsigned HOST_WIDE_INT)mmask
+                            >> (INTVAL (XEXP (x, 1))
+                                + (XEXP (x, 1) != const0_rtx
+                                   && code == SS_ASHIFT))))
+                 | (mask >> INTVAL (XEXP (x, 1))));
+       }
+      return mmask;
+    default:
+      return mask;
+    }
+}
 /* Process uses in INSN contained in OBJ.  Set appropriate bits in LIVENOW
    for any chunks of pseudos that become live, potentially filtering using
    bits from LIVE_TMP.
@@ -414,11 +507,19 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, bitmap 
livenow,
 
          /* ?!? How much of this should mirror SET handling, potentially
             being shared?   */
-         if (SUBREG_BYTE (dst).is_constant () && SUBREG_P (dst))
+         if (SUBREG_P (dst) && SUBREG_BYTE (dst).is_constant ())
            {
-             bit = subreg_lsb (dst).to_constant ();
-             if (bit >= HOST_BITS_PER_WIDE_INT)
-               bit = HOST_BITS_PER_WIDE_INT - 1;
+             enum machine_mode omode = GET_MODE_INNER (GET_MODE (dst));
+             enum machine_mode imode = GET_MODE (SUBREG_REG (dst));
+             if (!VECTOR_MODE_P (GET_MODE (dst))
+                 || (GET_MODE_SIZE (imode).is_constant ()
+                     && (GET_MODE_SIZE (omode).to_constant ()
+                         > GET_MODE_SIZE (imode).to_constant ())))
+               {
+                 bit = subreg_lsb (dst).to_constant ();
+                 if (bit >= HOST_BITS_PER_WIDE_INT)
+                   bit = HOST_BITS_PER_WIDE_INT - 1;
+               }
              dst = SUBREG_REG (dst);
            }
          else if (GET_CODE (dst) == ZERO_EXTRACT
@@ -464,7 +565,7 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, bitmap 
livenow,
                {
                  rtx inner = XEXP (src, 0);
                  unsigned HOST_WIDE_INT src_mask
-                   = GET_MODE_MASK (GET_MODE (inner));
+                   = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (inner)));
 
                  /* DST_MASK could be zero if we had something in the SET
                     that we couldn't handle.  */
@@ -480,11 +581,7 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, bitmap 
livenow,
                 sure everything that should get marked as live is marked
                 from here onward.  */
 
-             /* ?!? What is the point of this adjustment to DST_MASK?  */
-             if (code == PLUS || code == MINUS
-                 || code == MULT || code == ASHIFT)
-               dst_mask
-                 = dst_mask ? ((2ULL << floor_log2 (dst_mask)) - 1) : 0;
+             dst_mask = carry_backpropagate (dst_mask, code, src);
 
              /* We will handle the other operand of a binary operator
                 at the bottom of the loop by resetting Y.  */
@@ -516,12 +613,20 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, bitmap 
livenow,
                         and process normally (conservatively).  */
                      if (!REG_P (SUBREG_REG (y)))
                        break;
-                     bit = subreg_lsb (y).to_constant ();
-                     if (dst_mask)
+                     enum machine_mode omode = GET_MODE_INNER (GET_MODE (y));
+                     enum machine_mode imode = GET_MODE (SUBREG_REG (y));
+                     if (!VECTOR_MODE_P (GET_MODE (y))
+                         || (GET_MODE_SIZE (imode).is_constant ()
+                             && (GET_MODE_SIZE (omode).to_constant ()
+                                 > GET_MODE_SIZE (imode).to_constant ())))
                        {
-                         dst_mask <<= bit;
-                         if (!dst_mask)
-                           dst_mask = -0x100000000ULL;
+                         bit = subreg_lsb (y).to_constant ();
+                         if (dst_mask)
+                           {
+                             dst_mask <<= bit;
+                             if (!dst_mask)
+                               dst_mask = -0x100000000ULL;
+                           }
                        }
                      y = SUBREG_REG (y);
                    }
@@ -539,7 +644,8 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, bitmap 
livenow,
                         propagate destination liveness through, then just
                         set the mask to the mode's mask.  */
                      if (!safe_for_live_propagation (code))
-                       tmp_mask = GET_MODE_MASK (GET_MODE (y));
+                       tmp_mask
+                         = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (y)));
 
                      if (tmp_mask & 0xff)
                        bitmap_set_bit (livenow, rn);

Reply via email to