On Wed, 29 Nov 2023 at 20:05, Joern Rennecke <joern.renne...@embecosm.com> wrote:
> > I suspect it'd be more useful to add handling of LSHIFTRT and ASHIFTRT > > . Some ports do > > a lot of static shifting. > > > + case SS_ASHIFT: > > + case US_ASHIFT: > > + if (!mask || XEXP (x, 1) == const0_rtx) > > + return 0; > > P.S.: I just realized that this is a pasto: in the case of a const0_rtx > shift count, > returning 0 will usually be wrong. I've attached my current patch version.
ext-dce.cc: handle vector modes. * ext-dce.cc: Amend comment to explain how liveness of vectors is tracked. (carry_backpropagate): Use GET_MODE_INNER. (ext_dce_process_sets): Likewise. Only apply big endian correction for subregs if they don't have a vector mode. (ext_cde_process_uses): Likewise. * ext-dce.cc: carry_backpropagate: [US]S_ASHIFT fix, handle [LA]SHIFTRT * ext-dce.cc (safe_for_live_propagation): Add LSHIFTRT and ASHIFTRT. (carry_backpropagate): Reformat top comment. Add handling of LSHIFTRT and ASHIFTRT. Fix bit count for [SU]MUL_HIGHPART. Fix pasto for [SU]S_ASHIFT. * ext-dce.c: Fixes for carry handling. * ext-dce.c (safe_for_live_propagation): Handle MINUS. (ext_dce_process_uses): Break out carry handling into .. (carry_backpropagate): This new function. Better handling of ASHIFT. Add handling of SMUL_HIGHPART, UMUL_HIGHPART, SIGN_EXTEND, SS_ASHIFT and US_ASHIFT. * ext-dce.c: fix SUBREG_BYTE test As mentioned in https://gcc.gnu.org/pipermail/gcc-patches/2023-November/637486.html and https://gcc.gnu.org/pipermail/gcc-patches/2023-November/638473.html diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc index 4e4c57de117..228c50e8b73 100644 --- a/gcc/ext-dce.cc +++ b/gcc/ext-dce.cc @@ -38,7 +38,10 @@ along with GCC; see the file COPYING3. If not see bit 0..7 (least significant byte) bit 8..15 (second least significant byte) bit 16..31 - bit 32..BITS_PER_WORD-1 */ + bit 32..BITS_PER_WORD-1 + + For vector modes, we apply these bit groups to every lane; if any of the + bits in the group are live in any lane, we consider this group live. */ /* Note this pass could be used to narrow memory loads too. It's not clear if that's profitable or not in general. 
*/ @@ -83,6 +86,7 @@ safe_for_live_propagation (rtx_code code) case SIGN_EXTEND: case TRUNCATE: case PLUS: + case MINUS: case MULT: case SMUL_HIGHPART: case UMUL_HIGHPART: @@ -96,6 +100,8 @@ safe_for_live_propagation (rtx_code code) case SS_ASHIFT: case US_ASHIFT: case ASHIFT: + case LSHIFTRT: + case ASHIFTRT: return true; /* There may be other safe codes. If so they can be added @@ -215,13 +221,22 @@ ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap livenow, bitmap live_tmp) /* Phase one of destination handling. First remove any wrapper such as SUBREG or ZERO_EXTRACT. */ - unsigned HOST_WIDE_INT mask = GET_MODE_MASK (GET_MODE (x)); + unsigned HOST_WIDE_INT mask + = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x))); if (SUBREG_P (x) && !paradoxical_subreg_p (x) && SUBREG_BYTE (x).is_constant ()) { - bit = subreg_lsb (x).to_constant (); - mask = GET_MODE_MASK (GET_MODE (SUBREG_REG (x))) << bit; + enum machine_mode omode = GET_MODE_INNER (GET_MODE (x)); + enum machine_mode imode = GET_MODE (SUBREG_REG (x)); + bit = 0; + if (!VECTOR_MODE_P (GET_MODE (x)) + || (GET_MODE_SIZE (imode).is_constant () + && (GET_MODE_SIZE (omode).to_constant () + > GET_MODE_SIZE (imode).to_constant ()))) + bit = subreg_lsb (x).to_constant (); + mask = (GET_MODE_MASK (GET_MODE_INNER (GET_MODE (SUBREG_REG (x)))) + << bit); gcc_assert (mask); if (!mask) mask = -0x100000000ULL; @@ -365,6 +380,84 @@ binop_implies_op2_fully_live (rtx_code code) } } +/* X, with code CODE, is an operation for which safe_for_live_propagation + holds true, and bits set in MASK are live in the result. Compute a + mask of (potentially) live bits in the non-constant inputs. In case of + binop_implies_op2_fully_live (e.g. shifts), the computed mask may + exclusively pertain to the first operand. 
*/ + +HOST_WIDE_INT +carry_backpropagate (HOST_WIDE_INT mask, enum rtx_code code, rtx x) +{ + enum machine_mode mode = GET_MODE_INNER (GET_MODE (x)); + HOST_WIDE_INT mmask = GET_MODE_MASK (mode); + switch (code) + { + case ASHIFT: + if (CONSTANT_P (XEXP (x, 1)) + && known_lt (UINTVAL (XEXP (x, 1)), GET_MODE_BITSIZE (mode))) + return mask >> INTVAL (XEXP (x, 1)); + /* Fall through. */ + case PLUS: case MINUS: + case MULT: + return mask ? ((2ULL << floor_log2 (mask)) - 1) : 0; + case LSHIFTRT: + if (CONSTANT_P (XEXP (x, 1)) + && known_lt (UINTVAL (XEXP (x, 1)), GET_MODE_BITSIZE (mode))) + return mmask & (mask << INTVAL (XEXP (x, 1))); + return mmask; + case ASHIFTRT: + if (CONSTANT_P (XEXP (x, 1)) + && known_lt (UINTVAL (XEXP (x, 1)), GET_MODE_BITSIZE (mode))) + { + HOST_WIDE_INT sign = 0; + if (HOST_BITS_PER_WIDE_INT - clz_hwi (mask) + INTVAL (XEXP (x, 1)) + > GET_MODE_BITSIZE (mode).to_constant ()) + sign = (1ULL << GET_MODE_BITSIZE (mode).to_constant ()) - 1; + return sign | (mmask & (mask << INTVAL (XEXP (x, 1)))); + } + return mmask; + case SMUL_HIGHPART: case UMUL_HIGHPART: + if (!mask || XEXP (x, 1) == const0_rtx) + return 0; + if (CONSTANT_P (XEXP (x, 1))) + { + if (pow2p_hwi (INTVAL (XEXP (x, 1)))) + return mmask & (mask << (GET_MODE_BITSIZE (mode).to_constant () + - exact_log2 (INTVAL (XEXP (x, 1))))); + + int bits = (HOST_BITS_PER_WIDE_INT + + GET_MODE_BITSIZE (mode).to_constant () + - clz_hwi (mask) - ctz_hwi (INTVAL (XEXP (x, 1)))); + if (bits < GET_MODE_BITSIZE (mode).to_constant ()) + return (1ULL << bits) - 1; + } + return mmask; + case SIGN_EXTEND: + if (mask & ~mmask) + mask |= 1ULL << (GET_MODE_BITSIZE (mode).to_constant () - 1); + return mask; + + /* We propagate for the shifted operand, but not the shift + count. The count is handled specially. 
*/ + case SS_ASHIFT: + case US_ASHIFT: + if (!mask) + return 0; + if (CONSTANT_P (XEXP (x, 1)) + && UINTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (mode).to_constant ()) + { + return ((mmask & ~((unsigned HOST_WIDE_INT)mmask + >> (INTVAL (XEXP (x, 1)) + + (XEXP (x, 1) != const0_rtx + && code == SS_ASHIFT)))) + | (mask >> INTVAL (XEXP (x, 1)))); + } + return mmask; + default: + return mask; + } +} /* Process uses in INSN contained in OBJ. Set appropriate bits in LIVENOW for any chunks of pseudos that become live, potentially filtering using bits from LIVE_TMP. @@ -414,11 +507,19 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, bitmap livenow, /* ?!? How much of this should mirror SET handling, potentially being shared? */ - if (SUBREG_BYTE (dst).is_constant () && SUBREG_P (dst)) + if (SUBREG_P (dst) && SUBREG_BYTE (dst).is_constant ()) { - bit = subreg_lsb (dst).to_constant (); - if (bit >= HOST_BITS_PER_WIDE_INT) - bit = HOST_BITS_PER_WIDE_INT - 1; + enum machine_mode omode = GET_MODE_INNER (GET_MODE (dst)); + enum machine_mode imode = GET_MODE (SUBREG_REG (dst)); + if (!VECTOR_MODE_P (GET_MODE (dst)) + || (GET_MODE_SIZE (imode).is_constant () + && (GET_MODE_SIZE (omode).to_constant () + > GET_MODE_SIZE (imode).to_constant ()))) + { + bit = subreg_lsb (dst).to_constant (); + if (bit >= HOST_BITS_PER_WIDE_INT) + bit = HOST_BITS_PER_WIDE_INT - 1; + } dst = SUBREG_REG (dst); } else if (GET_CODE (dst) == ZERO_EXTRACT @@ -464,7 +565,7 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, bitmap livenow, { rtx inner = XEXP (src, 0); unsigned HOST_WIDE_INT src_mask - = GET_MODE_MASK (GET_MODE (inner)); + = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (inner))); /* DST_MASK could be zero if we had something in the SET that we couldn't handle. */ @@ -480,11 +581,7 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, bitmap livenow, sure everything that should get marked as live is marked from here onward. */ - /* ?!? What is the point of this adjustment to DST_MASK? 
*/ - if (code == PLUS || code == MINUS - || code == MULT || code == ASHIFT) - dst_mask - = dst_mask ? ((2ULL << floor_log2 (dst_mask)) - 1) : 0; + dst_mask = carry_backpropagate (dst_mask, code, src); /* We will handle the other operand of a binary operator at the bottom of the loop by resetting Y. */ @@ -516,12 +613,20 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, bitmap livenow, and process normally (conservatively). */ if (!REG_P (SUBREG_REG (y))) break; - bit = subreg_lsb (y).to_constant (); - if (dst_mask) + enum machine_mode omode = GET_MODE_INNER (GET_MODE (y)); + enum machine_mode imode = GET_MODE (SUBREG_REG (y)); + if (!VECTOR_MODE_P (GET_MODE (y)) + || (GET_MODE_SIZE (imode).is_constant () + && (GET_MODE_SIZE (omode).to_constant () + > GET_MODE_SIZE (imode).to_constant ()))) { - dst_mask <<= bit; - if (!dst_mask) - dst_mask = -0x100000000ULL; + bit = subreg_lsb (y).to_constant (); + if (dst_mask) + { + dst_mask <<= bit; + if (!dst_mask) + dst_mask = -0x100000000ULL; + } } y = SUBREG_REG (y); } @@ -539,7 +644,8 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, bitmap livenow, propagate destination liveness through, then just set the mask to the mode's mask. */ if (!safe_for_live_propagation (code)) - tmp_mask = GET_MODE_MASK (GET_MODE (y)); + tmp_mask + = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (y))); if (tmp_mask & 0xff) bitmap_set_bit (livenow, rn);