Hi, we recently hit a problem where fwprop would not propagate a memory address into an insn because our backend (s390) tells it that the address_cost ()s for an address with index are higher than for one without. Subsequently, should_replace_address () returns false and no propagation is performed.
This checks seems to be just an early bail out since, when disabling it, try_fwprop_subst () still checks src costs and would allow the propagation. The problem is, though, that it relies on the newly propagated-into insn already created before checking the costs so it cannot be called at the same place should_replace_address () is being called. In this patch I quickly worked around this, adding an update flag to try_fwprop_subst () that, when set to false, does not actually commit the propagation but still checks costs. I'm sure there is a better and much smaller way and I don't indent to apply this in its current state (there's a lot of boilerplate code to keep default behavior) but it might serve as basis for discussion/ideas. Richard mentioned the insn_cost hook, but this would require the insn to exist as well. Regards Robin
diff --git a/gcc/fwprop.c b/gcc/fwprop.c index 0fca0f1edbc..6eeb77b93ed 100644 --- a/gcc/fwprop.c +++ b/gcc/fwprop.c @@ -392,7 +392,7 @@ canonicalize_address (rtx x) static bool should_replace_address (rtx old_rtx, rtx new_rtx, machine_mode mode, - addr_space_t as, bool speed) + addr_space_t as, bool speed, bool cost_addr) { int gain; @@ -405,8 +405,11 @@ should_replace_address (rtx old_rtx, rtx new_rtx, machine_mode mode, return true; /* Prefer the new address if it is less expensive. */ - gain = (address_cost (old_rtx, mode, as, speed) - - address_cost (new_rtx, mode, as, speed)); + if (cost_addr) + gain = (address_cost (old_rtx, mode, as, speed) + - address_cost (new_rtx, mode, as, speed)); + else + gain = 0; /* If the addresses have equivalent cost, prefer the new address if it has the highest `set_src_cost'. That has the potential of @@ -448,6 +451,10 @@ enum { PR_OPTIMIZE_FOR_SPEED = 4 }; +static bool +try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn, + bool set_reg_equal, bool update); + /* Replace all occurrences of OLD in *PX with NEW and try to simplify the resulting expression. Replace *PX with a new RTL expression if an @@ -458,7 +465,11 @@ enum { that is because there is no simplify_gen_* function for LO_SUM). */ static bool -propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) +propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags); + +static bool +propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags, + rtx *loc, df_ref use, rtx_insn *def_insn, bool set_reg_equal) { rtx x = *px, tem = NULL_RTX, op0, op1, op2; enum rtx_code code = GET_CODE (x); @@ -491,7 +502,8 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) case RTX_UNARY: op0 = XEXP (x, 0); op_mode = GET_MODE (op0); - valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags); + valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags, + loc, use, def_insn, set_reg_equal); if (op0 == XEXP (x, 0)) return true; tem = simplify_gen_unary (code, mode, op0, op_mode); @@ -501,8 +513,10 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) case RTX_COMM_ARITH: op0 = XEXP (x, 0); op1 = XEXP (x, 1); - valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags); - valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags); + valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags, + loc, use, def_insn, set_reg_equal); + valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags, + loc, use, def_insn, set_reg_equal); if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1)) return true; tem = simplify_gen_binary (code, mode, op0, op1); @@ -513,8 +527,10 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) op0 = XEXP (x, 0); op1 = XEXP (x, 1); op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); - valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags); - valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags); + valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags, + loc, use, def_insn, set_reg_equal); + valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags, + loc, use, def_insn, set_reg_equal); if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1)) return true; tem = simplify_gen_relational (code, mode, op_mode, op0, op1); @@ -526,9 +542,12 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) op1 = XEXP (x, 1); op2 = XEXP (x, 2); op_mode = GET_MODE (op0); - valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags); - valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags); - valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags); + valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags, + loc, use, def_insn, set_reg_equal); + valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags, + loc, use, def_insn, set_reg_equal); + valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags, + loc, use, def_insn, set_reg_equal); if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2)) return true; if (op_mode == VOIDmode) @@ -541,7 +560,8 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) if (code == SUBREG) { op0 = XEXP (x, 0); - valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags); + valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags, + loc, use, def_insn, set_reg_equal); if (op0 == XEXP (x, 0)) return true; tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)), @@ -561,7 +581,8 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) op0 = new_op0 = targetm.delegitimize_address (op0); valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx, - flags | PR_CAN_APPEAR); + flags | PR_CAN_APPEAR, + loc, use, def_insn, set_reg_equal); /* Dismiss transformation that we do not want to carry on. */ if (!valid_ops @@ -572,12 +593,20 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) canonicalize_address (new_op0); + tem = replace_equiv_address_nv (x, new_op0); + + bool ok = try_fwprop_subst (use, loc, tem, def_insn, + set_reg_equal, false); + /* Copy propagations are always ok. Otherwise check the costs. */ - if (!(REG_P (old_rtx) && REG_P (new_rtx)) - && !should_replace_address (op0, new_op0, GET_MODE (x), - MEM_ADDR_SPACE (x), - flags & PR_OPTIMIZE_FOR_SPEED)) - return true; + if (!(REG_P (old_rtx) && REG_P (new_rtx))) + { + if (!should_replace_address (op0, new_op0, GET_MODE (x), + MEM_ADDR_SPACE (x), + flags & PR_OPTIMIZE_FOR_SPEED, + !ok)) + return true; + } tem = replace_equiv_address_nv (x, new_op0); } @@ -590,8 +619,10 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) /* The only simplification we do attempts to remove references to op0 or make it constant -- in both cases, op0's invalidity will not make the result invalid. */ - propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR); - valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags); + propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR, + loc, use, def_insn, set_reg_equal); + valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags, + loc, use, def_insn, set_reg_equal); if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1)) return true; @@ -642,6 +673,13 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) return valid_ops || can_appear || CONSTANT_P (tem); } +static bool +propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) +{ + return propagate_rtx_1 (px, old_rtx, new_rtx, flags, + NULL, NULL, NULL, false); +} + /* Return true if X constains a non-constant mem. */ @@ -666,7 +704,8 @@ varying_mem_p (const_rtx x) static rtx propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx, - bool speed) + bool speed, df_ref use, rtx_insn *def_insn, + bool set_reg_equal) { rtx tem; bool collapsed; @@ -689,7 +728,8 @@ propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx, flags |= PR_OPTIMIZE_FOR_SPEED; tem = x; - collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags); + collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags, + &x, use, def_insn, set_reg_equal); if (tem == x || !collapsed) return NULL_RTX; @@ -706,6 +746,14 @@ propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx, return tem; } +static rtx +propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx, + bool speed) +{ + return propagate_rtx (x, mode, old_rtx, new_rtx, + speed, NULL, NULL, false); +} + @@ -950,7 +998,7 @@ update_df (rtx_insn *insn, rtx note) static bool try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn, - bool set_reg_equal) + bool set_reg_equal, bool update) { rtx_insn *insn = DF_REF_INSN (use); rtx set = single_set (insn); @@ -1002,6 +1050,9 @@ try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn, ok = true; } + if (!update) + return ok; + if (ok) { confirm_change_group (); @@ -1045,6 +1096,14 @@ try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn, return ok; } +static bool +try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn, + bool set_reg_equal) +{ + return try_fwprop_subst (use, loc, new_rtx, def_insn, + set_reg_equal, true); +} + /* For the given single_set INSN, containing SRC known to be a ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN is redundant due to the register being set by a LOAD_EXTEND_OP @@ -1353,7 +1412,8 @@ forward_propagate_and_simplify (df_ref use, rtx_insn *def_insn, rtx def_set) mode = GET_MODE (*loc); new_rtx = propagate_rtx (*loc, mode, reg, src, - optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn))); + optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)), + use, def_insn, set_reg_equal); if (!new_rtx) return false;