On Mon, Nov 18, 2024 at 9:13 PM Richard Sandiford
<[email protected]> wrote:
>
> In an attempt to reduce compile time, rtl-ssa computes the cost
> of existing instructions lazily rather than eagerly. However,
> this means that it might need to calculate the cost of an existing
> instruction while a change group is already in progress for the
> instruction. rtl_ssa::insn_info::calculate_cost therefore temporarily
> undoes any in-progress changes in order to get back the original pattern
> and insn code.
>
> rtl-ssa's main use of insn costs is in rtl_ssa::changes_are_worthwhile,
> which calculates the cost of a change involving an arbitrary number
> of instructions. Summing up the original cost of N instructions
> while those N instructions have in-progress changes could lead to
> O(N*N) rtl changes, since each lazy calculation might have to
> temporarily undo the changes to all N instructions.
Wheee ...
>
> We can avoid that by converting the current temporarily_undo_changes/
> redo_changes pair into an RAII class and extending it to allow
> nested uses. rtl_ssa::changes_are_worthwhile can then undo the
> in-progress changes once, before computing the original cost of all
> the instructions.
>
> Tested on aarch64-linux-gnu. Also tested against the testcase in
> the PR, where the old compile time was 3.7x greater than the new compile
> time (tested with a stage 3 --enable-checking=yes,extra,rtl compiler).
> late-combine went from being 73% of compile time to less than 1%
> (rounded to 0% by -fmem-report). The main time sinks now seem to be
> DOM and FRE.
>
> OK to install?
OK.
Thanks,
Richard.
> Richard
>
>
> gcc/
> PR rtl-optimization/117297
> * recog.h (temporarily_undo_changes, redo_changes): Delete in
> favor of...
> (undo_recog_changes): ...this new RAII class.
> * fwprop.cc (should_replace_address): Update accordingly.
> (fwprop_propagation::check_mem): Likewise.
> (try_fwprop_subst_note): Likewise.
> (try_fwprop_subst_pattern): Likewise.
> * rtl-ssa/insns.cc (insn_info::calculate_cost): Likewise.
> * rtl-ssa/changes.cc (rtl_ssa::changes_are_worthwhile): Temporarily
> undo all in-progress changes while computing the cost of the original
> sequence.
> * recog.cc (temporarily_undone_changes): Replace with...
> (undo_recog_changes::s_num_changes): ...this static member variable.
> (validate_change_1): Update check accordingly.
> (confirm_change_group): Likewise.
> (num_validated_changes): Likewise.
> (temporarily_undo_changes): Replace with...
> (undo_recog_changes::undo_recog_changes): ...this constructor.
> (redo_changes): Replace with...
> (undo_recog_changes::~undo_recog_changes): ...this destructor.
> ---
> gcc/fwprop.cc | 30 ++++++++++++++++--------------
> gcc/recog.cc | 39 +++++++++++++--------------------------
> gcc/recog.h | 31 +++++++++++++++++++++++++++++--
> gcc/rtl-ssa/changes.cc | 26 +++++++++++++++++---------
> gcc/rtl-ssa/insns.cc | 3 +--
> 5 files changed, 76 insertions(+), 53 deletions(-)
>
> diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc
> index 8cba6b7ce9f..5ddefdf6d2f 100644
> --- a/gcc/fwprop.cc
> +++ b/gcc/fwprop.cc
> @@ -146,10 +146,11 @@ should_replace_address (int old_num_changes, rtx mem,
> rtx_insn *insn)
>
> /* Prefer the new address if it is less expensive. */
> bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
> - temporarily_undo_changes (old_num_changes);
> - gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
> - MEM_ADDR_SPACE (mem), speed);
> - redo_changes (old_num_changes);
> + {
> + undo_recog_changes undo (old_num_changes);
> + gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
> + MEM_ADDR_SPACE (mem), speed);
> + }
> gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
> MEM_ADDR_SPACE (mem), speed);
>
> @@ -160,9 +161,8 @@ should_replace_address (int old_num_changes, rtx mem,
> rtx_insn *insn)
> if (gain == 0)
> {
> gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
> - temporarily_undo_changes (old_num_changes);
> + undo_recog_changes undo (old_num_changes);
> gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
> - redo_changes (old_num_changes);
> }
>
> return (gain > 0);
> @@ -220,9 +220,11 @@ fwprop_propagation::check_mem (int old_num_changes, rtx
> mem)
> return false;
> }
>
> - temporarily_undo_changes (old_num_changes);
> - bool can_simplify = can_simplify_addr (XEXP (mem, 0));
> - redo_changes (old_num_changes);
> + bool can_simplify = [&]()
> + {
> + undo_recog_changes undo (old_num_changes);
> + return can_simplify_addr (XEXP (mem, 0));
> + } ();
> if (!can_simplify)
> {
> failure_reason = "would replace a frame address";
> @@ -414,9 +416,10 @@ try_fwprop_subst_note (insn_info *use_insn, set_info
> *def,
> {
> fprintf (dump_file, "\nin notes of insn %d, replacing:\n ",
> INSN_UID (use_rtl));
> - temporarily_undo_changes (0);
> - print_inline_rtx (dump_file, note, 2);
> - redo_changes (0);
> + {
> + undo_recog_changes undo (0);
> + print_inline_rtx (dump_file, note, 2);
> + }
> fprintf (dump_file, "\n with:\n ");
> print_inline_rtx (dump_file, note, 2);
> fprintf (dump_file, "\n");
> @@ -468,9 +471,8 @@ try_fwprop_subst_pattern (obstack_watermark &attempt,
> insn_change &use_change,
> {
> fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
> def_insn->uid (), use_insn->uid ());
> - temporarily_undo_changes (0);
> + undo_recog_changes undo (0);
> print_rtl_single (dump_file, PATTERN (use_rtl));
> - redo_changes (0);
> }
>
> bool ok = recog (attempt, use_change);
> diff --git a/gcc/recog.cc b/gcc/recog.cc
> index 4c63f9301c6..d1811f3618d 100644
> --- a/gcc/recog.cc
> +++ b/gcc/recog.cc
> @@ -194,7 +194,7 @@ static change_t *changes;
> static int changes_allocated;
>
> static int num_changes = 0;
> -static int temporarily_undone_changes = 0;
> +int undo_recog_changes::s_num_changes = 0;
>
> /* Validate a proposed change to OBJECT. LOC is the location in the rtl
> at which NEW_RTX will be placed. If NEW_LEN is >= 0, XVECLEN (NEW_RTX, 0)
> @@ -220,7 +220,7 @@ static bool
> validate_change_1 (rtx object, rtx *loc, rtx new_rtx, bool in_group,
> bool unshare, int new_len = -1)
> {
> - gcc_assert (temporarily_undone_changes == 0);
> + gcc_assert (!undo_recog_changes::is_active ());
> rtx old = *loc;
>
> /* Single-element parallels aren't valid and won't match anything.
> @@ -536,7 +536,7 @@ confirm_change_group (void)
> int i;
> rtx last_object = NULL;
>
> - gcc_assert (temporarily_undone_changes == 0);
> + gcc_assert (!undo_recog_changes::is_active ());
> for (i = 0; i < num_changes; i++)
> {
> rtx object = changes[i].object;
> @@ -592,7 +592,7 @@ num_validated_changes (void)
> void
> cancel_changes (int num)
> {
> - gcc_assert (temporarily_undone_changes == 0);
> + gcc_assert (!undo_recog_changes::is_active ());
> int i;
>
> /* Back out all the changes. Do this in the opposite order in which
> @@ -627,34 +627,21 @@ swap_change (int num)
> }
> }
>
> -/* Temporarily undo all the changes numbered NUM and up, with a view
> - to reapplying them later. The next call to the changes machinery
> - must be:
> -
> - redo_changes (NUM)
> -
> - otherwise things will end up in an invalid state. */
> -
> -void
> -temporarily_undo_changes (int num)
> +undo_recog_changes::undo_recog_changes (int num)
> + : m_old_num_changes (s_num_changes)
> {
> - gcc_assert (temporarily_undone_changes == 0 && num <= num_changes);
> - for (int i = num_changes - 1; i >= num; i--)
> + gcc_assert (num <= num_changes - s_num_changes);
> + for (int i = num_changes - s_num_changes - 1; i >= num; i--)
> swap_change (i);
> - temporarily_undone_changes = num_changes - num;
> + s_num_changes = num_changes - num;
> }
>
> -/* Redo the changes that were temporarily undone by:
> -
> - temporarily_undo_changes (NUM). */
> -
> -void
> -redo_changes (int num)
> +undo_recog_changes::~undo_recog_changes ()
> {
> - gcc_assert (temporarily_undone_changes == num_changes - num);
> - for (int i = num; i < num_changes; ++i)
> + for (int i = num_changes - s_num_changes;
> + i < num_changes - m_old_num_changes; ++i)
> swap_change (i);
> - temporarily_undone_changes = 0;
> + s_num_changes = m_old_num_changes;
> }
>
> /* Reduce conditional compilation elsewhere. */
> diff --git a/gcc/recog.h b/gcc/recog.h
> index 1dccce78ba4..17ccdb6296b 100644
> --- a/gcc/recog.h
> +++ b/gcc/recog.h
> @@ -202,6 +202,35 @@ inline insn_propagation::insn_propagation (rtx_insn
> *insn)
> {
> }
>
> +/* An RAII class that temporarily undoes part of the current change group.
> + The sequence:
> +
> + {
> + ...;
> + undo_recog_changes undo (NUM);
> + STMTS;
> + }
> +
> + executes STMTS with all the changes numbered NUM and up temporarily
> + reverted. STMTS must not try to add to the current change group.
> +
> + Nested uses are supported, but each nested NUM must be no greater than
> + outer NUMs. */
> +
> +class undo_recog_changes
> +{
> +public:
> + undo_recog_changes (int);
> + ~undo_recog_changes ();
> +
> + static bool is_active () { return s_num_changes != 0; }
> +
> +private:
> + int m_old_num_changes;
> +
> + static int s_num_changes;
> +};
> +
> extern void init_recog (void);
> extern void init_recog_no_volatile (void);
> extern bool check_asm_operands (rtx);
> @@ -216,8 +245,6 @@ extern void confirm_change_group (void);
> extern bool apply_change_group (void);
> extern int num_validated_changes (void);
> extern void cancel_changes (int);
> -extern void temporarily_undo_changes (int);
> -extern void redo_changes (int);
> extern bool constrain_operands (int, alternative_mask);
> extern bool constrain_operands_cached (rtx_insn *, int);
> extern bool memory_address_addr_space_p (machine_mode, rtx, addr_space_t,
> diff --git a/gcc/rtl-ssa/changes.cc b/gcc/rtl-ssa/changes.cc
> index f2be1195bc0..5f3a0f0f0ec 100644
> --- a/gcc/rtl-ssa/changes.cc
> +++ b/gcc/rtl-ssa/changes.cc
> @@ -173,21 +173,29 @@ rtl_ssa::changes_are_worthwhile
> (array_slice<insn_change *const> changes,
> bool strict_p)
> {
> unsigned int old_cost = 0;
> - unsigned int new_cost = 0;
> sreal weighted_old_cost = 0;
> - sreal weighted_new_cost = 0;
> auto entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
> + {
> + undo_recog_changes undo (0);
> + for (insn_change *change : changes)
> + {
> + // Count zero for the old cost if the old instruction was a no-op
> + // move or had an unknown cost. This should reduce the chances of
> + // making an unprofitable change.
> + old_cost += change->old_cost ();
> + basic_block cfg_bb = change->bb ()->cfg_bb ();
> + if (optimize_bb_for_speed_p (cfg_bb))
> + weighted_old_cost += (cfg_bb->count.to_sreal_scale (entry_count)
> + * change->old_cost ());
> +
> + }
> + }
> + unsigned int new_cost = 0;
> + sreal weighted_new_cost = 0;
> for (insn_change *change : changes)
> {
> - // Count zero for the old cost if the old instruction was a no-op
> - // move or had an unknown cost. This should reduce the chances of
> - // making an unprofitable change.
> - old_cost += change->old_cost ();
> basic_block cfg_bb = change->bb ()->cfg_bb ();
> bool for_speed = optimize_bb_for_speed_p (cfg_bb);
> - if (for_speed)
> - weighted_old_cost += (cfg_bb->count.to_sreal_scale (entry_count)
> - * change->old_cost ());
> if (!change->is_deletion ()
> && INSN_CODE (change->rtl ()) != NOOP_MOVE_INSN_CODE)
> {
> diff --git a/gcc/rtl-ssa/insns.cc b/gcc/rtl-ssa/insns.cc
> index bfc6683cc45..4af79bee7b9 100644
> --- a/gcc/rtl-ssa/insns.cc
> +++ b/gcc/rtl-ssa/insns.cc
> @@ -49,14 +49,13 @@ void
> insn_info::calculate_cost () const
> {
> basic_block cfg_bb = BLOCK_FOR_INSN (m_rtl);
> - temporarily_undo_changes (0);
> + undo_recog_changes (0);
> if (INSN_CODE (m_rtl) == NOOP_MOVE_INSN_CODE)
> // insn_cost also uses 0 to mean "don't know". Callers that
> // want to distinguish the cases will need to check INSN_CODE.
> m_cost_or_uid = 0;
> else
> m_cost_or_uid = insn_cost (m_rtl, optimize_bb_for_speed_p (cfg_bb));
> - redo_changes (0);
> }
>
> // Add NOTE to the instruction's notes.
> --
> 2.25.1
>