On Thu, Mar 2, 2023 at 2:28 PM Richard Biener <rguent...@suse.de> wrote:
>
> The following puts a hard limit on the inherently quadratic STV chain
> discovery.  Without a limit for the compiler.i testcase in PR26854
> we see at -O2
>
>  machine dep reorg                  : 574.45 ( 53%)
>
> with release checking while with the proposed limit it's
>
>  machine dep reorg                  :   2.86 (  1%)
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> OK?
>
> Thanks,
> Richard.
>
>         PR target/108738
>         * config/i386/i386.opt (--param x86-stv-max-visits): New param.
>         * doc/invoke.texi (--param x86-stv-max-visits): Document it.
>         * config/i386/i386-features.h (scalar_chain::max_visits): New.
>         (scalar_chain::build): Add bitmap parameter, return boolean.
>         (scalar_chain::add_insn): Likewise.
>         (scalar_chain::analyze_register_chain): Likewise.
>         * config/i386/i386-features.cc (scalar_chain::scalar_chain):
>         Initialize max_visits.
>         (scalar_chain::analyze_register_chain): When we exhaust
>         max_visits, abort.  Also abort when running into any
>         disallowed insn.
>         (scalar_chain::add_insn): Propagate abort.
>         (scalar_chain::build): Likewise.  When aborting amend
>         the set of disallowed insn with the insns set.
>         (convert_scalars_to_vector): Adjust.  Do not convert aborted
>         chains.

LGTM.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386-features.cc | 77 +++++++++++++++++++++++---------
>  gcc/config/i386/i386-features.h  | 10 +++--
>  gcc/config/i386/i386.opt         |  4 ++
>  gcc/doc/invoke.texi              |  4 ++
>  4 files changed, 70 insertions(+), 25 deletions(-)
>
> diff --git a/gcc/config/i386/i386-features.cc 
> b/gcc/config/i386/i386-features.cc
> index eff91301009..c09abf8fc20 100644
> --- a/gcc/config/i386/i386-features.cc
> +++ b/gcc/config/i386/i386-features.cc
> @@ -296,6 +296,8 @@ scalar_chain::scalar_chain (enum machine_mode smode_, 
> enum machine_mode vmode_)
>
>    n_sse_to_integer = 0;
>    n_integer_to_sse = 0;
> +
> +  max_visits = x86_stv_max_visits;
>  }
>
>  /* Free chain's data.  */
> @@ -354,10 +356,12 @@ scalar_chain::mark_dual_mode_def (df_ref def)
>  }
>
>  /* Check REF's chain to add new insns into a queue
> -   and find registers requiring conversion.  */
> +   and find registers requiring conversion.  Return true if OK, false
> +   if the analysis was aborted.  */
>
> -void
> -scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
> +bool
> +scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref,
> +                                     bitmap disallowed)
>  {
>    df_link *chain;
>    bool mark_def = false;
> @@ -371,6 +375,9 @@ scalar_chain::analyze_register_chain (bitmap candidates, 
> df_ref ref)
>        if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
>         continue;
>
> +      if (--max_visits == 0)
> +       return false;
> +
>        if (!DF_REF_REG_MEM_P (chain->ref))
>         {
>           if (bitmap_bit_p (insns, uid))
> @@ -381,6 +388,10 @@ scalar_chain::analyze_register_chain (bitmap candidates, 
> df_ref ref)
>               add_to_queue (uid);
>               continue;
>             }
> +
> +         /* If we run into parts of an aborted chain discovery abort.  */
> +         if (bitmap_bit_p (disallowed, uid))
> +           return false;
>         }
>
>        if (DF_REF_REG_DEF_P (chain->ref))
> @@ -401,15 +412,19 @@ scalar_chain::analyze_register_chain (bitmap 
> candidates, df_ref ref)
>
>    if (mark_def)
>      mark_dual_mode_def (ref);
> +
> +  return true;
>  }
>
> -/* Add instruction into a chain.  */
> +/* Add instruction into a chain.  Return true if OK, false if the search
> +   was aborted.  */
>
> -void
> -scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
> +bool
> +scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid,
> +                       bitmap disallowed)
>  {
>    if (!bitmap_set_bit (insns, insn_uid))
> -    return;
> +    return true;
>
>    if (dump_file)
>      fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, 
> chain_id);
> @@ -426,22 +441,27 @@ scalar_chain::add_insn (bitmap candidates, unsigned int 
> insn_uid)
>    df_ref ref;
>    for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
>      if (!HARD_REGISTER_P (DF_REF_REG (ref)))
> -      analyze_register_chain (candidates, ref);
> +      if (!analyze_register_chain (candidates, ref, disallowed))
> +       return false;
>
>    /* The operand(s) of VEC_SELECT don't need to be converted/convertible.  */
>    if (def_set && GET_CODE (SET_SRC (def_set)) == VEC_SELECT)
> -    return;
> +    return true;
>
>    for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
>      if (!DF_REF_REG_MEM_P (ref))
> -      analyze_register_chain (candidates, ref);
> +      if (!analyze_register_chain (candidates, ref, disallowed))
> +       return false;
> +
> +  return true;
>  }
>
>  /* Build new chain starting from insn INSN_UID recursively
> -   adding all dependent uses and definitions.  */
> +   adding all dependent uses and definitions.  Return true if OK, false
> +   if the chain discovery was aborted.  */
>
> -void
> -scalar_chain::build (bitmap candidates, unsigned insn_uid)
> +bool
> +scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed)
>  {
>    queue = BITMAP_ALLOC (NULL);
>    bitmap_set_bit (queue, insn_uid);
> @@ -454,7 +474,17 @@ scalar_chain::build (bitmap candidates, unsigned 
> insn_uid)
>        insn_uid = bitmap_first_set_bit (queue);
>        bitmap_clear_bit (queue, insn_uid);
>        bitmap_clear_bit (candidates, insn_uid);
> -      add_insn (candidates, insn_uid);
> +      if (!add_insn (candidates, insn_uid, disallowed))
> +       {
> +         /* If we aborted the search put sofar found insn on the set of
> +            disallowed insns so that further searches reaching them also
> +            abort and thus we abort the whole but yet undiscovered chain.  */
> +         bitmap_ior_into (disallowed, insns);
> +         if (dump_file)
> +           fprintf (dump_file, "Aborted chain #%d discovery\n", chain_id);
> +         BITMAP_FREE (queue);
> +         return false;
> +       }
>      }
>
>    if (dump_file)
> @@ -478,6 +508,8 @@ scalar_chain::build (bitmap candidates, unsigned insn_uid)
>      }
>
>    BITMAP_FREE (queue);
> +
> +  return true;
>  }
>
>  /* Return a cost of building a vector costant
> @@ -2282,6 +2314,7 @@ convert_scalars_to_vector (bool timode_p)
>
>    for (unsigned i = 0; i <= 2; ++i)
>      {
> +      auto_bitmap disallowed;
>        bitmap_tree_view (&candidates[i]);
>        while (!bitmap_empty_p (&candidates[i]))
>         {
> @@ -2296,14 +2329,14 @@ convert_scalars_to_vector (bool timode_p)
>           /* Find instructions chain we want to convert to vector mode.
>              Check all uses and definitions to estimate all required
>              conversions.  */
> -         chain->build (&candidates[i], uid);
> -
> -         if (chain->compute_convert_gain () > 0)
> -           converted_insns += chain->convert ();
> -         else
> -           if (dump_file)
> -             fprintf (dump_file, "Chain #%d conversion is not profitable\n",
> -                      chain->chain_id);
> +         if (chain->build (&candidates[i], uid, disallowed))
> +           {
> +             if (chain->compute_convert_gain () > 0)
> +               converted_insns += chain->convert ();
> +             else if (dump_file)
> +               fprintf (dump_file, "Chain #%d conversion is not 
> profitable\n",
> +                        chain->chain_id);
> +           }
>
>           delete chain;
>         }
> diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h
> index 00c2c5e8c2d..72a9f54b4e2 100644
> --- a/gcc/config/i386/i386-features.h
> +++ b/gcc/config/i386/i386-features.h
> @@ -148,12 +148,15 @@ class scalar_chain
>    /* Registers used in both vector and sclar modes.  */
>    bitmap defs_conv;
>
> +  /* Limit on chain discovery.  */
> +  unsigned max_visits;
> +
>    bitmap insns_conv;
>    hash_map<rtx, rtx> defs_map;
>    unsigned n_sse_to_integer;
>    unsigned n_integer_to_sse;
>
> -  void build (bitmap candidates, unsigned insn_uid);
> +  bool build (bitmap candidates, unsigned insn_uid, bitmap disallowed);
>    virtual int compute_convert_gain () = 0;
>    int convert ();
>
> @@ -168,8 +171,9 @@ class scalar_chain
>    void convert_registers ();
>
>   private:
> -  void add_insn (bitmap candidates, unsigned insn_uid);
> -  void analyze_register_chain (bitmap candidates, df_ref ref);
> +  bool add_insn (bitmap candidates, unsigned insn_uid, bitmap disallowed);
> +  bool analyze_register_chain (bitmap candidates, df_ref ref,
> +                              bitmap disallowed);
>    virtual void convert_insn (rtx_insn *insn) = 0;
>    virtual void convert_op (rtx *op, rtx_insn *insn) = 0;
>  };
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index 7d57f617d65..94fdd639ff1 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -599,6 +599,10 @@ Target Mask(STV) Save
>  Disable Scalar to Vector optimization pass transforming 64-bit integer
>  computations into a vector ones.
>
> +-param=x86-stv-max-visits=
> +Target Joined UInteger Var(x86_stv_max_visits) Init(10000) IntegerRange(1, 
> 1000000) Param
> +The maximum number of use and def visits when discovering a STV chain before 
> the discovery is aborted.
> +
>  mdispatch-scheduler
>  Target RejectNegative Var(flag_dispatch_scheduler)
>  Do dispatch scheduling if processor is bdver1, bdver2, bdver3, bdver4
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 0045661cc5d..2da68802356 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -16229,6 +16229,10 @@ The following choices of @var{name} are available on 
> i386 and x86_64 targets:
>  @item x86-stlf-window-ninsns
>  Instructions number above which STFL stall penalty can be compensated.
>
> +@item x86-stv-max-visits
> +The maximum number of use and def visits when discovering a STV chain before
> +the discovery is aborted.
> +
>  @end table
>
>  @end table
> --
> 2.35.3

Reply via email to