On Thu, Mar 2, 2023 at 2:28 PM Richard Biener <rguent...@suse.de> wrote: > > The following puts a hard limit on the inherently quadratic STV chain > discovery. Without a limit for the compiler.i testcase in PR26854 > we see at -O2 > > machine dep reorg : 574.45 ( 53%) > > with release checking while with the proposed limit it's > > machine dep reorg : 2.86 ( 1%) > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > OK? > > Thanks, > Richard. > > PR target/108738 > * config/i386/i386.opt (--param x86-stv-max-visits): New param. > * doc/invoke.texi (--param x86-stv-max-visits): Document it. > * config/i386/i386-features.h (scalar_chain::max_visits): New. > (scalar_chain::build): Add bitmap parameter, return boolean. > (scalar_chain::add_insn): Likewise. > (scalar_chain::analyze_register_chain): Likewise. > * config/i386/i386-features.cc (scalar_chain::scalar_chain): > Initialize max_visits. > (scalar_chain::analyze_register_chain): When we exhaust > max_visits, abort. Also abort when running into any > disallowed insn. > (scalar_chain::add_insn): Propagate abort. > (scalar_chain::build): Likewise. When aborting amend > the set of disallowed insn with the insns set. > (convert_scalars_to_vector): Adjust. Do not convert aborted > chains.
LGTM. Thanks, Uros. > --- > gcc/config/i386/i386-features.cc | 77 +++++++++++++++++++++++--------- > gcc/config/i386/i386-features.h | 10 +++-- > gcc/config/i386/i386.opt | 4 ++ > gcc/doc/invoke.texi | 4 ++ > 4 files changed, 70 insertions(+), 25 deletions(-) > > diff --git a/gcc/config/i386/i386-features.cc > b/gcc/config/i386/i386-features.cc > index eff91301009..c09abf8fc20 100644 > --- a/gcc/config/i386/i386-features.cc > +++ b/gcc/config/i386/i386-features.cc > @@ -296,6 +296,8 @@ scalar_chain::scalar_chain (enum machine_mode smode_, > enum machine_mode vmode_) > > n_sse_to_integer = 0; > n_integer_to_sse = 0; > + > + max_visits = x86_stv_max_visits; > } > > /* Free chain's data. */ > @@ -354,10 +356,12 @@ scalar_chain::mark_dual_mode_def (df_ref def) > } > > /* Check REF's chain to add new insns into a queue > - and find registers requiring conversion. */ > + and find registers requiring conversion. Return true if OK, false > + if the analysis was aborted. */ > > -void > -scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref) > +bool > +scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref, > + bitmap disallowed) > { > df_link *chain; > bool mark_def = false; > @@ -371,6 +375,9 @@ scalar_chain::analyze_register_chain (bitmap candidates, > df_ref ref) > if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref))) > continue; > > + if (--max_visits == 0) > + return false; > + > if (!DF_REF_REG_MEM_P (chain->ref)) > { > if (bitmap_bit_p (insns, uid)) > @@ -381,6 +388,10 @@ scalar_chain::analyze_register_chain (bitmap candidates, > df_ref ref) > add_to_queue (uid); > continue; > } > + > + /* If we run into parts of an aborted chain discovery abort. */ > + if (bitmap_bit_p (disallowed, uid)) > + return false; > } > > if (DF_REF_REG_DEF_P (chain->ref)) > @@ -401,15 +412,19 @@ scalar_chain::analyze_register_chain (bitmap > candidates, df_ref ref) > > if (mark_def) > mark_dual_mode_def (ref); > + > + return true; > } > > -/* Add instruction into a chain. */ > +/* Add instruction into a chain. Return true if OK, false if the search > + was aborted. */ > > -void > -scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid) > +bool > +scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid, > + bitmap disallowed) > { > if (!bitmap_set_bit (insns, insn_uid)) > - return; > + return true; > > if (dump_file) > fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, > chain_id); > @@ -426,22 +441,27 @@ scalar_chain::add_insn (bitmap candidates, unsigned int > insn_uid) > df_ref ref; > for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref)) > if (!HARD_REGISTER_P (DF_REF_REG (ref))) > - analyze_register_chain (candidates, ref); > + if (!analyze_register_chain (candidates, ref, disallowed)) > + return false; > > /* The operand(s) of VEC_SELECT don't need to be converted/convertible. */ > if (def_set && GET_CODE (SET_SRC (def_set)) == VEC_SELECT) > - return; > + return true; > > for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref)) > if (!DF_REF_REG_MEM_P (ref)) > - analyze_register_chain (candidates, ref); > + if (!analyze_register_chain (candidates, ref, disallowed)) > + return false; > + > + return true; > } > > /* Build new chain starting from insn INSN_UID recursively > - adding all dependent uses and definitions. */ > + adding all dependent uses and definitions. Return true if OK, false > + if the chain discovery was aborted. */ > > -void > -scalar_chain::build (bitmap candidates, unsigned insn_uid) > +bool > +scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed) > { > queue = BITMAP_ALLOC (NULL); > bitmap_set_bit (queue, insn_uid); > @@ -454,7 +474,17 @@ scalar_chain::build (bitmap candidates, unsigned > insn_uid) > insn_uid = bitmap_first_set_bit (queue); > bitmap_clear_bit (queue, insn_uid); > bitmap_clear_bit (candidates, insn_uid); > - add_insn (candidates, insn_uid); > + if (!add_insn (candidates, insn_uid, disallowed)) > + { > + /* If we aborted the search put sofar found insn on the set of > + disallowed insns so that further searches reaching them also > + abort and thus we abort the whole but yet undiscovered chain. */ > + bitmap_ior_into (disallowed, insns); > + if (dump_file) > + fprintf (dump_file, "Aborted chain #%d discovery\n", chain_id); > + BITMAP_FREE (queue); > + return false; > + } > } > > if (dump_file) > @@ -478,6 +508,8 @@ scalar_chain::build (bitmap candidates, unsigned insn_uid) > } > > BITMAP_FREE (queue); > + > + return true; > } > > /* Return a cost of building a vector costant > @@ -2282,6 +2314,7 @@ convert_scalars_to_vector (bool timode_p) > > for (unsigned i = 0; i <= 2; ++i) > { > + auto_bitmap disallowed; > bitmap_tree_view (&candidates[i]); > while (!bitmap_empty_p (&candidates[i])) > { > @@ -2296,14 +2329,14 @@ convert_scalars_to_vector (bool timode_p) > /* Find instructions chain we want to convert to vector mode. > Check all uses and definitions to estimate all required > conversions. */ > - chain->build (&candidates[i], uid); > - > - if (chain->compute_convert_gain () > 0) > - converted_insns += chain->convert (); > - else > - if (dump_file) > - fprintf (dump_file, "Chain #%d conversion is not profitable\n", > - chain->chain_id); > + if (chain->build (&candidates[i], uid, disallowed)) > + { > + if (chain->compute_convert_gain () > 0) > + converted_insns += chain->convert (); > + else if (dump_file) > + fprintf (dump_file, "Chain #%d conversion is not > profitable\n", > + chain->chain_id); > + } > > delete chain; > } > diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h > index 00c2c5e8c2d..72a9f54b4e2 100644 > --- a/gcc/config/i386/i386-features.h > +++ b/gcc/config/i386/i386-features.h > @@ -148,12 +148,15 @@ class scalar_chain > /* Registers used in both vector and sclar modes. */ > bitmap defs_conv; > > + /* Limit on chain discovery. */ > + unsigned max_visits; > + > bitmap insns_conv; > hash_map<rtx, rtx> defs_map; > unsigned n_sse_to_integer; > unsigned n_integer_to_sse; > > - void build (bitmap candidates, unsigned insn_uid); > + bool build (bitmap candidates, unsigned insn_uid, bitmap disallowed); > virtual int compute_convert_gain () = 0; > int convert (); > > @@ -168,8 +171,9 @@ class scalar_chain > void convert_registers (); > > private: > - void add_insn (bitmap candidates, unsigned insn_uid); > - void analyze_register_chain (bitmap candidates, df_ref ref); > + bool add_insn (bitmap candidates, unsigned insn_uid, bitmap disallowed); > + bool analyze_register_chain (bitmap candidates, df_ref ref, > + bitmap disallowed); > virtual void convert_insn (rtx_insn *insn) = 0; > virtual void convert_op (rtx *op, rtx_insn *insn) = 0; > }; > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt > index 7d57f617d65..94fdd639ff1 100644 > --- a/gcc/config/i386/i386.opt > +++ b/gcc/config/i386/i386.opt > @@ -599,6 +599,10 @@ Target Mask(STV) Save > Disable Scalar to Vector optimization pass transforming 64-bit integer > computations into a vector ones. > > +-param=x86-stv-max-visits= > +Target Joined UInteger Var(x86_stv_max_visits) Init(10000) IntegerRange(1, > 1000000) Param > +The maximum number of use and def visits when discovering a STV chain before > the discovery is aborted. > + > mdispatch-scheduler > Target RejectNegative Var(flag_dispatch_scheduler) > Do dispatch scheduling if processor is bdver1, bdver2, bdver3, bdver4 > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index 0045661cc5d..2da68802356 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -16229,6 +16229,10 @@ The following choices of @var{name} are available on > i386 and x86_64 targets: > @item x86-stlf-window-ninsns > Instructions number above which STFL stall penalty can be compensated. > > +@item x86-stv-max-visits > +The maximum number of use and def visits when discovering a STV chain before > +the discovery is aborted. > + > @end table > > @end table > -- > 2.35.3