"H.J. Lu" <hjl.to...@gmail.com> writes:
> On Mon, May 30, 2022 at 09:35:43AM +0100, Richard Sandiford wrote:
>> "H.J. Lu" <hjl.to...@gmail.com> writes:
>> > ---
>> > RTL DSE tracks redundant constant stores within a basic block.  When RTL
>> > loop invariant motion hoists a constant initialization out of the loop
>> > into a separate basic block, the constant store value becomes unknown
>> > within the original basic block.  When recording store for RTL DSE, check
>> > if the source register is set only once to a constant by a non-partial
>> > unconditional load.  If yes, record the constant as the constant store
>> > source.  It eliminates unrolled zero stores after memset 0 in a loop
>> > where a vector register is used as the zero store source.
>> >
>> > Extract find_single_def_src from loop-iv.cc and move it to df-core.cc:
>> >
>> > 1. Rename to df_find_single_def_src.
>> > 2. Change the argument to rtx and use rtx_equal_p.
>> > 3. Return null for partial or conditional defs.
>> >
>> > gcc/
>> >
>> >    PR rtl-optimization/105638
>> >    * df-core.cc (df_find_single_def_sr): Moved and renamed from
>> >    find_single_def_src in loop-iv.cc.  Change the argument to rtx
>> >    and use rtx_equal_p.  Return null for partial or conditional
>> >    defs.
>> >    * df.h (df_find_single_def_src): New prototype.
>> >    * dse.cc (record_store): Use the constant source if the source
>> >    register is set only once.
>> >    * loop-iv.cc (find_single_def_src): Moved to df-core.cc.
>> >    (replace_single_def_regs): Replace find_single_def_src with
>> >    df_find_single_def_src.
>> >
>> > gcc/testsuite/
>> >
>> >    PR rtl-optimization/105638
>> >    * g++.target/i386/pr105638.C: New test.
>> > ---
>> >  gcc/df-core.cc                           | 44 +++++++++++++++++++++++
>> >  gcc/df.h                                 |  1 +
>> >  gcc/dse.cc                               | 14 ++++++++
>> >  gcc/loop-iv.cc                           | 45 +-----------------------
>> >  gcc/testsuite/g++.target/i386/pr105638.C | 44 +++++++++++++++++++++++
>> >  5 files changed, 104 insertions(+), 44 deletions(-)
>> >  create mode 100644 gcc/testsuite/g++.target/i386/pr105638.C
>> >
>> > diff --git a/gcc/df-core.cc b/gcc/df-core.cc
>> > index a901b84878f..f9b4de8eb7a 100644
>> > --- a/gcc/df-core.cc
>> > +++ b/gcc/df-core.cc
>> > @@ -2009,6 +2009,50 @@ df_reg_used (rtx_insn *insn, rtx reg)
>> >    return df_find_use (insn, reg) != NULL;
>> >  }
>> >  
>> > +/* If REG has a single definition, return its known value, otherwise 
>> > return
>> > +   null.  */
>> > +
>> > +rtx
>> > +df_find_single_def_src (rtx reg)
>> > +{
>> > +  rtx src = NULL_RTX;
>> > +
>> > +  /* Don't look through unbounded number of single definition REG copies,
>> > +     there might be loops for sources with uninitialized variables.  */
>> > +  for (int cnt = 0; cnt < 128; cnt++)
>> > +    {
>> > +      df_ref adef = DF_REG_DEF_CHAIN (REGNO (reg));
>> > +      if (adef == NULL || DF_REF_NEXT_REG (adef) != NULL
>> > +    || DF_REF_IS_ARTIFICIAL (adef)
>> > +    || (DF_REF_FLAGS (adef)
>> > +        & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
>> > +  return NULL_RTX;
>> > +
>> > +      rtx set = single_set (DF_REF_INSN (adef));
>> > +      if (set == NULL || !rtx_equal_p (SET_DEST (set), reg))
>> > +  return NULL_RTX;
>> > +
>> > +      rtx note = find_reg_equal_equiv_note (DF_REF_INSN (adef));
>> > +      if (note && function_invariant_p (XEXP (note, 0)))
>> > +  {
>> > +    src = XEXP (note, 0);
>> > +    break;
>> > +  }
>> 
>> Seems simpler to return this directly, rather than break and then
>> check function_invariant_p again.
>
> Fixed.
>
>> 
>> > +      src = SET_SRC (set);
>> > +
>> > +      if (REG_P (src))
>> > +  {
>> > +    reg = src;
>> > +    continue;
>> > +  }
>> > +      break;
>> > +    }
>> > +  if (!function_invariant_p (src))
>> > +    return NULL_RTX;
>> > +
>> > +  return src;
>> > +}
>> > +
>> >
>> >  
>> > /*----------------------------------------------------------------------------
>> >     Debugging and printing functions.
>> > diff --git a/gcc/df.h b/gcc/df.h
>> > index bd329205d08..71e249ad20a 100644
>> > --- a/gcc/df.h
>> > +++ b/gcc/df.h
>> > @@ -991,6 +991,7 @@ extern df_ref df_find_def (rtx_insn *, rtx);
>> >  extern bool df_reg_defined (rtx_insn *, rtx);
>> >  extern df_ref df_find_use (rtx_insn *, rtx);
>> >  extern bool df_reg_used (rtx_insn *, rtx);
>> > +extern rtx df_find_single_def_src (rtx);
>> >  extern void df_worklist_dataflow (struct dataflow *,bitmap, int *, int);
>> >  extern void df_print_regset (FILE *file, const_bitmap r);
>> >  extern void df_print_word_regset (FILE *file, const_bitmap r);
>> > diff --git a/gcc/dse.cc b/gcc/dse.cc
>> > index 30c11cee034..c915266f025 100644
>> > --- a/gcc/dse.cc
>> > +++ b/gcc/dse.cc
>> > @@ -1508,6 +1508,20 @@ record_store (rtx body, bb_info_t bb_info)
>> >  
>> >      if (tem && CONSTANT_P (tem))
>> >        const_rhs = tem;
>> > +    else
>> > +      {
>> > +        /* If RHS is set only once to a constant, set CONST_RHS
>> > +           to the constant.  */
>> > +        rtx def_src = df_find_single_def_src (rhs);
>> > +        if (def_src != nullptr && CONSTANT_P (def_src))
>> > +          {
>> > +            df_ref def = DF_REG_DEF_CHAIN (REGNO (rhs));
>> > +            rtx_insn *def_insn = DF_REF_INSN (def);
>> > +            rtx def_body = single_set (def_insn);
>> > +            if (rhs == SET_DEST (def_body))
>> > +              const_rhs = def_src;
>> > +          }
>> > +      }
>> 
>> This shouldn't be necessary now: we can rely on def_src being correct.
>> 
>> I.e. I think this can just be:
>> 
>>            rtx def_src = df_find_single_def_src (rhs);
>>            if (def_src != nullptr && CONSTANT_P (def_src))
>>              const_rhs = def_src;
>
> Fixed.
>
>> 
>> Thanks,
>> Richard
>> 
>
> Here is the v4 patch.  OK for master?
>
> Thanks.
>
>
> H.J.
> ---
> RTL DSE tracks redundant constant stores within a basic block.  When RTL
> loop invariant motion hoists a constant initialization out of the loop
> into a separate basic block, the constant store value becomes unknown
> within the original basic block.  When recording store for RTL DSE, check
> if the source register is set only once to a constant by a non-partial
> unconditional load.  If yes, record the constant as the constant store
> source.  It eliminates unrolled zero stores after memset 0 in a loop
> where a vector register is used as the zero store source.
>
> gcc/
>
>       PR rtl-optimization/105638
>       * df-core.cc (df_find_single_def_src): Moved and renamed from
>       find_single_def_src in loop-iv.cc.  Change the argument to rtx
>       and use rtx_equal_p.  Return null for partial or conditional
>       defs.
>       * df.h (df_find_single_def_src): New prototype.
>       * dse.cc (record_store): Use the constant source if the source
>       register is set only once.
>       * loop-iv.cc (find_single_def_src): Moved to df-core.cc.
>       (replace_single_def_regs): Replace find_single_def_src with
>       df_find_single_def_src.
>
> gcc/testsuite/
>
>       PR rtl-optimization/105638
>       * g++.target/i386/pr105638.C: New test.
> ---
>  gcc/df-core.cc                           | 44 +++++++++++++++++++++++
>  gcc/df.h                                 |  1 +
>  gcc/dse.cc                               |  8 +++++
>  gcc/loop-iv.cc                           | 45 +-----------------------
>  gcc/testsuite/g++.target/i386/pr105638.C | 44 +++++++++++++++++++++++
>  5 files changed, 98 insertions(+), 44 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr105638.C
>
> diff --git a/gcc/df-core.cc b/gcc/df-core.cc
> index a901b84878f..e56d9bcf760 100644
> --- a/gcc/df-core.cc
> +++ b/gcc/df-core.cc
> @@ -2009,6 +2009,50 @@ df_reg_used (rtx_insn *insn, rtx reg)
>    return df_find_use (insn, reg) != NULL;
>  }
>  
> +/* If REG has a single definition, return its known value, otherwise return
> +   null.  */
> +
> +rtx
> +df_find_single_def_src (rtx reg)
> +{
> +  rtx src = NULL_RTX;
> +
> +  /* Don't look through unbounded number of single definition REG copies,
> +     there might be loops for sources with uninitialized variables.  */
> +  for (int cnt = 0; cnt < 128; cnt++)
> +    {
> +      df_ref adef = DF_REG_DEF_CHAIN (REGNO (reg));
> +      if (adef == NULL || DF_REF_NEXT_REG (adef) != NULL
> +       || DF_REF_IS_ARTIFICIAL (adef)
> +       || (DF_REF_FLAGS (adef)
> +           & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
> +     return NULL_RTX;
> +
> +      rtx set = single_set (DF_REF_INSN (adef));
> +      if (set == NULL || !rtx_equal_p (SET_DEST (set), reg))
> +     return NULL_RTX;
> +
> +      rtx note = find_reg_equal_equiv_note (DF_REF_INSN (adef));
> +      if (note && function_invariant_p (XEXP (note, 0)))
> +     {
> +       src = XEXP (note, 0);
> +       return src;

Seems simpler without the assignment:

    return XEXP (note, 0);

OK with that change, thanks.

Richard

> +     }
> +      src = SET_SRC (set);
> +
> +      if (REG_P (src))
> +     {
> +       reg = src;
> +       continue;
> +     }
> +      break;
> +    }
> +  if (!function_invariant_p (src))
> +    return NULL_RTX;
> +
> +  return src;
> +}
> +
>  
>  
> /*----------------------------------------------------------------------------
>     Debugging and printing functions.
> diff --git a/gcc/df.h b/gcc/df.h
> index bd329205d08..71e249ad20a 100644
> --- a/gcc/df.h
> +++ b/gcc/df.h
> @@ -991,6 +991,7 @@ extern df_ref df_find_def (rtx_insn *, rtx);
>  extern bool df_reg_defined (rtx_insn *, rtx);
>  extern df_ref df_find_use (rtx_insn *, rtx);
>  extern bool df_reg_used (rtx_insn *, rtx);
> +extern rtx df_find_single_def_src (rtx);
>  extern void df_worklist_dataflow (struct dataflow *,bitmap, int *, int);
>  extern void df_print_regset (FILE *file, const_bitmap r);
>  extern void df_print_word_regset (FILE *file, const_bitmap r);
> diff --git a/gcc/dse.cc b/gcc/dse.cc
> index 30c11cee034..994c60dc189 100644
> --- a/gcc/dse.cc
> +++ b/gcc/dse.cc
> @@ -1508,6 +1508,14 @@ record_store (rtx body, bb_info_t bb_info)
>  
>         if (tem && CONSTANT_P (tem))
>           const_rhs = tem;
> +       else
> +         {
> +           /* If RHS is set only once to a constant, set CONST_RHS
> +              to the constant.  */
> +           rtx def_src = df_find_single_def_src (rhs);
> +           if (def_src != nullptr && CONSTANT_P (def_src))
> +             const_rhs = def_src;
> +         }
>       }
>      }
>  
> diff --git a/gcc/loop-iv.cc b/gcc/loop-iv.cc
> index 0eafe7d2362..d639336445a 100644
> --- a/gcc/loop-iv.cc
> +++ b/gcc/loop-iv.cc
> @@ -1378,49 +1378,6 @@ simple_rhs_p (rtx rhs)
>      }
>  }
>  
> -/* If REGNO has a single definition, return its known value, otherwise return
> -   null.  */
> -
> -static rtx
> -find_single_def_src (unsigned int regno)
> -{
> -  rtx src = NULL_RTX;
> -
> -  /* Don't look through unbounded number of single definition REG copies,
> -     there might be loops for sources with uninitialized variables.  */
> -  for (int cnt = 0; cnt < 128; cnt++)
> -    {
> -      df_ref adef = DF_REG_DEF_CHAIN (regno);
> -      if (adef == NULL || DF_REF_NEXT_REG (adef) != NULL
> -       || DF_REF_IS_ARTIFICIAL (adef))
> -     return NULL_RTX;
> -
> -      rtx set = single_set (DF_REF_INSN (adef));
> -      if (set == NULL || !REG_P (SET_DEST (set))
> -       || REGNO (SET_DEST (set)) != regno)
> -     return NULL_RTX;
> -
> -      rtx note = find_reg_equal_equiv_note (DF_REF_INSN (adef));
> -      if (note && function_invariant_p (XEXP (note, 0)))
> -     {
> -       src = XEXP (note, 0);
> -       break;
> -     }
> -      src = SET_SRC (set);
> -
> -      if (REG_P (src))
> -     {
> -       regno = REGNO (src);
> -       continue;
> -     }
> -      break;
> -    }
> -  if (!function_invariant_p (src))
> -    return NULL_RTX;
> -
> -  return src;
> -}
> -
>  /* If any registers in *EXPR that have a single definition, try to replace
>     them with the known-equivalent values.  */
>  
> @@ -1433,7 +1390,7 @@ replace_single_def_regs (rtx *expr)
>      {
>        rtx x = *iter;
>        if (REG_P (x))
> -     if (rtx new_x = find_single_def_src (REGNO (x)))
> +     if (rtx new_x = df_find_single_def_src (x))
>         {
>           *expr = simplify_replace_rtx (*expr, x, new_x);
>           goto repeat;
> diff --git a/gcc/testsuite/g++.target/i386/pr105638.C 
> b/gcc/testsuite/g++.target/i386/pr105638.C
> new file mode 100644
> index 00000000000..ff40a459de1
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr105638.C
> @@ -0,0 +1,44 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-std=gnu++20 -O2 -march=skylake" } */
> +/* { dg-final { scan-assembler-not "vpxor" } } */
> +
> +#include <stdint.h>
> +#include <vector>
> +#include <tr1/array>
> +
> +class FastBoard {
> +public:
> +    typedef std::pair<int, int> movescore_t;
> +    typedef std::tr1::array<movescore_t, 24> scoredlist_t;
> +    
> +protected:
> +    std::vector<int> m_critical;
> +
> +    int m_boardsize;    
> +};
> +
> +class FastState {
> +public:        
> +    FastBoard board;
> +    
> +    int movenum;              
> +protected:
> +    FastBoard::scoredlist_t scoredmoves;
> +};
> +
> +class KoState : public FastState {
> +private:         
> +    std::vector<uint64_t> ko_hash_history;   
> +    std::vector<uint64_t> hash_history;     
> +};
> +
> +class GameState : public KoState {
> +public:                    
> +    void foo ();      
> +private:
> +    std::vector<KoState> game_history;                          
> +};
> +
> +void GameState::foo() {
> +    game_history.resize(movenum);
> +}

Reply via email to