On Thu, 7 Nov 2019 at 16:28, Jakub Jelinek <ja...@redhat.com> wrote:
>
> Hi!
>
> The following patch adds handling of clobbers in store-merging.  The intent
> is if we have a clobber followed by some stores into the clobbered area,
> even if don't store all the bytes in the area, we can avoid masking, because
> the non-stored bytes are undefined and in some cases we can even overwrite
> the whole area with the same or smaller number of stores compared to the
> original IL.
> Clobbers aren't removed from the IL, even if the following stores completely
> cover the whole area, as clobbers carry important additional information
> that the old value is gone, e.g. for tail call discovery if address taken
> before the clobber but not after it, removing the clobbers would disable
> tail call optimization.
> The patch right now treats the clobbered non-stored bytes as non-masked zero
> stores, except that we don't add stores to whole words etc. if there are no
> other overlapping stores; I have a separate patch that also computed
> defined_mask which contained whether some bytes are just undefined and we
> could in theory try different bit patterns in those bytes, but in the end
> decided it is too complicated and if needed, could be done as a follow-up.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>

Hi Jakub,

I've noticed that the new test store-merging-2.C fails on arm:
FAIL: g++.dg/opt/store-merging-2.C  -std=gnu++14  scan-tree-dump
store-merging "New sequence of 2 stores to replace old one of 3
stores"

Christophe



> 2019-11-07  Jakub Jelinek  <ja...@redhat.com>
>
>         PR target/92038
>         * gimple-ssa-store-merging.c (find_constituent_stores): For return
>         value only, return non-NULL if there is a single non-clobber
>         constituent store even if there are constituent clobbers and return
>         one of clobber constituent stores if all constituent stores are
>         clobbers.
>         (split_group): Handle clobbers.
>         (imm_store_chain_info::output_merged_store): When computing
>         bzero_first, look after all clobbers at the start.  Don't count
>         clobber stmts in orig_num_stmts, except if the first orig store is
>         a clobber covering the whole area and split_stores cover the whole
>         area, consider equal number of stmts ok.  Punt if split_stores
>         contains only ->orig stores and their number plus number of original
>         clobbers is equal to original number of stmts.  For ->orig, look past
>         clobbers in the constituent stores.
>         (imm_store_chain_info::output_merged_stores): Don't remove clobber
>         stmts.
>         (rhs_valid_for_store_merging_p): Don't return false for clobber stmt
>         rhs.
>         (store_valid_for_store_merging_p): Allow clobber stmts.
>         (verify_clear_bit_region_be): Fix up a thinko in function comment.
>
>         * g++.dg/opt/store-merging-1.C: New test.
>         * g++.dg/opt/store-merging-2.C: New test.
>         * g++.dg/opt/store-merging-3.C: New test.
>
> --- gcc/gimple-ssa-store-merging.c.jj   2019-11-07 09:50:38.029447052 +0100
> +++ gcc/gimple-ssa-store-merging.c      2019-11-07 12:13:15.048531180 +0100
> @@ -3110,7 +3110,8 @@ split_store::split_store (unsigned HOST_
>  /* Record all stores in GROUP that write to the region starting at BITPOS and
>     is of size BITSIZE.  Record infos for such statements in STORES if
>     non-NULL.  The stores in GROUP must be sorted by bitposition.  Return INFO
> -   if there is exactly one original store in the range.  */
> +   if there is exactly one original store in the range (in that case ignore
> +   clobber stmts, unless there are only clobber stmts).  */
>
>  static store_immediate_info *
>  find_constituent_stores (class merged_store_group *group,
> @@ -3146,16 +3147,24 @@ find_constituent_stores (class merged_st
>        if (stmt_start >= end)
>         return ret;
>
> +      if (gimple_clobber_p (info->stmt))
> +       {
> +         if (stores)
> +           stores->safe_push (info);
> +         if (ret == NULL)
> +           ret = info;
> +         continue;
> +       }
>        if (stores)
>         {
>           stores->safe_push (info);
> -         if (ret)
> +         if (ret && !gimple_clobber_p (ret->stmt))
>             {
>               ret = NULL;
>               second = true;
>             }
>         }
> -      else if (ret)
> +      else if (ret && !gimple_clobber_p (ret->stmt))
>         return NULL;
>        if (!second)
>         ret = info;
> @@ -3347,13 +3356,17 @@ split_group (merged_store_group *group,
>
>    if (bzero_first)
>      {
> -      first = 1;
> +      store_immediate_info *gstore;
> +      FOR_EACH_VEC_ELT (group->stores, first, gstore)
> +       if (!gimple_clobber_p (gstore->stmt))
> +         break;
> +      ++first;
>        ret = 1;
>        if (split_stores)
>         {
>           split_store *store
> -           = new split_store (bytepos, group->stores[0]->bitsize, 
> align_base);
> -         store->orig_stores.safe_push (group->stores[0]);
> +           = new split_store (bytepos, gstore->bitsize, align_base);
> +         store->orig_stores.safe_push (gstore);
>           store->orig = true;
>           any_orig = true;
>           split_stores->safe_push (store);
> @@ -3377,6 +3390,7 @@ split_group (merged_store_group *group,
>        unsigned HOST_WIDE_INT align_bitpos
>         = (try_bitpos - align_base) & (group_align - 1);
>        unsigned HOST_WIDE_INT align = group_align;
> +      bool found_orig = false;
>        if (align_bitpos)
>         align = least_bit_hwi (align_bitpos);
>        if (!allow_unaligned_store)
> @@ -3408,7 +3422,7 @@ split_group (merged_store_group *group,
>         }
>        store_immediate_info *info
>         = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
> -      if (info)
> +      if (info && !gimple_clobber_p (info->stmt))
>         {
>           /* If there is just one original statement for the range, see if
>              we can just reuse the original store which could be even larger
> @@ -3419,8 +3433,30 @@ split_group (merged_store_group *group,
>                                           stmt_end - try_bitpos);
>           if (info && info->bitpos >= try_bitpos)
>             {
> -             try_size = stmt_end - try_bitpos;
> -             goto found;
> +             store_immediate_info *info2 = NULL;
> +             unsigned int first_copy = first;
> +             if (info->bitpos > try_bitpos
> +                 && stmt_end - try_bitpos <= try_size)
> +               {
> +                 info2 = find_constituent_stores (group, NULL, &first_copy,
> +                                                  try_bitpos,
> +                                                  info->bitpos - try_bitpos);
> +                 gcc_assert (info2 == NULL || gimple_clobber_p 
> (info2->stmt));
> +               }
> +             if (info2 == NULL && stmt_end - try_bitpos < try_size)
> +               {
> +                 info2 = find_constituent_stores (group, NULL, &first_copy,
> +                                                  stmt_end,
> +                                                  (try_bitpos + try_size)
> +                                                  - stmt_end);
> +                 gcc_assert (info2 == NULL || gimple_clobber_p 
> (info2->stmt));
> +               }
> +             if (info2 == NULL)
> +               {
> +                 try_size = stmt_end - try_bitpos;
> +                 found_orig = true;
> +                 goto found;
> +               }
>             }
>         }
>
> @@ -3435,7 +3471,7 @@ split_group (merged_store_group *group,
>             && (!bzero_first
>                 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
>           break;
> -      if (nonmasked == 0)
> +      if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
>         {
>           /* If entire try_size range is padding, skip it.  */
>           try_pos += try_size / BITS_PER_UNIT;
> @@ -3483,8 +3519,14 @@ split_group (merged_store_group *group,
>           info = find_constituent_stores (group, &store->orig_stores,
>                                           &first, try_bitpos, try_size);
>           if (info
> +             && !gimple_clobber_p (info->stmt)
>               && info->bitpos >= try_bitpos
> -             && info->bitpos + info->bitsize <= try_bitpos + try_size)
> +             && info->bitpos + info->bitsize <= try_bitpos + try_size
> +             && (store->orig_stores.length () == 1
> +                 || found_orig
> +                 || (info->bitpos == try_bitpos
> +                     && (info->bitpos + info->bitsize
> +                         == try_bitpos + try_size))))
>             {
>               store->orig = true;
>               any_orig = true;
> @@ -3671,14 +3713,32 @@ imm_store_chain_info::output_merged_stor
>      = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
>    bool allow_unaligned_load = allow_unaligned_store;
>    bool bzero_first = false;
> -  if (group->stores[0]->rhs_code == INTEGER_CST
> -      && TREE_CODE (gimple_assign_rhs1 (group->stores[0]->stmt)) == 
> CONSTRUCTOR
> -      && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (group->stores[0]->stmt)) == 0
> -      && group->start == group->stores[0]->bitpos
> -      && group->width == group->stores[0]->bitsize
> -      && (group->start % BITS_PER_UNIT) == 0
> -      && (group->width % BITS_PER_UNIT) == 0)
> -    bzero_first = true;
> +  store_immediate_info *store;
> +  unsigned int num_clobber_stmts = 0;
> +  if (group->stores[0]->rhs_code == INTEGER_CST)
> +    {
> +      FOR_EACH_VEC_ELT (group->stores, i, store)
> +       if (gimple_clobber_p (store->stmt))
> +         num_clobber_stmts++;
> +       else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
> +                && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
> +                && group->start == store->bitpos
> +                && group->width == store->bitsize
> +                && (group->start % BITS_PER_UNIT) == 0
> +                && (group->width % BITS_PER_UNIT) == 0)
> +         {
> +           bzero_first = true;
> +           break;
> +         }
> +       else
> +         break;
> +      FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
> +       if (gimple_clobber_p (store->stmt))
> +         num_clobber_stmts++;
> +      if (num_clobber_stmts == orig_num_stmts)
> +       return false;
> +      orig_num_stmts -= num_clobber_stmts;
> +    }
>    if (allow_unaligned_store || bzero_first)
>      {
>        /* If unaligned stores are allowed, see how many stores we'd emit
> @@ -3710,8 +3770,35 @@ imm_store_chain_info::output_merged_stor
>    split_group (group, allow_unaligned_store, allow_unaligned_load, 
> bzero_first,
>                &split_stores, &total_orig, &total_new);
>
> -  if (split_stores.length () >= orig_num_stmts)
> +  /* Determine if there is a clobber covering the whole group at the start,
> +     followed by proposed split stores that cover the whole group.  In that
> +     case, prefer the transformation even if
> +     split_stores.length () == orig_num_stmts.  */
> +  bool clobber_first = false;
> +  if (num_clobber_stmts
> +      && gimple_clobber_p (group->stores[0]->stmt)
> +      && group->start == group->stores[0]->bitpos
> +      && group->width == group->stores[0]->bitsize
> +      && (group->start % BITS_PER_UNIT) == 0
> +      && (group->width % BITS_PER_UNIT) == 0)
>      {
> +      clobber_first = true;
> +      unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
> +      FOR_EACH_VEC_ELT (split_stores, i, split_store)
> +       if (split_store->bytepos != pos)
> +         {
> +           clobber_first = false;
> +           break;
> +         }
> +       else
> +         pos += split_store->size / BITS_PER_UNIT;
> +      if (pos != (group->start + group->width) / BITS_PER_UNIT)
> +       clobber_first = false;
> +    }
> +
> +  if (split_stores.length () >= orig_num_stmts + clobber_first)
> +    {
> +
>        /* We didn't manage to reduce the number of statements.  Bail out.  */
>        if (dump_file && (dump_flags & TDF_DETAILS))
>         fprintf (dump_file, "Exceeded original number of stmts (%u)."
> @@ -3734,6 +3821,36 @@ imm_store_chain_info::output_merged_stor
>         delete split_store;
>        return false;
>      }
> +  if (group->stores[0]->rhs_code == INTEGER_CST)
> +    {
> +      bool all_orig = true;
> +      FOR_EACH_VEC_ELT (split_stores, i, split_store)
> +       if (!split_store->orig)
> +         {
> +           all_orig = false;
> +           break;
> +         }
> +      if (all_orig)
> +       {
> +         unsigned int cnt = split_stores.length ();
> +         store_immediate_info *store;
> +         FOR_EACH_VEC_ELT (group->stores, i, store)
> +           if (gimple_clobber_p (store->stmt))
> +             ++cnt;
> +         /* Punt if we wouldn't make any real changes, i.e. keep all
> +            orig stmts + all clobbers.  */
> +         if (cnt == group->stores.length ())
> +           {
> +             if (dump_file && (dump_flags & TDF_DETAILS))
> +               fprintf (dump_file, "Exceeded original number of stmts (%u)."
> +                                   "  Not profitable to emit new 
> sequence.\n",
> +                        orig_num_stmts);
> +             FOR_EACH_VEC_ELT (split_stores, i, split_store)
> +               delete split_store;
> +             return false;
> +           }
> +       }
> +    }
>
>    gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
>    gimple_seq seq = NULL;
> @@ -3742,6 +3859,13 @@ imm_store_chain_info::output_merged_stor
>    new_vuse = gimple_vuse (group->last_stmt);
>    tree bswap_res = NULL_TREE;
>
> +  /* Clobbers are not removed.  */
> +  if (gimple_clobber_p (group->last_stmt))
> +    {
> +      new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
> +      gimple_set_vdef (group->last_stmt, new_vuse);
> +    }
> +
>    if (group->stores[0]->rhs_code == LROTATE_EXPR
>        || group->stores[0]->rhs_code == NOP_EXPR)
>      {
> @@ -3865,9 +3989,17 @@ imm_store_chain_info::output_merged_stor
>        location_t loc;
>        if (split_store->orig)
>         {
> -         /* If there is just a single constituent store which covers
> -            the whole area, just reuse the lhs and rhs.  */
> -         gimple *orig_stmt = split_store->orig_stores[0]->stmt;
> +         /* If there is just a single non-clobber constituent store
> +            which covers the whole area, just reuse the lhs and rhs.  */
> +         gimple *orig_stmt = NULL;
> +         store_immediate_info *store;
> +         unsigned int j;
> +         FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
> +           if (!gimple_clobber_p (store->stmt))
> +             {
> +               orig_stmt = store->stmt;
> +               break;
> +             }
>           dest = gimple_assign_lhs (orig_stmt);
>           src = gimple_assign_rhs1 (orig_stmt);
>           loc = gimple_location (orig_stmt);
> @@ -4194,6 +4326,9 @@ imm_store_chain_info::output_merged_stor
>         print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
>      }
>
> +  if (gimple_clobber_p (group->last_stmt))
> +    update_stmt (group->last_stmt);
> +
>    if (group->lp_nr > 0)
>      {
>        /* We're going to insert a sequence of (potentially) throwing stores
> @@ -4279,6 +4414,10 @@ imm_store_chain_info::output_merged_stor
>             {
>               gimple *stmt = store->stmt;
>               gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
> +             /* Don't remove clobbers, they are still useful even if
> +                everything is overwritten afterwards.  */
> +             if (gimple_clobber_p (stmt))
> +               continue;
>               gsi_remove (&gsi, true);
>               if (store->lp_nr)
>                 remove_stmt_from_eh_lp (stmt);
> @@ -4361,7 +4500,6 @@ rhs_valid_for_store_merging_p (tree rhs)
>  {
>    unsigned HOST_WIDE_INT size;
>    if (TREE_CODE (rhs) == CONSTRUCTOR
> -      && !TREE_CLOBBER_P (rhs)
>        && CONSTRUCTOR_NELTS (rhs) == 0
>        && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
>        && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
> @@ -4794,7 +4932,7 @@ store_valid_for_store_merging_p (gimple
>    return gimple_assign_single_p (stmt)
>          && gimple_vdef (stmt)
>          && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
> -        && !gimple_has_volatile_ops (stmt);
> +        && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
>  }
>
>  enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
> @@ -4875,7 +5013,7 @@ pass_store_merging::execute (function *f
>           if (is_gimple_debug (stmt))
>             continue;
>
> -         if (gimple_has_volatile_ops (stmt))
> +         if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
>             {
>               /* Terminate all chains.  */
>               if (dump_file && (dump_flags & TDF_DETAILS))
> @@ -5022,7 +5160,7 @@ verify_clear_bit_region (void)
>    verify_array_eq (in, expected, sizeof in);
>  }
>
> -/* Test verify_clear_bit_region_be that it clears exactly the bits asked and
> +/* Test clear_bit_region_be that it clears exactly the bits asked and
>     nothing more.  */
>
>  static void
> --- gcc/testsuite/g++.dg/opt/store-merging-1.C.jj       2019-11-07 
> 09:53:18.204021914 +0100
> +++ gcc/testsuite/g++.dg/opt/store-merging-1.C  2019-11-07 12:16:48.270326348 
> +0100
> @@ -0,0 +1,9 @@
> +// PR target/92038
> +// { dg-do compile { target int32 } }
> +// { dg-require-effective-target store_merge }
> +// { dg-options "-O2 -flifetime-dse=2 -fdump-tree-store-merging-details" }
> +// { dg-final { scan-tree-dump "New sequence of \[12] stores to replace old 
> one of 2 stores" "store-merging" } }
> +
> +struct S { S () : a (0), b (0) {} int a; char b; char c[3]; };
> +void foo (struct S);
> +void bar (void) { struct S s; foo (s); }
> --- gcc/testsuite/g++.dg/opt/store-merging-2.C.jj       2019-11-07 
> 09:53:18.204021914 +0100
> +++ gcc/testsuite/g++.dg/opt/store-merging-2.C  2019-11-07 12:16:59.482157827 
> +0100
> @@ -0,0 +1,10 @@
> +// PR target/92038
> +// { dg-do compile { target int32 } }
> +// { dg-require-effective-target store_merge }
> +// { dg-options "-O2 -flifetime-dse=2 -fdump-tree-store-merging-details" }
> +// { dg-final { scan-tree-dump "New sequence of 2 stores to replace old one 
> of 3 stores" "store-merging" } }
> +
> +struct T { char a[128]; };
> +struct S { S () : a () { a.a[12] = 0; a.a[13] = 1; a.a[14] = 0; a.a[15] = 6; 
> } T a; };
> +void foo (S &);
> +void bar (void) { S s; foo (s); }
> --- gcc/testsuite/g++.dg/opt/store-merging-3.C.jj       2019-11-07 
> 12:15:22.816610757 +0100
> +++ gcc/testsuite/g++.dg/opt/store-merging-3.C  2019-11-07 12:14:43.651199435 
> +0100
> @@ -0,0 +1,8 @@
> +// PR target/92038
> +// { dg-do compile }
> +// { dg-options "-O2 -flifetime-dse=2" }
> +
> +struct A { A (int); int a; };
> +struct B { B () : b(0) {} A b; };
> +struct C { C () : c () {} bool c; B d; };
> +void foo () { C d; }
>
>         Jakub
>

Reply via email to