On Wed, 12 Sep 2018, Jakub Jelinek wrote:

> On Tue, Sep 11, 2018 at 04:06:40PM +0200, Andreas Krebbel wrote:
> > > It is a fix, but not optimal.
> > > We have essentially:
> > >      MEM[(int *)p_28] = 0;
> > >      MEM[(char *)p_28 + 3B] = 1;
> > >      MEM[(char *)p_28 + 1B] = 2;
> > >      MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
> > > It is useful to merge the first 3 stores into:
> > >      MEM[(int *)p_28] = 0x01000200; // or 0x00020001; depending on 
> > > endianity
> > >      MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
> > > rather than punt, and just ignore (i.e. make sure it isn't merged with
> > > anything else) the non-INTEGER_CST store).  If you don't mind, I'll take 
> > > this
> > > PR over and handle it tomorrow.
> > 
> > Please do. Thanks!
> 
> Here it is, I hope the added comments are clear enough on what's going on
> and when we can and when we can't handle it.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and
> eventually 8.3?

OK.  We need to fix the bug on the branch, so either we go with this 
variant or the other (which was pessimizing some cases?)

Richard.

> 2018-09-12  Jakub Jelinek  <ja...@redhat.com>
>           Andreas Krebbel  <kreb...@linux.ibm.com>
> 
>       PR tree-optimization/86844
>       * gimple-ssa-store-merging.c
>       (imm_store_chain_info::coalesce_immediate): For overlapping stores, if
>       there are any overlapping stores in between them, make sure they are
>       also coalesced or we give up completely.
> 
>       * gcc.c-torture/execute/pr86844.c: New test.
>       * gcc.dg/store_merging_22.c: New test.
>       * gcc.dg/store_merging_23.c: New test.
> 
> --- gcc/gimple-ssa-store-merging.c.jj 2018-07-12 09:38:46.137335036 +0200
> +++ gcc/gimple-ssa-store-merging.c    2018-09-11 22:47:41.406582798 +0200
> @@ -2702,15 +2702,80 @@ imm_store_chain_info::coalesce_immediate
>       {
>         /* Only allow overlapping stores of constants.  */
>         if (info->rhs_code == INTEGER_CST
> -           && merged_store->stores[0]->rhs_code == INTEGER_CST
> -           && check_no_overlap (m_store_info, i, INTEGER_CST,
> -                                MAX (merged_store->last_order, info->order),
> -                                MAX (merged_store->start
> -                                     + merged_store->width,
> -                                     info->bitpos + info->bitsize)))
> +           && merged_store->stores[0]->rhs_code == INTEGER_CST)
>           {
> -           merged_store->merge_overlapping (info);
> -           goto done;
> +           unsigned int last_order
> +             = MAX (merged_store->last_order, info->order);
> +           unsigned HOST_WIDE_INT end
> +             = MAX (merged_store->start + merged_store->width,
> +                    info->bitpos + info->bitsize);
> +           if (check_no_overlap (m_store_info, i, INTEGER_CST,
> +                                 last_order, end))
> +             {
> +               /* check_no_overlap call above made sure there are no
> +                  overlapping stores with non-INTEGER_CST rhs_code
> +                  in between the first and last of the stores we've
> +                  just merged.  If there are any INTEGER_CST rhs_code
> +                  stores in between, we need to merge_overlapping them
> +                  even if in the sort_by_bitpos order there are other
> +                  overlapping stores in between.  Keep those stores as is.
> +                  Example:
> +                     MEM[(int *)p_28] = 0;
> +                     MEM[(char *)p_28 + 3B] = 1;
> +                     MEM[(char *)p_28 + 1B] = 2;
> +                     MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
> +                  We can't merge the zero store with the store of two and
> +                  not merge anything else, because the store of one is
> +                  in the original order in between those two, but in
> +                  store_by_bitpos order it comes after the last store that
> +                  we can't merge with them.  We can merge the first 3 stores
> +                  and keep the last store as is though.  */
> +               unsigned int len = m_store_info.length (), k = i;
> +               for (unsigned int j = i + 1; j < len; ++j)
> +                 {
> +                   store_immediate_info *info2 = m_store_info[j];
> +                   if (info2->bitpos >= end)
> +                     break;
> +                   if (info2->order < last_order)
> +                     {
> +                       if (info2->rhs_code != INTEGER_CST)
> +                         {
> +                           /* Normally check_no_overlap makes sure this
> +                              doesn't happen, but if end grows below, then
> +                              we need to process more stores than
> +                              check_no_overlap verified.  Example:
> +                                 MEM[(int *)p_5] = 0;
> +                                 MEM[(short *)p_5 + 3B] = 1;
> +                                 MEM[(char *)p_5 + 4B] = _9;
> +                                 MEM[(char *)p_5 + 2B] = 2;  */
> +                           k = 0;
> +                           break;
> +                         }
> +                       k = j;
> +                       end = MAX (end, info2->bitpos + info2->bitsize);
> +                     }
> +                 }
> +
> +               if (k != 0)
> +                 {
> +                   merged_store->merge_overlapping (info);
> +
> +                   for (unsigned int j = i + 1; j <= k; j++)
> +                     {
> +                       store_immediate_info *info2 = m_store_info[j];
> +                       gcc_assert (info2->bitpos < end);
> +                       if (info2->order < last_order)
> +                         {
> +                           gcc_assert (info2->rhs_code == INTEGER_CST);
> +                           merged_store->merge_overlapping (info2);
> +                         }
> +                       /* Other stores are kept and not merged in any
> +                          way.  */
> +                     }
> +                   ignore = k;
> +                   goto done;
> +                 }
> +             }
>           }
>       }
>        /* |---store 1---||---store 2---|
> --- gcc/testsuite/gcc.c-torture/execute/pr86844.c.jj  2018-09-11 
> 18:25:16.882452028 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr86844.c     2018-09-11 
> 18:25:16.882452028 +0200
> @@ -0,0 +1,24 @@
> +/* PR tree-optimization/86844 */
> +
> +__attribute__((noipa)) void
> +foo (int *p)
> +{
> +  *p = 0;
> +  *((char *)p + 3) = 1;
> +  *((char *)p + 1) = 2;
> +  *((char *)p + 2) = *((char *)p + 6);
> +}
> +
> +int
> +main ()
> +{
> +  int a[2] = { -1, 0 };
> +  if (sizeof (int) != 4)
> +    return 0;
> +  ((char *)a)[6] = 3;
> +  foo (a);
> +  if (((char *)a)[0] != 0 || ((char *)a)[1] != 2
> +      || ((char *)a)[2] != 3 || ((char *)a)[3] != 1)
> +    __builtin_abort ();
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/store_merging_22.c.jj        2018-09-12 
> 10:14:34.286704682 +0200
> +++ gcc/testsuite/gcc.dg/store_merging_22.c   2018-09-12 10:25:30.156727153 
> +0200
> @@ -0,0 +1,16 @@
> +/* PR tree-optimization/86844 */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target store_merge } */
> +/* { dg-options "-O2 -fdump-tree-store-merging" } */
> +
> +void
> +foo (int *p)
> +{
> +  *p = 0;
> +  *((char *)p + 3) = 1;
> +  *((char *)p + 1) = 2;
> +  *((char *)p + 2) = *((char *)p + 38);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Merging successful" 1 "store-merging" 
> } } */
> +/* { dg-final { scan-tree-dump-times "New sequence of 1 stores to replace 
> old one of 3 stores" 1 "store-merging" } } */
> --- gcc/testsuite/gcc.dg/store_merging_23.c.jj        2018-09-12 
> 10:25:41.099544995 +0200
> +++ gcc/testsuite/gcc.dg/store_merging_23.c   2018-09-12 10:32:27.956772277 
> +0200
> @@ -0,0 +1,16 @@
> +/* PR tree-optimization/86844 */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target store_merge } */
> +/* { dg-options "-O2 -fdump-tree-store-merging" } */
> +
> +void
> +foo (int *p)
> +{
> +  *p = 0;
> +  int one = 1;
> +  __builtin_memcpy ((char *) p + 3, &one, sizeof (int));
> +  *((char *)p + 4) = *((char *)p + 36);
> +  *((char *)p + 1) = 2;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "Merging successful" "store-merging" } } 
> */
> 
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)

Reply via email to