On Tue, Sep 11, 2018 at 04:06:40PM +0200, Andreas Krebbel wrote:
> > It is a fix, but not optimal.
> > We have essentially:
> >      MEM[(int *)p_28] = 0;
> >      MEM[(char *)p_28 + 3B] = 1;
> >      MEM[(char *)p_28 + 1B] = 2;
> >      MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
> > It is useful to merge the first 3 stores into:
> >      MEM[(int *)p_28] = 0x01000200; // or 0x00020001; depending on endianity
> >      MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
> > rather than punt, and just ignore (i.e. make sure it isn't merged with
> > anything else) the non-INTEGER_CST store).  If you don't mind, I'll take 
> > this
> > PR over and handle it tomorrow.
> 
> Please do. Thanks!

Here it is, I hope the added comments are clear enough on what's going on
and when we can and when we can't handle it.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and
eventually 8.3?

2018-09-12  Jakub Jelinek  <ja...@redhat.com>
            Andreas Krebbel  <kreb...@linux.ibm.com>

        PR tree-optimization/86844
        * gimple-ssa-store-merging.c
        (imm_store_chain_info::coalesce_immediate): For overlapping stores, if
        there are any overlapping stores in between them, make sure they are
        also coalesced or we give up completely.

        * gcc.c-torture/execute/pr86844.c: New test.
        * gcc.dg/store_merging_22.c: New test.
        * gcc.dg/store_merging_23.c: New test.

--- gcc/gimple-ssa-store-merging.c.jj   2018-07-12 09:38:46.137335036 +0200
+++ gcc/gimple-ssa-store-merging.c      2018-09-11 22:47:41.406582798 +0200
@@ -2702,15 +2702,80 @@ imm_store_chain_info::coalesce_immediate
        {
          /* Only allow overlapping stores of constants.  */
          if (info->rhs_code == INTEGER_CST
-             && merged_store->stores[0]->rhs_code == INTEGER_CST
-             && check_no_overlap (m_store_info, i, INTEGER_CST,
-                                  MAX (merged_store->last_order, info->order),
-                                  MAX (merged_store->start
-                                       + merged_store->width,
-                                       info->bitpos + info->bitsize)))
+             && merged_store->stores[0]->rhs_code == INTEGER_CST)
            {
-             merged_store->merge_overlapping (info);
-             goto done;
+             unsigned int last_order
+               = MAX (merged_store->last_order, info->order);
+             unsigned HOST_WIDE_INT end
+               = MAX (merged_store->start + merged_store->width,
+                      info->bitpos + info->bitsize);
+             if (check_no_overlap (m_store_info, i, INTEGER_CST,
+                                   last_order, end))
+               {
+                 /* check_no_overlap call above made sure there are no
+                    overlapping stores with non-INTEGER_CST rhs_code
+                    in between the first and last of the stores we've
+                    just merged.  If there are any INTEGER_CST rhs_code
+                    stores in between, we need to merge_overlapping them
+                    even if in the sort_by_bitpos order there are other
+                    overlapping stores in between.  Keep those stores as is.
+                    Example:
+                       MEM[(int *)p_28] = 0;
+                       MEM[(char *)p_28 + 3B] = 1;
+                       MEM[(char *)p_28 + 1B] = 2;
+                       MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
+                    We can't merge the zero store with the store of two and
+                    not merge anything else, because the store of one is
+                    in the original order in between those two, but in
+                    store_by_bitpos order it comes after the last store that
+                    we can't merge with them.  We can merge the first 3 stores
+                    and keep the last store as is though.  */
+                 unsigned int len = m_store_info.length (), k = i;
+                 for (unsigned int j = i + 1; j < len; ++j)
+                   {
+                     store_immediate_info *info2 = m_store_info[j];
+                     if (info2->bitpos >= end)
+                       break;
+                     if (info2->order < last_order)
+                       {
+                         if (info2->rhs_code != INTEGER_CST)
+                           {
+                             /* Normally check_no_overlap makes sure this
+                                doesn't happen, but if end grows below, then
+                                we need to process more stores than
+                                check_no_overlap verified.  Example:
+                                   MEM[(int *)p_5] = 0;
+                                   MEM[(short *)p_5 + 3B] = 1;
+                                   MEM[(char *)p_5 + 4B] = _9;
+                                   MEM[(char *)p_5 + 2B] = 2;  */
+                             k = 0;
+                             break;
+                           }
+                         k = j;
+                         end = MAX (end, info2->bitpos + info2->bitsize);
+                       }
+                   }
+
+                 if (k != 0)
+                   {
+                     merged_store->merge_overlapping (info);
+
+                     for (unsigned int j = i + 1; j <= k; j++)
+                       {
+                         store_immediate_info *info2 = m_store_info[j];
+                         gcc_assert (info2->bitpos < end);
+                         if (info2->order < last_order)
+                           {
+                             gcc_assert (info2->rhs_code == INTEGER_CST);
+                             merged_store->merge_overlapping (info2);
+                           }
+                         /* Other stores are kept and not merged in any
+                            way.  */
+                       }
+                     ignore = k;
+                     goto done;
+                   }
+               }
            }
        }
       /* |---store 1---||---store 2---|
--- gcc/testsuite/gcc.c-torture/execute/pr86844.c.jj    2018-09-11 
18:25:16.882452028 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr86844.c       2018-09-11 
18:25:16.882452028 +0200
@@ -0,0 +1,24 @@
+/* PR tree-optimization/86844 */
+
+__attribute__((noipa)) void
+foo (int *p)
+{
+  *p = 0;
+  *((char *)p + 3) = 1;
+  *((char *)p + 1) = 2;
+  *((char *)p + 2) = *((char *)p + 6);
+}
+
+int
+main ()
+{
+  int a[2] = { -1, 0 };
+  if (sizeof (int) != 4)
+    return 0;
+  ((char *)a)[6] = 3;
+  foo (a);
+  if (((char *)a)[0] != 0 || ((char *)a)[1] != 2
+      || ((char *)a)[2] != 3 || ((char *)a)[3] != 1)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/store_merging_22.c.jj  2018-09-12 10:14:34.286704682 
+0200
+++ gcc/testsuite/gcc.dg/store_merging_22.c     2018-09-12 10:25:30.156727153 
+0200
@@ -0,0 +1,16 @@
+/* PR tree-optimization/86844 */
+/* { dg-do compile } */
+/* { dg-require-effective-target store_merge } */
+/* { dg-options "-O2 -fdump-tree-store-merging" } */
+
+void
+foo (int *p)
+{
+  *p = 0;
+  *((char *)p + 3) = 1;
+  *((char *)p + 1) = 2;
+  *((char *)p + 2) = *((char *)p + 38);
+}
+
+/* { dg-final { scan-tree-dump-times "Merging successful" 1 "store-merging" } 
} */
+/* { dg-final { scan-tree-dump-times "New sequence of 1 stores to replace old 
one of 3 stores" 1 "store-merging" } } */
--- gcc/testsuite/gcc.dg/store_merging_23.c.jj  2018-09-12 10:25:41.099544995 
+0200
+++ gcc/testsuite/gcc.dg/store_merging_23.c     2018-09-12 10:32:27.956772277 
+0200
@@ -0,0 +1,16 @@
+/* PR tree-optimization/86844 */
+/* { dg-do compile } */
+/* { dg-require-effective-target store_merge } */
+/* { dg-options "-O2 -fdump-tree-store-merging" } */
+
+void
+foo (int *p)
+{
+  *p = 0;
+  int one = 1;
+  __builtin_memcpy ((char *) p + 3, &one, sizeof (int));
+  *((char *)p + 4) = *((char *)p + 36);
+  *((char *)p + 1) = 2;
+}
+
+/* { dg-final { scan-tree-dump-not "Merging successful" "store-merging" } } */


        Jakub

Reply via email to