On Tue, Sep 11, 2018 at 04:06:40PM +0200, Andreas Krebbel wrote: > > It is a fix, but not optimal. > > We have essentially: > > MEM[(int *)p_28] = 0; > > MEM[(char *)p_28 + 3B] = 1; > > MEM[(char *)p_28 + 1B] = 2; > > MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B]; > > It is useful to merge the first 3 stores into: > > MEM[(int *)p_28] = 0x01000200; // or 0x00020001; depending on endianity > > MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B]; > > rather than punt, and just ignore (i.e. make sure it isn't merged with > > anything else) the non-INTEGER_CST store). If you don't mind, I'll take > > this > > PR over and handle it tomorrow. > > Please do. Thanks!
Here it is, I hope the added comments are clear enough on what's going on and when we can and when we can't handle it. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and eventually 8.3? 2018-09-12 Jakub Jelinek <ja...@redhat.com> Andreas Krebbel <kreb...@linux.ibm.com> PR tree-optimization/86844 * gimple-ssa-store-merging.c (imm_store_chain_info::coalesce_immediate): For overlapping stores, if there are any overlapping stores in between them, make sure they are also coalesced or we give up completely. * gcc.c-torture/execute/pr86844.c: New test. * gcc.dg/store_merging_22.c: New test. * gcc.dg/store_merging_23.c: New test. --- gcc/gimple-ssa-store-merging.c.jj 2018-07-12 09:38:46.137335036 +0200 +++ gcc/gimple-ssa-store-merging.c 2018-09-11 22:47:41.406582798 +0200 @@ -2702,15 +2702,80 @@ imm_store_chain_info::coalesce_immediate { /* Only allow overlapping stores of constants. */ if (info->rhs_code == INTEGER_CST - && merged_store->stores[0]->rhs_code == INTEGER_CST - && check_no_overlap (m_store_info, i, INTEGER_CST, - MAX (merged_store->last_order, info->order), - MAX (merged_store->start - + merged_store->width, - info->bitpos + info->bitsize))) + && merged_store->stores[0]->rhs_code == INTEGER_CST) { - merged_store->merge_overlapping (info); - goto done; + unsigned int last_order + = MAX (merged_store->last_order, info->order); + unsigned HOST_WIDE_INT end + = MAX (merged_store->start + merged_store->width, + info->bitpos + info->bitsize); + if (check_no_overlap (m_store_info, i, INTEGER_CST, + last_order, end)) + { + /* check_no_overlap call above made sure there are no + overlapping stores with non-INTEGER_CST rhs_code + in between the first and last of the stores we've + just merged. If there are any INTEGER_CST rhs_code + stores in between, we need to merge_overlapping them + even if in the sort_by_bitpos order there are other + overlapping stores in between. Keep those stores as is. + Example: + MEM[(int *)p_28] = 0; + MEM[(char *)p_28 + 3B] = 1; + MEM[(char *)p_28 + 1B] = 2; + MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B]; + We can't merge the zero store with the store of two and + not merge anything else, because the store of one is + in the original order in between those two, but in + store_by_bitpos order it comes after the last store that + we can't merge with them. We can merge the first 3 stores + and keep the last store as is though. */ + unsigned int len = m_store_info.length (), k = i; + for (unsigned int j = i + 1; j < len; ++j) + { + store_immediate_info *info2 = m_store_info[j]; + if (info2->bitpos >= end) + break; + if (info2->order < last_order) + { + if (info2->rhs_code != INTEGER_CST) + { + /* Normally check_no_overlap makes sure this + doesn't happen, but if end grows below, then + we need to process more stores than + check_no_overlap verified. Example: + MEM[(int *)p_5] = 0; + MEM[(short *)p_5 + 3B] = 1; + MEM[(char *)p_5 + 4B] = _9; + MEM[(char *)p_5 + 2B] = 2; */ + k = 0; + break; + } + k = j; + end = MAX (end, info2->bitpos + info2->bitsize); + } + } + + if (k != 0) + { + merged_store->merge_overlapping (info); + + for (unsigned int j = i + 1; j <= k; j++) + { + store_immediate_info *info2 = m_store_info[j]; + gcc_assert (info2->bitpos < end); + if (info2->order < last_order) + { + gcc_assert (info2->rhs_code == INTEGER_CST); + merged_store->merge_overlapping (info2); + } + /* Other stores are kept and not merged in any + way. */ + } + ignore = k; + goto done; + } + } } } /* |---store 1---||---store 2---| --- gcc/testsuite/gcc.c-torture/execute/pr86844.c.jj 2018-09-11 18:25:16.882452028 +0200 +++ gcc/testsuite/gcc.c-torture/execute/pr86844.c 2018-09-11 18:25:16.882452028 +0200 @@ -0,0 +1,24 @@ +/* PR tree-optimization/86844 */ + +__attribute__((noipa)) void +foo (int *p) +{ + *p = 0; + *((char *)p + 3) = 1; + *((char *)p + 1) = 2; + *((char *)p + 2) = *((char *)p + 6); +} + +int +main () +{ + int a[2] = { -1, 0 }; + if (sizeof (int) != 4) + return 0; + ((char *)a)[6] = 3; + foo (a); + if (((char *)a)[0] != 0 || ((char *)a)[1] != 2 + || ((char *)a)[2] != 3 || ((char *)a)[3] != 1) + __builtin_abort (); + return 0; +} --- gcc/testsuite/gcc.dg/store_merging_22.c.jj 2018-09-12 10:14:34.286704682 +0200 +++ gcc/testsuite/gcc.dg/store_merging_22.c 2018-09-12 10:25:30.156727153 +0200 @@ -0,0 +1,16 @@ +/* PR tree-optimization/86844 */ +/* { dg-do compile } */ +/* { dg-require-effective-target store_merge } */ +/* { dg-options "-O2 -fdump-tree-store-merging" } */ + +void +foo (int *p) +{ + *p = 0; + *((char *)p + 3) = 1; + *((char *)p + 1) = 2; + *((char *)p + 2) = *((char *)p + 38); +} + +/* { dg-final { scan-tree-dump-times "Merging successful" 1 "store-merging" } } */ +/* { dg-final { scan-tree-dump-times "New sequence of 1 stores to replace old one of 3 stores" 1 "store-merging" } } */ --- gcc/testsuite/gcc.dg/store_merging_23.c.jj 2018-09-12 10:25:41.099544995 +0200 +++ gcc/testsuite/gcc.dg/store_merging_23.c 2018-09-12 10:32:27.956772277 +0200 @@ -0,0 +1,16 @@ +/* PR tree-optimization/86844 */ +/* { dg-do compile } */ +/* { dg-require-effective-target store_merge } */ +/* { dg-options "-O2 -fdump-tree-store-merging" } */ + +void +foo (int *p) +{ + *p = 0; + int one = 1; + __builtin_memcpy ((char *) p + 3, &one, sizeof (int)); + *((char *)p + 4) = *((char *)p + 36); + *((char *)p + 1) = 2; +} + +/* { dg-final { scan-tree-dump-not "Merging successful" "store-merging" } } */ Jakub